From cdcc9adb4474616beb487504bd24862597b1c722 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 21 Mar 2025 15:48:22 -0300 Subject: [PATCH 01/75] Initial commit --- include/nbl/builtin/hlsl/math/morton.hlsl | 36 +++++++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 37 insertions(+) create mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl new file mode 100644 index 0000000000..22c56f8999 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -0,0 +1,36 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +template) +struct code +{ + using this_t = code; + using U = make_unsigned; + + static this_t create(vector cartesian) + { + //... TODO ... + return this_t(); + } + + //operator+, operator-, operator>>, operator<<, and other bitwise ops + + U value; +}; + +} //namespace morton +} //namespace hlsl +} //namespace nbl + + + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 291ee64bad..14e5fe67db 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -289,6 +289,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl" LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") From 
5fe6c0837ff53d156b9fc0500f3899c6c1c546c6 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Sun, 23 Mar 2025 19:30:10 -0300 Subject: [PATCH 02/75] CHeckpoint before master merge --- examples_tests | 2 +- include/nbl/builtin/hlsl/math/morton.hlsl | 54 ++++++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/examples_tests b/examples_tests index 91dc3afe4c..f2ea51d0b3 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 91dc3afe4c66e5bdfd313ec37e7e1863daa52116 +Subproject commit f2ea51d0b3e3388c0f9bae03602ec3b1f658c124 diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 22c56f8999..bf339f4d6f 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -10,12 +10,64 @@ namespace hlsl namespace morton { -template) +namespace impl +{ + +template +struct decode_mask; + +template +struct decode_mask : integral_constant {}; + +template +struct decode_mask : integral_constant::value << Dim) | T(1)> {}; + +template +NBL_CONSTEXPR T decode_mask_v = decode_mask::value; + +// Compile-time still a bit primitive in HLSL, we can support arbitrary-dimensional morton codes in C++ but HLSL's have to be hand coded +template +struct decode_masks_array; + +#ifndef __HLSL_VERSION + +template +struct decode_masks_array +{ + static consteval vector generateMasks() + { + vector masks; + for (auto i = 0u; i < Dim; i++) + { + masks[i] = decode_mask_v << T(i); + } + return masks; + } + + NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); +}; + +#else +template +struct decode_masks_array +{ + NBL_CONSTEXPR_STATIC_INLINE vector Masks = vector(decode_mask_v, decode_mask_v << T(1)); +}; +//template +//NBL_CONSTEXPR_STATIC_INLINE vector decode_masks_array::Masks = vector(decode_mask_v, decode_mask_v << T(1)); +#endif + +} //namespace impl + + +template && 1 < D && D < 5) struct code { using this_t = code; using U = make_unsigned; + + static 
this_t create(vector cartesian) { //... TODO ... From f18b2fa2925cd7f5c5cc94a808cc518b0bd9baaa Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 17:21:37 -0300 Subject: [PATCH 03/75] Checkpoint before merging new type_traits change --- include/nbl/builtin/hlsl/math/morton.hlsl | 56 +++++++++++++++-------- include/nbl/builtin/hlsl/type_traits.hlsl | 6 +++ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index bf339f4d6f..22081e2b7f 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -13,7 +13,7 @@ namespace morton namespace impl { -template +template struct decode_mask; template @@ -22,15 +22,11 @@ struct decode_mask : integral_constant {}; template struct decode_mask : integral_constant::value << Dim) | T(1)> {}; +#ifndef __HLSL_VERSION + template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -// Compile-time still a bit primitive in HLSL, we can support arbitrary-dimensional morton codes in C++ but HLSL's have to be hand coded -template -struct decode_masks_array; - -#ifndef __HLSL_VERSION - template struct decode_masks_array { @@ -47,31 +43,50 @@ struct decode_masks_array NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); }; -#else -template -struct decode_masks_array -{ - NBL_CONSTEXPR_STATIC_INLINE vector Masks = vector(decode_mask_v, decode_mask_v << T(1)); -}; -//template -//NBL_CONSTEXPR_STATIC_INLINE vector decode_masks_array::Masks = vector(decode_mask_v, decode_mask_v << T(1)); +template +NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; + #endif } //namespace impl +// HLSL only supports up to D = 4, and even then having this in a more generic manner is blocked by a DXC issue targeting SPIR-V +#ifndef __HLSL_VERSION + +#define NBL_HLSL_MORTON_MASKS(U, D) impl::decode_masks< U , D > + +#else + +// Up to D = 4 supported +#define NBL_HLSL_MORTON_MASKS(U, D) vector< 
U , 4 >(impl::decode_mask< U , D >::value,\ + impl::decode_mask< U , D >::value << U (1),\ + impl::decode_mask< U , D >::value << U (2),\ + impl::decode_mask< U , D >::value << U (3)\ + ) +#endif + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it template && 1 < D && D < 5) struct code { using this_t = code; - using U = make_unsigned; - + using U = make_unsigned_t; +#ifdef __HLSL_VERSION + _Static_assert(is_same_v, + "make_signed requires that T shall be a (possibly cv-qualified) " + "integral type or enumeration but not a bool type."); +#endif static this_t create(vector cartesian) { - //... TODO ... - return this_t(); + NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(I, D); + printf("%d %d %d %d", Masks[0], Masks[1], Masks[2], Masks[3]); + this_t foo; + foo.value = U(0); + return foo; } //operator+, operator-, operator>>, operator<<, and other bitwise ops @@ -79,6 +94,9 @@ struct code U value; }; +// Don't forget to delete this macro after usage +#undef NBL_HLSL_MORTON_MASKS + } //namespace morton } //namespace hlsl } //namespace nbl diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 708f643ab0..222dbcdb7c 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -688,6 +688,12 @@ NBL_CONSTEXPR uint64_t extent_v = extent::value; template using make_void_t = typename make_void::type; +template +using make_signed_t = typename make_signed::type; + +template +using make_unsigned_t = typename make_unsigned::type; + template struct conditional_value { From 4ebc555d320cc3e678095d72437e07721dc1441b Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 19:18:49 -0300 Subject: [PATCH 04/75] Works, but throws DXC warning --- include/nbl/builtin/hlsl/math/morton.hlsl | 
25 +++++++++-------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 22081e2b7f..058bdad862 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -22,11 +22,11 @@ struct decode_mask : integral_constant {}; template struct decode_mask : integral_constant::value << Dim) | T(1)> {}; -#ifndef __HLSL_VERSION - template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +#ifndef __HLSL_VERSION + template struct decode_masks_array { @@ -58,10 +58,11 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; #else // Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask< U , D >::value,\ - impl::decode_mask< U , D >::value << U (1),\ - impl::decode_mask< U , D >::value << U (2),\ - impl::decode_mask< U , D >::value << U (3)\ +// This will throw a DXC warning about the vector being truncated - no way around that +#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask_v< U , D >,\ + impl::decode_mask_v< U , D > << U (1),\ + impl::decode_mask_v< U , D > << U (2),\ + impl::decode_mask_v< U , D > << U (3)\ ) #endif @@ -74,18 +75,12 @@ struct code using this_t = code; using U = make_unsigned_t; -#ifdef __HLSL_VERSION - _Static_assert(is_same_v, - "make_signed requires that T shall be a (possibly cv-qualified) " - "integral type or enumeration but not a bool type."); -#endif - static this_t create(vector cartesian) { - NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(I, D); - printf("%d %d %d %d", Masks[0], Masks[1], Masks[2], Masks[3]); + NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + printf("%u %u %u %u", Masks[0], Masks[1], Masks[2]); this_t foo; - foo.value = U(0); + foo.value = Masks[0]; return foo; } From 55a2ef637ca12c6c35b6f8001db6f619acfc2315 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 
19:41:14 -0300 Subject: [PATCH 05/75] Added concept for valid morton dimensions --- include/nbl/builtin/hlsl/math/morton.hlsl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 058bdad862..99980284e9 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -13,6 +13,19 @@ namespace morton namespace impl { +// Valid dimension for a morton code +#ifndef __HLSL_VERSION + +template +NBL_BOOL_CONCEPT MortonDimension = D > 1; + +#else + +template +NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; + +#endif + template struct decode_mask; @@ -69,7 +82,7 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it -template && 1 < D && D < 5) +template && impl::MortonDimension) struct code { using this_t = code; @@ -78,7 +91,6 @@ struct code static this_t create(vector cartesian) { NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - printf("%u %u %u %u", Masks[0], Masks[1], Masks[2]); this_t foo; foo.value = Masks[0]; return foo; From f5162561ee2203aa51c8c600aed225d679c9408d Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 24 Mar 2025 21:28:07 -0300 Subject: [PATCH 06/75] Creation from vector working as intended --- include/nbl/builtin/hlsl/math/morton.hlsl | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 99980284e9..aab8511b95 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ #include 
"nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" namespace nbl { @@ -88,12 +89,22 @@ struct code using this_t = code; using U = make_unsigned_t; - static this_t create(vector cartesian) + static this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC_INLINE vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t foo; - foo.value = Masks[0]; - return foo; + NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + const vector unsignedCartesian = bit_cast, vector >(cartesian); + U val = U(0); + [[unroll]] + // We want to interleave the bits of each number in `unsignedCartesian`. We do this by enumerating + // val[0] = bit 0 of unsignedCartesian[0], val[1] = bit 0 of unsignedCartesian[1], ..., val[D-1] = bit 0 of unsignedCartesian[D-1], + // val[D] = bit 1 of unsignedCartesian[0], val[D+1] = bit 1 of unsignedCartesian[1], ..., val[2D-1] = bit 1 of unsignedCartesian[D-1] + // and so on until we get val[BitDwidth - 1] and stop. + for (U i = U(0); i < BitWidth; i++) + { + val |= (unsignedCartesian[i % D] & (U(1) << (i / D))) << (i - (i / D)); + } + this_t retVal = {val}; + return retVal; } //operator+, operator-, operator>>, operator<<, and other bitwise ops From 534d81bfc2ab1136d959a41ecee521990115d7bb Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 26 Mar 2025 13:05:20 -0300 Subject: [PATCH 07/75] Added some extra macro specifiers, vector truncation with no warnings on HLSL side by specializing , a bunch of morton operators --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 8 +- .../nbl/builtin/hlsl/cpp_compat/vector.hlsl | 30 +++ include/nbl/builtin/hlsl/math/morton.hlsl | 181 ++++++++++++++++-- 3 files changed, 198 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 3802bd69ea..a93727815b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -40,8 +40,11 @@ inline To 
_static_cast(From v) #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline +#define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr +#define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline +#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_STATIC #define NBL_CONST_MEMBER_FUNC const namespace nbl::hlsl @@ -70,8 +73,11 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static +#define NBL_CONSTEXPR_STATIC_FUNC static #define NBL_CONSTEXPR_INLINE_FUNC inline +#define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline +#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_CONSTEXPR_STATIC_INLINE_FUNC #define NBL_CONST_MEMBER_FUNC namespace nbl diff --git a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl index 354937427a..f6ced52db1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl @@ -1,6 +1,8 @@ #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat/basic.h" + // stuff for C++ #ifndef __HLSL_VERSION #include @@ -92,4 +94,32 @@ struct blake3_hasher::update_impl,Dummy> } #endif } + +// To prevent implicit truncation warnings +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + +template +struct static_cast_helper, vector > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) + { + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < N; i++) + { + retVal[i] = val[i]; + } + return retVal; + } +}; + 
+} +} +} + #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index aab8511b95..ecd94ce69e 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -3,6 +3,8 @@ #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" namespace nbl { @@ -15,18 +17,9 @@ namespace impl { // Valid dimension for a morton code -#ifndef __HLSL_VERSION - -template -NBL_BOOL_CONCEPT MortonDimension = D > 1; - -#else - template NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; -#endif - template struct decode_mask; @@ -73,11 +66,12 @@ NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; // Up to D = 4 supported // This will throw a DXC warning about the vector being truncated - no way around that -#define NBL_HLSL_MORTON_MASKS(U, D) vector< U , 4 >(impl::decode_mask_v< U , D >,\ +// The only way to avoid this atm (until they fix issue 7006 below) is to wrap the whole class in a macro and expand it for each possible value of `D` +#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ impl::decode_mask_v< U , D > << U (1),\ impl::decode_mask_v< U , D > << U (2),\ impl::decode_mask_v< U , D > << U (3)\ - ) + )) #endif @@ -88,25 +82,134 @@ struct code { using this_t = code; using U = make_unsigned_t; + NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION - static this_t create(NBL_CONST_REF_ARG(vector) cartesian) + code() = default; + + // To immediately get compound operators and functional structs in CPP side + code(const I _value) : value(bit_cast(_value)){} + + #endif + + /** + * @brief Creates a Morton code from a set of cartesian 
coordinates + * + * @param [in] cartesian Coordinates to encode + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); const vector unsignedCartesian = bit_cast, vector >(cartesian); U val = U(0); + [[unroll]] - // We want to interleave the bits of each number in `unsignedCartesian`. We do this by enumerating - // val[0] = bit 0 of unsignedCartesian[0], val[1] = bit 0 of unsignedCartesian[1], ..., val[D-1] = bit 0 of unsignedCartesian[D-1], - // val[D] = bit 1 of unsignedCartesian[0], val[D+1] = bit 1 of unsignedCartesian[1], ..., val[2D-1] = bit 1 of unsignedCartesian[D-1] - // and so on until we get val[BitDwidth - 1] and stop. - for (U i = U(0); i < BitWidth; i++) + for (U dim = 0; dim < U(D); dim++) { - val |= (unsignedCartesian[i % D] & (U(1) << (i / D))) << (i - (i / D)); + [[unroll]] + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer + for (U valBit = dim, coordBit = U(1), shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= 1, shift += U(D) - 1) + { + val |= (unsignedCartesian[dim] & coordBit) << shift; + } } - this_t retVal = {val}; + + this_t retVal; + retVal.value = val; return retVal; } + // CPP can also have a constructor + #ifndef __HLSL_VERSION + + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ + code(NBL_CONST_REF_ARG(vector) cartesian) + { + *this = create(cartesian); + } + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + explicit operator vector() const noexcept + { + // Definition below, we override `impl::static_cast_helper` to have this conversion in both CPP/HLSL + return _static_cast, this_t>(*this); + } + + #endif + + // ------------------------------------------------------- BITWISE 
OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value << bits; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value >> bits; + return retVal; + } + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC + { + this_t allOnes; + // allOnes encodes a cartesian coordinate with all values set to 1 + allOnes.value = (U(1) << D) - U(1); + // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 + return operator~() + allOnes; + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + + //operator+, operator-, operator>>, operator<<, and other bitwise ops U value; @@ -116,6 +219,44 @@ struct code #undef NBL_HLSL_MORTON_MASKS } //namespace morton + +namespace impl +{ + +template +struct static_cast_helper, morton::code > +{ + 
NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) + { + using U = typename morton::code::U; + NBL_CONSTEXPR_STATIC U BitWidth = morton::code::BitWidth; + // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits + // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift + // at the end to preserve sign. + // To this end, we first notice that the coordinate/dimension of index `dim` gets + // `bits(dim) = ceil((BitWidth - dim)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not + // divide `BitWidth perfectly`). + // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones + // by shifting everything `BitWidth - bits(dim)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. 
+ + vector cartesian; + for (U dim = 0; dim < U(D); dim++) + { + const U bitsDim = (BitWidth - dim + U(D) - 1) / U(D); // <- this computes the ceil + U coordVal = U(0); + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer + for (U valBit = dim, coordBit = U(1) << dim, shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= U(D), shift += U(D) - 1) + { + coordVal |= (val.value & coordBit) << (BitWidth - bitsDim - shift); + } + cartesian[dim] = (bit_cast(coordVal) >> (BitWidth - bitsDim)); + } + return cartesian; + } +}; + +} // namespace impl + } //namespace hlsl } //namespace nbl From 625639031599374d44e8f8a6a79570471f0f4a9c Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 26 Mar 2025 14:53:42 -0300 Subject: [PATCH 08/75] Add safe copile-time vector truncation and some function specifiers for both cpp and hlsl --- include/nbl/builtin/hlsl/cpp_compat.hlsl | 3 + include/nbl/builtin/hlsl/cpp_compat/basic.h | 66 +++++++++---------- .../hlsl/cpp_compat/impl/vector_impl.hlsl | 35 ++++++++++ .../nbl/builtin/hlsl/cpp_compat/vector.hlsl | 30 --------- include/nbl/builtin/hlsl/math/morton.hlsl | 34 ---------- src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 72 insertions(+), 97 deletions(-) create mode 100644 include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index 175a3e76c1..cb06447aa1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -6,4 +6,7 @@ #include #include +// Had to push some stuff here to avoid circular dependencies +#include + #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index a93727815b..41e920e41e 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -2,35 +2,7 @@ #define 
_NBL_BUILTIN_HLSL_CPP_COMPAT_BASIC_INCLUDED_ #include - -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper -{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - -} -} +#include #ifndef __HLSL_VERSION #include @@ -43,8 +15,7 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline -#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_FUNC -#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_FORCE_INLINE NBL_CONSTEXPR_STATIC +#define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const namespace nbl::hlsl @@ -68,6 +39,7 @@ namespace nbl::hlsl #else + #define ARROW .arrow(). #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC @@ -77,8 +49,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONSTEXPR_STATIC_FORCED_INLINE_FUNC NBL_CONSTEXPR_STATIC_INLINE_FUNC -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC namespace nbl { @@ -106,4 +77,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ +return impl::static_cast_helper::cast(v); +} + +} +} + #endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl new file 
mode 100644 index 0000000000..524d1fa45e --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl @@ -0,0 +1,35 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ + +#include +#include +#include + +// To prevent implicit truncation warnings +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(N <= M) +struct static_cast_helper, vector NBL_PARTIAL_REQ_BOT(N <= M) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) + { + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < N; i++) + { + retVal[i] = val[i]; + } + return retVal; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl index f6ced52db1..354937427a 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/vector.hlsl @@ -1,8 +1,6 @@ #ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ #define _NBL_BUILTIN_HLSL_CPP_COMPAT_VECTOR_INCLUDED_ -#include "nbl/builtin/hlsl/cpp_compat/basic.h" - // stuff for C++ #ifndef __HLSL_VERSION #include @@ -94,32 +92,4 @@ struct blake3_hasher::update_impl,Dummy> } #endif } - -// To prevent implicit truncation warnings -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ - -template -struct static_cast_helper, vector > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) - { - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < N; i++) - { - retVal[i] = val[i]; - } - return retVal; - } -}; - -} -} -} - #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index ecd94ce69e..50cf78caae 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -32,49 +32,15 @@ struct decode_mask : 
integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -#ifndef __HLSL_VERSION - -template -struct decode_masks_array -{ - static consteval vector generateMasks() - { - vector masks; - for (auto i = 0u; i < Dim; i++) - { - masks[i] = decode_mask_v << T(i); - } - return masks; - } - - NBL_CONSTEXPR_STATIC_INLINE vector Masks = generateMasks(); -}; - -template -NBL_CONSTEXPR vector decode_masks = decode_masks_array::Masks; - -#endif - } //namespace impl -// HLSL only supports up to D = 4, and even then having this in a more generic manner is blocked by a DXC issue targeting SPIR-V -#ifndef __HLSL_VERSION - -#define NBL_HLSL_MORTON_MASKS(U, D) impl::decode_masks< U , D > - -#else - // Up to D = 4 supported -// This will throw a DXC warning about the vector being truncated - no way around that -// The only way to avoid this atm (until they fix issue 7006 below) is to wrap the whole class in a macro and expand it for each possible value of `D` #define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ impl::decode_mask_v< U , D > << U (1),\ impl::decode_mask_v< U , D > << U (2),\ impl::decode_mask_v< U , D > << U (3)\ )) -#endif - // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it template && impl::MortonDimension) diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 2e68d1fdf7..fa548e210a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -248,6 +248,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/cpp_compat/impl/intrinsics_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/vector_impl.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") From 246cefc422e8ef7b36cd22c90a1f695d643c3b45 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:44:44 -0300 Subject: [PATCH 09/75] Morton class done! --- include/nbl/builtin/hlsl/math/morton.hlsl | 241 +++++++++++++++++++--- 1 file changed, 215 insertions(+), 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 50cf78caae..dfe53c3446 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -68,18 +68,17 @@ struct code */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); const vector unsignedCartesian = bit_cast, vector >(cartesian); U val = U(0); [[unroll]] - for (U dim = 0; dim < U(D); dim++) + for (U coord = 0; coord < U(D); coord++) { [[unroll]] - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer - for (U valBit = dim, coordBit = U(1), shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= 1, shift += U(D) - 1) + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer + for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) { - val |= (unsignedCartesian[dim] & coordBit) << shift; + val |= (unsignedCartesian[coord] & coordBit) << shift; } } @@ -112,6 +111,68 @@ struct code #endif + // --------------------------------------------------------- AUX METHODS 
------------------------------------------------------------------- + + /** + * @brief Extracts a single coordinate + * + * @param [in] coord The coordinate to extract + */ + NBL_CONSTEXPR_INLINE_FUNC I getCoordinate(uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits + // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift + // at the end to preserve sign. + // To this end, we first notice that the coordinate of index `coord` gets + // `bits(coord) = ceil((BitWidth - coord)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not + // divide `BitWidth perfectly`). + // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones + // by shifting everything `BitWidth - bits(coord)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. + + const U bitsCoord = BitWidth / U(D) + ((coord < BitWidth % D) ? 
U(1) : U(0)); // <- this computes the ceil + U coordVal = U(0); + // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer + [[unroll]] + for (U valBitIdx = U(coord), coordBit = U(1) << U(coord), shift = U(coord); valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= U(D), shift += U(D) - 1) + { + coordVal |= (value & coordBit) << (BitWidth - bitsCoord - shift); + } + return bit_cast(coordVal) >> (BitWidth - bitsCoord); + } + + /** + * @brief Returns an element of type U with the highest bit of the number encoded in `coord` set to its right value, and all other bits set to 0 + * + * @param [in] coord The coordinate whose highest bit we want to get + */ + /* + NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these + // bits is `bits(coord) - 1` + const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); + // This is the index of that bit as an index in the encoded value + const U shift = coordHighestBitIdx * U(D) + U(coord); + return value & (U(1) << shift); + } + */ + + /** + * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord + * (for each value) has its highest bit set to 1. + * + * @param [in] coord The coordinate whose highest bit we want to get + */ + NBL_CONSTEXPR_INLINE_FUNC U logicalOrHighestBits(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these + // bits is `bits(coord) - 1` + const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? 
U(0) : U(1)); + // This is the index of that bit as an index in the encoded value + const U shift = coordHighestBitIdx * U(D) + U(coord); + return (value | rhs.value) & (U(1) << shift); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -174,9 +235,153 @@ struct code // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + this_t retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + { + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` + retVal.value |= ((value | (~Masks[coord])) + (rhs.value & Masks[coord])) & Masks[coord]; + } + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + this_t retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + { + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + retVal.value |= ((value & Masks[coord]) - (rhs.value & Masks[coord])) & Masks[coord]; + } + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC bool operator!() NBL_CONST_MEMBER_FUNC + { + return value.operator!(); + } - //operator+, operator-, operator>>, operator<<, and other bitwise ops + NBL_CONSTEXPR_INLINE_FUNC bool coordEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + return (value & Masks[coord]) == (rhs.value & Masks[coord]); + } + + NBL_CONSTEXPR_INLINE_FUNC vector operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + vector retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + retVal[coord] = coordEquals(rhs, coord); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC bool allEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordNotEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return !coordEquals(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC vector operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + vector retVal; + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) + retVal[coord] = coordNotEquals(rhs, coord); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC 
bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return ! allEqual(rhs); + } + + + + template + NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); + Comparison comparison; + OppositeComparison oppositeComparison; + + // When unsigned, bit representation is the same but with 0s inbetween bits. In particular, we can still use unsigned comparison + #ifndef __HLSL_VERSION + if constexpr (is_unsigned_v) + #else + if (is_unsigned_v) + #endif + { + return comparison(value & Masks[coord], rhs.value & Masks[coord]); + } + // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit + else + { + // I will give an example for the case of `Comparison` being `functional::less`, but other cases are similar + // If both are negative (both bits set to 1) then `x < y` iff `z > w` when `z,w` are the bit representations of `x,y` as unsigned + // If this is nonnegative and rhs is negative, it should return false. Since in this case `highestBit = 0` and `rhsHighestBit = 1` this + // is the same as doing `z > w` again + // If this is negative and rhs is nonnegative, it should return true. But in this case we have `highestBit = 1` and `rhsHighestBit = 0` + // so again we can just return `z > w`. + // All three cases end up in the same expression. + if (logicalOrHighestBits(rhs, coord)) + return oppositeComparison(value & Masks[coord], rhs.value & Masks[coord]); + // If neither of them have their highest bit set, both are nonnegative. 
Therefore, we can return the unsigned comparison + else + return comparison(value & Masks[coord], rhs.value & Masks[coord]); + } + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, greater >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, greater_equal >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, less >(rhs, coord); + } + + NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + { + return coordOrderCompare, less_equal >(rhs, coord); + } + + #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ + { \ + vector retVal; \ + [[unroll]] \ + for (uint16_t coord = 0; coord < D; coord++) \ + retVal[coord] = COMPARISON (rhs, coord); \ + return retVal; \ + } + + DEFINE_OPERATOR(< , coordLessThan); + DEFINE_OPERATOR(<= , coordLessThanEquals); + DEFINE_OPERATOR(> , coordGreaterThan); + DEFINE_OPERATOR(>= , coordGreaterThanEquals); U value; }; @@ -186,6 +391,7 @@ struct code } //namespace morton +// Still in nbl::hlsl we can go to nbl::hlsl::impl and specialize the `static_cast_helper` namespace impl { @@ -194,28 +400,11 @@ struct static_cast_helper, morton::code > { NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) { - using U = typename morton::code::U; - NBL_CONSTEXPR_STATIC U BitWidth = morton::code::BitWidth; - // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits - // that actually indicated sign. 
Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift - // at the end to preserve sign. - // To this end, we first notice that the coordinate/dimension of index `dim` gets - // `bits(dim) = ceil((BitWidth - dim)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not - // divide `BitWidth perfectly`). - // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones - // by shifting everything `BitWidth - bits(dim)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. - vector cartesian; - for (U dim = 0; dim < U(D); dim++) + [[unroll]] + for (uint16_t coord = 0; coord < D; coord++) { - const U bitsDim = (BitWidth - dim + U(D) - 1) / U(D); // <- this computes the ceil - U coordVal = U(0); - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `dim`, but I feel this is clearer - for (U valBit = dim, coordBit = U(1) << dim, shift = dim; valBit < BitWidth; valBit += U(D), coordBit <<= U(D), shift += U(D) - 1) - { - coordVal |= (val.value & coordBit) << (BitWidth - bitsDim - shift); - } - cartesian[dim] = (bit_cast(coordVal) >> (BitWidth - bitsDim)); + cartesian[coord] = val.getCoordinate(coord); } return cartesian; } From 1c7f7911e416c8ec42ba3055b9da9a9da900d23f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:48:35 -0300 Subject: [PATCH 10/75] Remove some leftover commented code --- include/nbl/builtin/hlsl/math/morton.hlsl | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index dfe53c3446..153ec08bf0 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -140,23 +140,6 @@ struct code return bit_cast(coordVal) >> (BitWidth - bitsCoord); } - /** - * @brief Returns an element of type U 
with the highest bit of the number encoded in `coord` set to its right value, and all other bits set to 0 - * - * @param [in] coord The coordinate whose highest bit we want to get - */ - /* - NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); - // This is the index of that bit as an index in the encoded value - const U shift = coordHighestBitIdx * U(D) + U(coord); - return value & (U(1) << shift); - } - */ - /** * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord * (for each value) has its highest bit set to 1. From 508879948064ff01c05a9e1f2166d2261c17697f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 27 Mar 2025 18:56:57 -0300 Subject: [PATCH 11/75] Remove leaking macro --- include/nbl/builtin/hlsl/math/morton.hlsl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 153ec08bf0..4dc05738b6 100644 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -366,6 +366,8 @@ struct code DEFINE_OPERATOR(> , coordGreaterThan); DEFINE_OPERATOR(>= , coordGreaterThanEquals); + #undef DEFINE_OPERATOR + U value; }; From e25a35cce8f0554baf98173f9cc1d1dd93629042 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 28 Mar 2025 20:16:00 -0300 Subject: [PATCH 12/75] Bugfixes with arithmetic --- include/nbl/builtin/hlsl/math/morton.hlsl | 108 +++++++++++++--------- 1 file changed, 63 insertions(+), 45 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl index 4dc05738b6..89d1a99749 100644 --- 
a/include/nbl/builtin/hlsl/math/morton.hlsl +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -57,7 +57,7 @@ struct code code() = default; // To immediately get compound operators and functional structs in CPP side - code(const I _value) : value(bit_cast(_value)){} + code(const U _value) : value(_value) {} #endif @@ -69,7 +69,7 @@ struct code NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) { const vector unsignedCartesian = bit_cast, vector >(cartesian); - U val = U(0); + this_t retVal = { U(0) }; [[unroll]] for (U coord = 0; coord < U(D); coord++) @@ -78,12 +78,10 @@ struct code // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) { - val |= (unsignedCartesian[coord] & coordBit) << shift; + retVal.value |= (unsignedCartesian[coord] & coordBit) << shift; } } - this_t retVal; - retVal.value = val; return retVal; } @@ -141,48 +139,43 @@ struct code } /** - * @brief Returns an element of type U by `or`ing this with rhs and extracting only the highest bit. Useful to know if either coord - * (for each value) has its highest bit set to 1. + * @brief Returns an element of type U by extracting only the highest bit (of the bits used to encode `coord`) * - * @param [in] coord The coordinate whose highest bit we want to get + * @param [in] coord The coordinate whose highest bit we want to extract. 
*/ - NBL_CONSTEXPR_INLINE_FUNC U logicalOrHighestBits(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC { // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these // bits is `bits(coord) - 1` const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); // This is the index of that bit as an index in the encoded value const U shift = coordHighestBitIdx * U(D) + U(coord); - return (value | rhs.value) & (U(1) << shift); + return value & (U(1) << shift); } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value & rhs.value; + this_t retVal = { value & rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value | rhs.value; + this_t retVal = { value | rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value ^ rhs.value; + this_t retVal = { value ^ rhs.value }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = ~value; + this_t retVal = { ~value }; return retVal; } @@ -191,15 +184,13 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value << bits; + this_t retVal = { value << U(bits) }; return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC { - this_t retVal; - retVal.value = value >> bits; + this_t retVal = { value >> U(bits) }; 
return retVal; } @@ -209,19 +200,20 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC { - this_t allOnes; // allOnes encodes a cartesian coordinate with all values set to 1 - allOnes.value = (U(1) << D) - U(1); + const static this_t allOnes = { (U(1) << D) - U(1) }; // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 return operator~() + allOnes; } // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + // CHANGED FOR DEBUG: REMEMBER TO CHANGE BACK + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal; + this_t retVal = { U(0) }; [[unroll]] for (uint16_t coord = 0; coord < D; coord++) { @@ -240,7 +232,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal; + this_t retVal = { U(0) }; [[unroll]] for (uint16_t coord = 0; coord < D; coord++) { @@ -293,17 +285,15 @@ struct code NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return ! allEqual(rhs); + return !allEqual(rhs); } - - - template + template NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); Comparison comparison; - OppositeComparison oppositeComparison; + OnSignMismatch onSignMismatch; // When unsigned, bit representation is the same but with 0s inbetween bits. 
In particular, we can still use unsigned comparison #ifndef __HLSL_VERSION @@ -317,39 +307,67 @@ struct code // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit else { - // I will give an example for the case of `Comparison` being `functional::less`, but other cases are similar - // If both are negative (both bits set to 1) then `x < y` iff `z > w` when `z,w` are the bit representations of `x,y` as unsigned - // If this is nonnegative and rhs is negative, it should return false. Since in this case `highestBit = 0` and `rhsHighestBit = 1` this - // is the same as doing `z > w` again - // If this is negative and rhs is nonnegative, it should return true. But in this case we have `highestBit = 1` and `rhsHighestBit = 0` - // so again we can just return `z > w`. - // All three cases end up in the same expression. - if (logicalOrHighestBits(rhs, coord)) - return oppositeComparison(value & Masks[coord], rhs.value & Masks[coord]); - // If neither of them have their highest bit set, both are nonnegative. Therefore, we can return the unsigned comparison + // I will give an example for `operator<` but the same reasoning holds for all others. Some abuse of notation but hopefully it's clear. + + // If `this[coord] >= 0` and `rhs[coord] < 0` then `this[coord] < rhs[coord]` returns false. Notice that in this case, the highest bit of + // `value` (of the bits representing the number encoded in `coord`) is `0`, while the highest bit for rhs is `1`. + // Similarly, if `this[coord] < 0` and `rhs[coord] >= 0` then `this[coord] < rhs[coord]` returns true, and the highest bit situation is inverted. + // This means that if the signs of `this[coord]` and `rhs[coord]` are not equal, the result depends on the sign of `this[coord]`. + // What that result should be is controlled by `OnSignMismatch`. 
+ // Finally, notice that if only one of those bits is set to 1, then the `xor` of that highest bit yields 1 as well + const U highestBit = extractHighestBit(coord); + const U rhsHighestBit = rhs.extractHighestBit(coord); + if (highestBit ^ rhsHighestBit) + return onSignMismatch(highestBit); + // If both are nonnegative, then we can just use the comparison as it comes. + // If both are negative, it just so happens that applying the same operator to their unsigned bitcasted representations yields the same result. + // For `operator<`, for example, consider two negative numbers. Starting from the MSB (we know it's `1` for both in this case) and moving to the right, + // consider what happens when we encounter the first bit where they mismatch: the one with a `0` at position `k` (by position I mean counted from the + // left, starting at 0) is adding at most `2^k - 1` in the lowest bits, while the one with a `1` is adding exactly `2^k`. This means that the one + // with a 0 is "more negative". 
else return comparison(value & Masks[coord], rhs.value & Masks[coord]); } } + + struct OnSignMismatchLessThan + { + // On a sign mismatch, `thisrhs` is true if this is non-negative (`highestBit` set to `0`) and false otherwise + // Therefore since it takes a number with only the highest bit set we only have to return the opposite of whether there is in fact a bit set + bool operator()(U highestBit) + { + return !bool(highestBit); + } + }; NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, greater >(rhs, coord); + return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, greater_equal >(rhs, coord); + return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, less >(rhs, coord); + return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); } NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC { - return coordOrderCompare, less_equal >(rhs, coord); + return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); } #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ From 0d9dd4afa6190dd029cf0e8e311ec132a818ec4a Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 1 Apr 2025 15:25:38 -0300 Subject: [PATCH 13/75] Checkpoint, have to check why vector compat isn't working --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 6 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 34 ++ .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 13 + .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 153 +++++++ 
include/nbl/builtin/hlsl/functional.hlsl | 34 +- include/nbl/builtin/hlsl/math/morton.hlsl | 423 ------------------ include/nbl/builtin/hlsl/morton.hlsl | 72 +++ .../builtin/hlsl/spirv_intrinsics/core.hlsl | 3 +- src/nbl/builtin/CMakeLists.txt | 4 +- 9 files changed, 311 insertions(+), 431 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/uint64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl create mode 100644 include/nbl/builtin/hlsl/morton.hlsl diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 41e920e41e..77d9d887bd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -17,6 +17,7 @@ #define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) namespace nbl::hlsl { @@ -49,7 +50,8 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC +#define NBL_IF_CONSTEXPR(...) 
if (__VA_ARGS__) namespace nbl { @@ -100,7 +102,7 @@ struct static_cast_helper template NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) { -return impl::static_cast_helper::cast(v); + return impl::static_cast_helper::cast(v); } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 1d43d9b14a..7b8726566f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -103,6 +103,10 @@ template struct nMax_helper; template struct nClamp_helper; +template +struct addCarry_helper; +template +struct subBorrow_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -162,6 +166,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +// Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) @@ -599,6 +606,33 @@ struct nClamp_helper } }; +// Once again no need to restrict the two below with concepts for same reason as HLSL version +template +struct addCarry_helper +{ + using return_t = spirv::AddCarryOutput; + NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = operand1 + operand2; + 
retVal.carry = retVal.result < operand1 ? T(1) : T(0); + return retVal; + } +}; + +template +struct subBorrow_helper +{ + using return_t = spirv::SubBorrowOutput; + NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = static_cast(operand1 - operand2); + retVal.borrow = operand1 >= operand2 ? T(0) : T(1); + return retVal; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index b695c4b82b..1f1957dbbd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -217,6 +217,19 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A return cpp_compat_intrinsics_impl::refract_helper::__call(I, N, eta); } +template +NBL_CONSTEXPR_INLINE_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); +} + +template +NBL_CONSTEXPR_INLINE_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); +} + + #ifdef __HLSL_VERSION #define NAMESPACE spirv #else diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl new file mode 100644 index 0000000000..3178159794 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -0,0 +1,153 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_uint64_t +{ + using storage_t = vector; + using this_t = 
emulated_uint64_t; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_uint64_t() = default; + + // To immediately get compound operators and functional structs in CPP side + explicit emulated_uint64_t(const storage_t _data) : data(_data) {} + + #endif + + /** + * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + this_t retVal; + retVal.data = _data; + return retVal; + } + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data & rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data | rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data ^ rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(~data); + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, 
rhs.data.y); + const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const this_t retVal = create(addResult); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); + const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const this_t retVal = create(subResult); + return retVal; + } + +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + const uint32_t higherBitsMask = ~uint32_t(0) << shift; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> _bits, ((operand.data.x & lowerBitsMask) << shift) | (operand.data.y >> _bits) 
}; + return emulated_uint64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +} //namespace nbl +} //namespace hlsl + + + +#endif diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 25d822a940..3cf24193a4 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -165,7 +165,7 @@ COMPOUND_ASSIGN(divides) // ----------------- End of compound assignment ops ---------------- -// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD +// Min, Max, and Ternary and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs struct minimum @@ -200,13 +200,39 @@ struct ternary_operator { using type_t = T; - T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) { return condition ? 
lhs : rhs; } }; -} -} +template +struct left_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } +}; + +template +struct arithmetic_right_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } +}; + +// Declare template, but left unimplemented by default +template +struct logical_right_shift_operator; + +} //namespace nbl +} //namespace hlsl #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl deleted file mode 100644 index 89d1a99749..0000000000 --- a/include/nbl/builtin/hlsl/math/morton.hlsl +++ /dev/null @@ -1,423 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ - -#include "nbl/builtin/hlsl/concepts/core.hlsl" -#include "nbl/builtin/hlsl/bit.hlsl" -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/functional.hlsl" - -namespace nbl -{ -namespace hlsl -{ -namespace morton -{ - -namespace impl -{ - -// Valid dimension for a morton code -template -NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; - -template -struct decode_mask; - -template -struct decode_mask : integral_constant {}; - -template -struct decode_mask : integral_constant::value << Dim) | T(1)> {}; - -template -NBL_CONSTEXPR T decode_mask_v = decode_mask::value; - -} //namespace impl - -// Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ - impl::decode_mask_v< U , D > << U (1),\ - impl::decode_mask_v< U , D > << U (2),\ - impl::decode_mask_v< U , D > << U (3)\ - )) - -// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 -// In particular, `Masks` should be a `const static` 
member field instead of appearing in every method using it -template && impl::MortonDimension) -struct code -{ - using this_t = code; - using U = make_unsigned_t; - NBL_CONSTEXPR_STATIC U BitWidth = U(8 * sizeof(U)); - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - #ifndef __HLSL_VERSION - - code() = default; - - // To immediately get compound operators and functional structs in CPP side - code(const U _value) : value(_value) {} - - #endif - - /** - * @brief Creates a Morton code from a set of cartesian coordinates - * - * @param [in] cartesian Coordinates to encode - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(vector) cartesian) - { - const vector unsignedCartesian = bit_cast, vector >(cartesian); - this_t retVal = { U(0) }; - - [[unroll]] - for (U coord = 0; coord < U(D); coord++) - { - [[unroll]] - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer - for (U valBitIdx = coord, coordBit = U(1), shift = coord; valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= 1, shift += U(D) - 1) - { - retVal.value |= (unsignedCartesian[coord] & coordBit) << shift; - } - } - - return retVal; - } - - // CPP can also have a constructor - #ifndef __HLSL_VERSION - - /** - * @brief Creates a Morton code from a set of cartesian coordinates - * - * @param [in] cartesian Coordinates to encode - */ - code(NBL_CONST_REF_ARG(vector) cartesian) - { - *this = create(cartesian); - } - - /** - * @brief Decodes this Morton code back to a set of cartesian coordinates - */ - explicit operator vector() const noexcept - { - // Definition below, we override `impl::static_cast_helper` to have this conversion in both CPP/HLSL - return _static_cast, this_t>(*this); - } - - #endif - - // --------------------------------------------------------- AUX METHODS 
------------------------------------------------------------------- - - /** - * @brief Extracts a single coordinate - * - * @param [in] coord The coordinate to extract - */ - NBL_CONSTEXPR_INLINE_FUNC I getCoordinate(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Converting back has an issue with bit-width: when encoding (if template parameter `I` is signed) we cut off the highest bits - // that actually indicated sign. Therefore what we do is set the highest bits instead of the lowest then do an arithmetic right shift - // at the end to preserve sign. - // To this end, we first notice that the coordinate of index `coord` gets - // `bits(coord) = ceil((BitWidth - coord)/D)` bits when encoded (so the first dimensions get more bits than the last ones if `D` does not - // divide `BitWidth perfectly`). - // Then instead of unpacking all the bits for that coordinate as the lowest bits, we unpack them as the highest ones - // by shifting everything `BitWidth - bits(coord)` bits to the left, then at the end do a final *arithmetic* bitshift right by the same amount. - - const U bitsCoord = BitWidth / U(D) + ((coord < BitWidth % D) ? U(1) : U(0)); // <- this computes the ceil - U coordVal = U(0); - // Control can be simplified by running a bound on just coordBit based on `BitWidth` and `coord`, but I feel this is clearer - [[unroll]] - for (U valBitIdx = U(coord), coordBit = U(1) << U(coord), shift = U(coord); valBitIdx < BitWidth; valBitIdx += U(D), coordBit <<= U(D), shift += U(D) - 1) - { - coordVal |= (value & coordBit) << (BitWidth - bitsCoord - shift); - } - return bit_cast(coordVal) >> (BitWidth - bitsCoord); - } - - /** - * @brief Returns an element of type U by extracting only the highest bit (of the bits used to encode `coord`) - * - * @param [in] coord The coordinate whose highest bit we want to extract. 
- */ - NBL_CONSTEXPR_INLINE_FUNC U extractHighestBit(uint16_t coord) NBL_CONST_MEMBER_FUNC - { - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const U coordHighestBitIdx = BitWidth / U(D) - ((U(coord) < BitWidth % U(D)) ? U(0) : U(1)); - // This is the index of that bit as an index in the encoded value - const U shift = coordHighestBitIdx * U(D) + U(coord); - return value & (U(1) << shift); - } - - // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value & rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value | rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value ^ rhs.value }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = { ~value }; - return retVal; - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - NBL_CONSTEXPR_INLINE_FUNC this_t operator<<(uint16_t bits) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value << U(bits) }; - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator>>(uint16_t bits) NBL_CONST_MEMBER_FUNC - { - this_t retVal = { value >> U(bits) }; - return retVal; - } - - #endif - - // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC - { - // allOnes encodes a cartesian coordinate with all values set to 1 - const static this_t allOnes = { (U(1) << D) - U(1) }; - // 
Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 - return operator~() + allOnes; - } - - // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- - - // CHANGED FOR DEBUG: REMEMBER TO CHANGE BACK - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal = { U(0) }; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - // put 1 bits everywhere in the bits the current axis is not using - // then extract just the axis bits for the right hand coordinate - // carry-1 will propagate the bits across the already set bits - // then clear out the bits not belonging to current axis - // Note: Its possible to clear on `this` and fill on `rhs` but that will - // disable optimizations, we expect the compiler to optimize a lot if the - // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` - retVal.value |= ((value | (~Masks[coord])) + (rhs.value & Masks[coord])) & Masks[coord]; - } - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - this_t retVal = { U(0) }; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate - retVal.value |= ((value & Masks[coord]) - (rhs.value & Masks[coord])) & Masks[coord]; - } - return retVal; - } - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC bool operator!() NBL_CONST_MEMBER_FUNC - { - return value.operator!(); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - return (value & Masks[coord]) == (rhs.value & Masks[coord]); - } - - NBL_CONSTEXPR_INLINE_FUNC vector operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - vector retVal; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - retVal[coord] = coordEquals(rhs, coord); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC bool allEqual(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return value == rhs.value; - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordNotEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return !coordEquals(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC vector operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - vector retVal; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - retVal[coord] = coordNotEquals(rhs, coord); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC bool notAllEqual(NBL_CONST_REF_ARG(this_t) rhs) 
NBL_CONST_MEMBER_FUNC - { - return !allEqual(rhs); - } - - template - NBL_CONSTEXPR_INLINE_FUNC bool coordOrderCompare(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - NBL_CONSTEXPR_STATIC vector Masks = NBL_HLSL_MORTON_MASKS(U, D); - Comparison comparison; - OnSignMismatch onSignMismatch; - - // When unsigned, bit representation is the same but with 0s inbetween bits. In particular, we can still use unsigned comparison - #ifndef __HLSL_VERSION - if constexpr (is_unsigned_v) - #else - if (is_unsigned_v) - #endif - { - return comparison(value & Masks[coord], rhs.value & Masks[coord]); - } - // When signed, since the representation is unsigned, we need to divide behaviour based on highest bit - else - { - // I will give an example for `operator<` but the same reasoning holds for all others. Some abuse of notation but hopefully it's clear. - - // If `this[coord] >= 0` and `rhs[coord] < 0` then `this[coord] < rhs[coord]` returns false. Notice that in this case, the highest bit of - // `value` (of the bits representing the number encoded in `coord`) is `0`, while the highest bit for rhs is `1`. - // Similarly, if `this[coord] < 0` and `rhs[coord] >= 0` then `this[coord] < rhs[coord]` returns true, and the highest bit situation is inverted. - // This means that if the signs of `this[coord]` and `rhs[coord]` are not equal, the result depends on the sign of `this[coord]`. - // What that result should be is controlled by `OnSignMismatch`. - // Finally, notice that if only one of those bits is set to 1, then the `xor` of that highest bit yields 1 as well - const U highestBit = extractHighestBit(coord); - const U rhsHighestBit = rhs.extractHighestBit(coord); - if (highestBit ^ rhsHighestBit) - return onSignMismatch(highestBit); - // If both are nonnegative, then we can just use the comparison as it comes. - // If both are negative, it just so happens that applying the same operator to their unsigned bitcasted representations yields the same result. 
- // For `operator<`, for example, consider two negative numbers. Starting from the MSB (we know it's `1` for both in this case) and moving to the right, - // consider what happens when we encounter the first bit where they mismatch: the one with a `0` at position `k` (by position I mean counted from the - // left, starting at 0) is adding at most `2^k - 1` in the lowest bits, while the one with a `1` is adding exactly `2^k`. This means that the one - // with a 0 is "more negative". - else - return comparison(value & Masks[coord], rhs.value & Masks[coord]); - } - } - - struct OnSignMismatchLessThan - { - // On a sign mismatch, `thisrhs` is true if this is non-negative (`highestBit` set to `0`) and false otherwise - // Therefore since it takes a number with only the highest bit set we only have to return the opposite of whether there is in fact a bit set - bool operator()(U highestBit) - { - return !bool(highestBit); - } - }; - - NBL_CONSTEXPR_INLINE_FUNC bool coordLessThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordLessThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchLessThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThan(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); - } - - NBL_CONSTEXPR_INLINE_FUNC bool coordGreaterThanEquals(NBL_CONST_REF_ARG(this_t) rhs, uint16_t coord) NBL_CONST_MEMBER_FUNC - { - return coordOrderCompare, OnSignMismatchGreaterThan>(rhs, coord); - } - - #define DEFINE_OPERATOR(OP, COMPARISON) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP##(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC \ - { \ - vector retVal; \ - [[unroll]] \ - for (uint16_t coord = 0; coord < D; coord++) \ - retVal[coord] = COMPARISON (rhs, 
coord); \ - return retVal; \ - } - - DEFINE_OPERATOR(< , coordLessThan); - DEFINE_OPERATOR(<= , coordLessThanEquals); - DEFINE_OPERATOR(> , coordGreaterThan); - DEFINE_OPERATOR(>= , coordGreaterThanEquals); - - #undef DEFINE_OPERATOR - - U value; -}; - -// Don't forget to delete this macro after usage -#undef NBL_HLSL_MORTON_MASKS - -} //namespace morton - -// Still in nbl::hlsl we can go to nbl::hlsl::impl and specialize the `static_cast_helper` -namespace impl -{ - -template -struct static_cast_helper, morton::code > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code) val) - { - vector cartesian; - [[unroll]] - for (uint16_t coord = 0; coord < D; coord++) - { - cartesian[coord] = val.getCoordinate(coord); - } - return cartesian; - } -}; - -} // namespace impl - -} //namespace hlsl -} //namespace nbl - - - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl new file mode 100644 index 0000000000..89eddf8675 --- /dev/null +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -0,0 +1,72 @@ +#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +namespace impl +{ + +// Valid dimension for a morton code +template +NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; + +// Masks + +template +struct decode_mask; + +template +struct decode_mask : integral_constant {}; + +template +struct decode_mask : integral_constant::value << Dim) | T(1)> {}; + +template +NBL_CONSTEXPR T decode_mask_v = decode_mask::value; + +// Decode masks are different for each dimension + +template +struct MortonDecoder; + +} //namespace impl + +// Up to D = 4 supported +#define 
NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ + impl::decode_mask_v< U , D > << U (1),\ + impl::decode_mask_v< U , D > << U (2),\ + impl::decode_mask_v< U , D > << U (3)\ + )) + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth>16), conditional_t<(TotalBitWidth>32), _uint64_t, uint32_t>, uint16_t> ; + + + storage_t value; +}; + +// Don't forget to delete this macro after usage +#undef NBL_HLSL_MORTON_MASKS + +} //namespace morton +} //namespace hlsl +} //namespace nbl + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d351cab07d..d8d90de726 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -4,13 +4,14 @@ #ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ +#include + #ifdef __HLSL_VERSION // TODO: AnastZIuk fix public search paths so we don't choke #include "spirv/unified1/spirv.hpp" #include #include #include -#include namespace nbl { diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index fa548e210a..a11a26d69a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -214,6 +214,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/emulated/uint64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable @@ -291,7 +292,6 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl" LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/morton.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") @@ -368,5 +368,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") #blur LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") +#morton codes +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From 89d2bf2a5d9fab347850babe31fdc8f0a95c64f6 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 2 Apr 2025 16:19:20 -0300 Subject: [PATCH 14/75] Refactor morton class, get new conversion running --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 8 +- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 11 ++ include/nbl/builtin/hlsl/morton.hlsl | 175 +++++++++++++++++- 3 files changed, 186 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 7b8726566f..92fc9e929b 100644 --- 
a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -611,11 +611,11 @@ template struct addCarry_helper { using return_t = spirv::AddCarryOutput; - NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + constexpr static inline return_t __call(const T operand1, const T operand2) { return_t retVal; retVal.result = operand1 + operand2; - retVal.carry = retVal.result < operand1 ? T(1) : T(0); + retVal.carry = T(retVal.result < operand1); return retVal; } }; @@ -624,11 +624,11 @@ template struct subBorrow_helper { using return_t = spirv::SubBorrowOutput; - NBL_CONSTEXPR_STATIC_INLINE_FUNC return_t __call(const T operand1, const T operand2) + constexpr static inline return_t __call(const T operand1, const T operand2) { return_t retVal; retVal.result = static_cast(operand1 - operand2); - retVal.borrow = operand1 >= operand2 ? T(0) : T(1); + retVal.borrow = T(operand1 < operand2); return retVal; } }; diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index 3178159794..c4f1f1ef1b 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -40,6 +40,17 @@ struct emulated_uint64_t return retVal; } + /** + * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `uint64` being emulated + * @param [in] lo Lowest 32 bits of the `uint64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return create(storage_t(hi, lo)); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC diff --git a/include/nbl/builtin/hlsl/morton.hlsl 
b/include/nbl/builtin/hlsl/morton.hlsl index 89eddf8675..d4ada29d70 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -1,11 +1,12 @@ -#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ -#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" namespace nbl { @@ -35,11 +36,177 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- + // Decode masks are different for each dimension +// Decoder works with unsigned, cast to sign depends on the Morton class +// Bit width checks happen in Morton class as well -template +template struct MortonDecoder; +// Specializations for lack of uint64_t + +template +struct MortonDecoder<2, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), // Groups bits by 1 on, 1 off + emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), // Groups bits by 2 on, 2 off + emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), // Groups bits by 4 on, 4 off + emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), // Groups bits by 8 on, 8 off + emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), // 
Groups bits by 16 on, 16 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) };// Groups bits by 32 on, 32 off + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded.data.y); + } +}; + +template +struct MortonDecoder<3, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), + conditional_value<(Bits <= 6), uint16_t, uint16_t(2), + conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), // Groups bits by 1 on, 2 off (also only considers 21 bits) + emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), // Groups bits by 3 on, 6 off + emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), // Groups bits by 6 on, 12 off + emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), // Groups bits by 12 on, 24 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + // First iteration is special + decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & DecodeMasks[1]; + [[unroll]] + for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 2]; + } + return _static_cast(decoded.data.y); + } +}; + +template 
+struct MortonDecoder<4, Bits, emulated_uint64_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), // Groups bits by 1 on, 3 off + emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), // Groups bits by 2 on, 6 off + emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), // Groups bits by 4 on, 12 off + emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), // Groups bits by 8 on, 24 off + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) };// Groups bits by 16 on, 48 off + + arithmetic_right_shift_operator rightShift; + + emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded.data.y); + } +}; + +template +struct MortonDecoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[6] = { _static_cast(0x5555555555555555), // Groups bits by 1 on, 1 off + _static_cast(0x3333333333333333), // Groups bits by 2 on, 2 off + _static_cast(0x0F0F0F0F0F0F0F0F), // Groups bits by 4 on, 4 off + _static_cast(0x00FF00FF00FF00FF), // Groups bits by 8 on, 8 off + _static_cast(0x0000FFFF0000FFFF), // Groups bits by 16 on, 16 off + _static_cast(0x00000000FFFFFFFF) };// Groups bits by 32 on, 32 off + + encode_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 1; i < 
MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), + conditional_value<(Bits <= 6), uint16_t, uint16_t(2), + conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1249249249249249), // Groups bits by 1 on, 2 off (also only considers 21 bits) + _static_cast(0x01C0E070381C0E07), // Groups bits by 3 on, 6 off + _static_cast(0x0FC003F000FC003F), // Groups bits by 6 on, 12 off + _static_cast(0x0000FFF000000FFF), // Groups bits by 12 on, 24 off + _static_cast(0x0000000000FFFFFF) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + + encode_t decoded = encodedValue & DecodeMasks[0]; + // First iteration is special + decoded = (decoded | (decoded >> 2) | (decoded >> 4)) & DecodeMasks[1]; + [[unroll]] + for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 2]; + } + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); + + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1111111111111111), // Groups bits by 1 on, 3 off + _static_cast(0x0303030303030303), // Groups bits by 2 on, 6 off + _static_cast(0x000F000F000F000F), // Groups bits by 4 on, 12 off + _static_cast(0x000000FF000000FF), // Groups bits by 8 on, 24 off + _static_cast(0x000000000000FFFF) };// 
Groups bits by 16 on, 48 off + + encode_t decoded = encodedValue & DecodeMasks[0]; + [[unroll]] + for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + { + decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + } + return _static_cast(decoded); + } +}; + } //namespace impl // Up to D = 4 supported @@ -56,7 +223,7 @@ struct code { using this_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; - using storage_t = conditional_t<(TotalBitWidth>16), conditional_t<(TotalBitWidth>32), _uint64_t, uint32_t>, uint16_t> ; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; storage_t value; From de4d0fb2f266da125d94801c5c38bd81a9260acd Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 2 Apr 2025 23:45:53 -0300 Subject: [PATCH 15/75] Add new classes for encoding/decoding of mortn codes --- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 57 ++++ include/nbl/builtin/hlsl/morton.hlsl | 287 ++++++++++++++++-- 2 files changed, 312 insertions(+), 32 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index c4f1f1ef1b..3794031c8e 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -3,6 +3,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" namespace nbl { @@ -156,6 +157,62 @@ constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) #endif +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return _static_cast(u.data.y); + } +}; + 
+template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + const To highBits = _static_cast(u.data.x) << To(32); + return highBits | _static_cast(u.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(_static_cast(u >> 32), _static_cast(u)); + } +}; + +} //namespace impl + } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d4ada29d70..e2e1596587 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -22,7 +22,7 @@ namespace impl template NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; -// Masks +// Basic decode masks template struct decode_mask; @@ -36,17 +36,240 @@ struct decode_mask : integral_constant::value template NBL_CONSTEXPR T decode_mask_v = decode_mask::value; -// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS 
--------------------------------------------------- +// Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y + +#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +{\ + NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ +}; + +#ifndef __HLSL_VERSION + +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +{\ + NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE));\ +}; + +#else + +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +{\ + NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ +};\ +NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE)); +#endif + +#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template struct morton_mask_##DIM##_##MASK ;\ + NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE)\ + NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, 0x##HEX_HIGH_VALUE##HEX_LOW_VALUE)\ + template\ + NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; + +NBL_MORTON_DECODE_MASK(2, 0, 55555555, 55555555) // Groups bits by 1 on, 1 off +NBL_MORTON_DECODE_MASK(2, 1, 33333333, 33333333) // Groups bits by 2 on, 2 off +NBL_MORTON_DECODE_MASK(2, 2, 0F0F0F0F, 0F0F0F0F) // Groups bits by 4 on, 4 off +NBL_MORTON_DECODE_MASK(2, 3, 00FF00FF, 00FF00FF) // Groups bits by 8 on, 8 off +NBL_MORTON_DECODE_MASK(2, 4, 0000FFFF, 0000FFFF) // Groups bits by 16 on, 16 off +NBL_MORTON_DECODE_MASK(2, 5, 00000000, FFFFFFFF) // Groups bits by 32 on, 32 off + +NBL_MORTON_DECODE_MASK(3, 0, 12492492, 49249249) // Groups bits by 1 
on, 2 off - also limits each dimension to 21 bits +NBL_MORTON_DECODE_MASK(3, 1, 01C0E070, 381C0E07) // Groups bits by 3 on, 6 off +NBL_MORTON_DECODE_MASK(3, 2, 0FC003F0, 00FC003F) // Groups bits by 6 on, 12 off +NBL_MORTON_DECODE_MASK(3, 3, 0000FFF0, 00000FFF) // Groups bits by 12 on, 24 off +NBL_MORTON_DECODE_MASK(3, 4, 00000000, 00FFFFFF) // Groups bits by 24 on, 48 off + +NBL_MORTON_DECODE_MASK(4, 0, 11111111, 11111111) // Groups bits by 1 on, 3 off +NBL_MORTON_DECODE_MASK(4, 1, 03030303, 03030303) // Groups bits by 2 on, 6 off +NBL_MORTON_DECODE_MASK(4, 2, 000F000F, 000F000F) // Groups bits by 4 on, 12 off +NBL_MORTON_DECODE_MASK(4, 3, 000000FF, 000000FF) // Groups bits by 8 on, 24 off +NBL_MORTON_DECODE_MASK(4, 4, 00000000, 0000FFFF) // Groups bits by 16 on, 48 off + +#undef NBL_MORTON_DECODE_MASK +#undef NBL_MORTON_EMULATED_DECODE_MASK +#undef NBL_MORTON_GENERIC_DECODE_MASK + +// ----------------------------------------------------------------- MORTON ENCODERS --------------------------------------------------- + +template +struct MortonEncoder; + +template +struct MortonEncoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 16) + { + encoded = (encoded | leftShift(encoded, 16)) & morton_mask_2_4_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + encoded = (encoded | leftShift(encoded, 8)) & morton_mask_2_3_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + encoded = (encoded | leftShift(encoded, 4)) & morton_mask_2_2_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + encoded = (encoded | leftShift(encoded, 2)) & morton_mask_2_1_v; + } + encoded = (encoded | leftShift(encoded, 1)) & morton_mask_2_0_v; + return encoded; + } +}; + +template +struct MortonEncoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + 
left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 12) + { + encoded = (encoded | leftShift(encoded, 24)) & morton_mask_3_3_v; + } + NBL_IF_CONSTEXPR(Bits > 6) + { + encoded = (encoded | leftShift(encoded, 12)) & morton_mask_3_2_v; + } + NBL_IF_CONSTEXPR(Bits > 3) + { + encoded = (encoded | leftShift(encoded, 6)) & morton_mask_3_1_v; + } + encoded = (encoded | leftShift(encoded, 2) | leftShift(encoded, 4)) & morton_mask_3_0_v; + return encoded; + } +}; -// Decode masks are different for each dimension -// Decoder works with unsigned, cast to sign depends on the Morton class -// Bit width checks happen in Morton class as well +template +struct MortonEncoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + left_shift_operator leftShift; + encode_t encoded = _static_cast(decodedValue); + NBL_IF_CONSTEXPR(Bits > 8) + { + encoded = (encoded | leftShift(encoded, 24)) & morton_mask_4_3_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + encoded = (encoded | leftShift(encoded, 12)) & morton_mask_4_2_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + encoded = (encoded | leftShift(encoded, 6)) & morton_mask_4_1_v; + } + encoded = (encoded | leftShift(encoded, 3)) & morton_mask_4_0_v; + return encoded; + } +}; + +// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- template struct MortonDecoder; -// Specializations for lack of uint64_t +template +struct MortonDecoder<2, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_2_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 1)) & morton_mask_2_1_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + decoded = (decoded | rightShift(decoded, 2)) 
& morton_mask_2_2_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + decoded = (decoded | rightShift(decoded, 4)) & morton_mask_2_3_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + decoded = (decoded | rightShift(decoded, 8)) & morton_mask_2_4_v; + } + NBL_IF_CONSTEXPR(Bits > 16) + { + decoded = (decoded | rightShift(decoded, 16)) & morton_mask_2_5_v; + } + + return _static_cast(decoded); + } +}; + +template +struct MortonDecoder<3, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_3_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & morton_mask_3_1_v; + } + NBL_IF_CONSTEXPR(Bits > 3) + { + decoded = (decoded | rightShift(decoded, 6)) & morton_mask_3_2_v; + } + NBL_IF_CONSTEXPR(Bits > 6) + { + decoded = (decoded | rightShift(decoded, 12)) & morton_mask_3_3_v; + } + NBL_IF_CONSTEXPR(Bits > 12) + { + decoded = (decoded | rightShift(decoded, 24)) & morton_mask_3_4_v; + } + + return _static_cast(decoded); + } +}; +template +struct MortonDecoder<4, Bits, encode_t> +{ + template + NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator rightShift; + encode_t decoded = encodedValue & morton_mask_4_0_v; + NBL_IF_CONSTEXPR(Bits > 1) + { + decoded = (decoded | rightShift(decoded, 3)) & morton_mask_4_1_v; + } + NBL_IF_CONSTEXPR(Bits > 2) + { + decoded = (decoded | rightShift(decoded, 6)) & morton_mask_4_2_v; + } + NBL_IF_CONSTEXPR(Bits > 4) + { + decoded = (decoded | rightShift(decoded, 12)) & morton_mask_4_3_v; + } + NBL_IF_CONSTEXPR(Bits > 8) + { + decoded = (decoded | rightShift(decoded, 24)) & morton_mask_4_4_v; + } + + return _static_cast(decoded); + } +}; + +/* template struct MortonDecoder<2, Bits, emulated_uint64_t> { @@ -55,12 +278,12 @@ struct MortonDecoder<2, Bits, 
emulated_uint64_t> { NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), // Groups bits by 1 on, 1 off - emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), // Groups bits by 2 on, 2 off - emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), // Groups bits by 4 on, 4 off - emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), // Groups bits by 8 on, 8 off - emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), // Groups bits by 16 on, 16 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) };// Groups bits by 32 on, 32 off + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), + emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), + emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), + emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), + emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0xFFFFFFFF)) }; arithmetic_right_shift_operator rightShift; @@ -84,11 +307,11 @@ struct MortonDecoder<3, Bits, emulated_uint64_t> conditional_value<(Bits <= 6), uint16_t, uint16_t(2), conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), // Groups bits by 1 on, 2 off (also only considers 21 bits) - emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), // Groups bits by 3 on, 6 off - emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), // Groups bits by 6 on, 12 off - 
emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), // Groups bits by 12 on, 24 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), (also only considers 21 bits) + emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), + emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), + emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) }; (40 off if you're feeling pedantic) arithmetic_right_shift_operator rightShift; @@ -112,11 +335,11 @@ struct MortonDecoder<4, Bits, emulated_uint64_t> { NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), // Groups bits by 1 on, 3 off - emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), // Groups bits by 2 on, 6 off - emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), // Groups bits by 4 on, 12 off - emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), // Groups bits by 8 on, 24 off - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) };// Groups bits by 16 on, 48 off + NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), + emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), + emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), + emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), + emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) }; arithmetic_right_shift_operator rightShift; @@ 
-207,14 +430,9 @@ struct MortonDecoder<4, Bits, encode_t> } }; -} //namespace impl +*/ -// Up to D = 4 supported -#define NBL_HLSL_MORTON_MASKS(U, D) _static_cast > (vector< U , 4 >(impl::decode_mask_v< U , D >,\ - impl::decode_mask_v< U , D > << U (1),\ - impl::decode_mask_v< U , D > << U (2),\ - impl::decode_mask_v< U , D > << U (3)\ - )) +} //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 // In particular, `Masks` should be a `const static` member field instead of appearing in every method using it @@ -227,10 +445,15 @@ struct code storage_t value; -}; -// Don't forget to delete this macro after usage -#undef NBL_HLSL_MORTON_MASKS + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + code() = default; + + #endif +}; } //namespace morton } //namespace hlsl From 799420e9dfa1f8bd8039fd724edea4ecf3133a87 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 4 Apr 2025 16:20:54 -0300 Subject: [PATCH 16/75] Fix conversion operators --- .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 33 ++- include/nbl/builtin/hlsl/morton.hlsl | 279 ++++++------------ 2 files changed, 116 insertions(+), 196 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl index 3794031c8e..ab08e1ff38 100644 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl @@ -24,9 +24,6 @@ struct emulated_uint64_t emulated_uint64_t() = default; - // To immediately get compound operators and functional structs in CPP side - explicit emulated_uint64_t(const storage_t _data) : data(_data) {} - #endif /** @@ -52,6 +49,16 @@ struct emulated_uint64_t return create(storage_t(hi, lo)); } + /** + * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
+ * + * @param [in] _data `uint64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) + { + return create(_static_cast(u >> 32), _static_cast(u)); + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -115,9 +122,11 @@ struct left_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { + if (!bits) + return operand; const uint32_t _bits = uint32_t(bits); const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t higherBitsMask = ~uint32_t(0) << shift; + const uint32_t higherBitsMask = (~uint32_t(0)) << shift; // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; return emulated_uint64_t::create(retValData); @@ -132,6 +141,8 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { + if (!bits) + return operand; const uint32_t _bits = uint32_t(bits); const uint32_t shift = ComponentBitWidth - _bits; const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; @@ -173,10 +184,10 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +template<> +struct static_cast_helper { - using To = Unsigned; + using To = uint64_t; using From = emulated_uint64_t; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) @@ -199,15 +210,15 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) > 
sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) > sizeof(uint32_t))) > +template<> +struct static_cast_helper { using To = emulated_uint64_t; - using From = Unsigned; + using From = uint64_t; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) { - return emulated_uint64_t::create(_static_cast(u >> 32), _static_cast(u)); + return emulated_uint64_t::create(u); } }; diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e2e1596587..07aa21b821 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -46,44 +46,41 @@ NBL_CONSTEXPR T decode_mask_v = decode_mask::value; #ifndef __HLSL_VERSION -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ -{\ - NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE));\ -}; +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) #else -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ +#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ {\ NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ };\ -NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = emulated_uint64_t::create(uint32_t(0x##HEX_HIGH_VALUE), uint32_t(0x##HEX_LOW_VALUE)); +NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE); #endif -#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE) template struct morton_mask_##DIM##_##MASK ;\ - NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_HIGH_VALUE, HEX_LOW_VALUE)\ - NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, 0x##HEX_HIGH_VALUE##HEX_LOW_VALUE)\ +#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct 
morton_mask_##DIM##_##MASK ;\ + NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE)\ + NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE)\ template\ NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; -NBL_MORTON_DECODE_MASK(2, 0, 55555555, 55555555) // Groups bits by 1 on, 1 off -NBL_MORTON_DECODE_MASK(2, 1, 33333333, 33333333) // Groups bits by 2 on, 2 off -NBL_MORTON_DECODE_MASK(2, 2, 0F0F0F0F, 0F0F0F0F) // Groups bits by 4 on, 4 off -NBL_MORTON_DECODE_MASK(2, 3, 00FF00FF, 00FF00FF) // Groups bits by 8 on, 8 off -NBL_MORTON_DECODE_MASK(2, 4, 0000FFFF, 0000FFFF) // Groups bits by 16 on, 16 off -NBL_MORTON_DECODE_MASK(2, 5, 00000000, FFFFFFFF) // Groups bits by 32 on, 32 off - -NBL_MORTON_DECODE_MASK(3, 0, 12492492, 49249249) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits -NBL_MORTON_DECODE_MASK(3, 1, 01C0E070, 381C0E07) // Groups bits by 3 on, 6 off -NBL_MORTON_DECODE_MASK(3, 2, 0FC003F0, 00FC003F) // Groups bits by 6 on, 12 off -NBL_MORTON_DECODE_MASK(3, 3, 0000FFF0, 00000FFF) // Groups bits by 12 on, 24 off -NBL_MORTON_DECODE_MASK(3, 4, 00000000, 00FFFFFF) // Groups bits by 24 on, 48 off - -NBL_MORTON_DECODE_MASK(4, 0, 11111111, 11111111) // Groups bits by 1 on, 3 off -NBL_MORTON_DECODE_MASK(4, 1, 03030303, 03030303) // Groups bits by 2 on, 6 off -NBL_MORTON_DECODE_MASK(4, 2, 000F000F, 000F000F) // Groups bits by 4 on, 12 off -NBL_MORTON_DECODE_MASK(4, 3, 000000FF, 000000FF) // Groups bits by 8 on, 24 off -NBL_MORTON_DECODE_MASK(4, 4, 00000000, 0000FFFF) // Groups bits by 16 on, 48 off +NBL_MORTON_DECODE_MASK(2, 0, uint64_t(0x5555555555555555)) // Groups bits by 1 on, 1 off +NBL_MORTON_DECODE_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off +NBL_MORTON_DECODE_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off +NBL_MORTON_DECODE_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off +NBL_MORTON_DECODE_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // 
Groups bits by 16 on, 16 off +NBL_MORTON_DECODE_MASK(2, 5, uint64_t(0x00000000FFFFFFFF)) // Groups bits by 32 on, 32 off + +NBL_MORTON_DECODE_MASK(3, 0, uint64_t(0x1249249249249249)) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits +NBL_MORTON_DECODE_MASK(3, 1, uint64_t(0x01C0E070381C0E07)) // Groups bits by 3 on, 6 off +NBL_MORTON_DECODE_MASK(3, 2, uint64_t(0x0FC003F000FC003F)) // Groups bits by 6 on, 12 off +NBL_MORTON_DECODE_MASK(3, 3, uint64_t(0x0000FFF000000FFF)) // Groups bits by 12 on, 24 off +NBL_MORTON_DECODE_MASK(3, 4, uint64_t(0x0000000000FFFFFF)) // Groups bits by 24 on, 48 off + +NBL_MORTON_DECODE_MASK(4, 0, uint64_t(0x1111111111111111)) // Groups bits by 1 on, 3 off +NBL_MORTON_DECODE_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off +NBL_MORTON_DECODE_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off +NBL_MORTON_DECODE_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off +NBL_MORTON_DECODE_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off #undef NBL_MORTON_DECODE_MASK #undef NBL_MORTON_EMULATED_DECODE_MASK @@ -269,193 +266,105 @@ struct MortonDecoder<4, Bits, encode_t> } }; -/* -template -struct MortonDecoder<2, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); +} //namespace impl - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[6] = { emulated_uint64_t::create(uint32_t(0x55555555), uint32_t(0x55555555)), - emulated_uint64_t::create(uint32_t(0x33333333), uint32_t(0x33333333)), - emulated_uint64_t::create(uint32_t(0x0F0F0F0F), uint32_t(0x0F0F0F0F)), - emulated_uint64_t::create(uint32_t(0x00FF00FF), uint32_t(0x00FF00FF)), - emulated_uint64_t::create(uint32_t(0x0000FFFF), uint32_t(0x0000FFFF)), - emulated_uint64_t::create(uint32_t(0x00000000), 
uint32_t(0xFFFFFFFF)) }; +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - arithmetic_right_shift_operator rightShift; + + storage_t value; - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded.data.y); - } -}; + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- -template -struct MortonDecoder<3, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), - conditional_value<(Bits <= 6), uint16_t, uint16_t(2), - conditional_value<(Bits <= 12), uint16_t, uint16_t(3), uint16_t(4)>::value>::value>::value; + #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x12492492), uint32_t(0x49249249)), (also only considers 21 bits) - emulated_uint64_t::create(uint32_t(0x01C0E070), uint32_t(0x381C0E07)), - emulated_uint64_t::create(uint32_t(0x0FC003F0), uint32_t(0x00FC003F)), - emulated_uint64_t::create(uint32_t(0x0000FFF0), uint32_t(0x00000FFF)), - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x00FFFFFF)) }; (40 off if you're feeling pedantic) + code() = default; - arithmetic_right_shift_operator rightShift; + 
#endif - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - // First iteration is special - decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & DecodeMasks[1]; + /** + * @brief Creates a Morton code from a set of integral cartesian coordinates + * + * @param [in] cartesian Coordinates to encode. Signedness MUST match the signedness of this Morton code class + */ + template + NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + create(NBL_CONST_REF_ARG(vector) cartesian) + { + using U = make_unsigned_t; + left_shift_operator leftShift; + storage_t encodedCartesian = _static_cast(uint64_t(0)); [[unroll]] - for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) + for (uint16_t i = 0; i < D; i++) { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 2]; + encodedCartesian = encodedCartesian | leftShift(impl::MortonEncoder::encode(_static_cast(cartesian[i])), i); } - return _static_cast(decoded.data.y); + this_t retVal; + retVal.value = encodedCartesian; + return retVal; } -}; -template -struct MortonDecoder<4, Bits, emulated_uint64_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(emulated_uint64_t) encodedValue) - { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC emulated_uint64_t DecodeMasks[5] = { emulated_uint64_t::create(uint32_t(0x11111111), uint32_t(0x11111111)), - emulated_uint64_t::create(uint32_t(0x03030303), uint32_t(0x03030303)), - emulated_uint64_t::create(uint32_t(0x000F000F), uint32_t(0x000F000F)), - emulated_uint64_t::create(uint32_t(0x000000FF), uint32_t(0x000000FF)), - emulated_uint64_t::create(uint32_t(0x00000000), uint32_t(0x0000FFFF)) }; + // CPP can also have an actual constructor + #ifndef __HLSL_VERSION - arithmetic_right_shift_operator rightShift; + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * 
@param [in] cartesian Coordinates to encode + */ - emulated_uint64_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | rightShift(decoded, shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded.data.y); - } -}; - -template -struct MortonDecoder<2, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + template + explicit code(NBL_CONST_REF_ARG(vector) cartesian) { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[6] = { _static_cast(0x5555555555555555), // Groups bits by 1 on, 1 off - _static_cast(0x3333333333333333), // Groups bits by 2 on, 2 off - _static_cast(0x0F0F0F0F0F0F0F0F), // Groups bits by 4 on, 4 off - _static_cast(0x00FF00FF00FF00FF), // Groups bits by 8 on, 8 off - _static_cast(0x0000FFFF0000FFFF), // Groups bits by 16 on, 16 off - _static_cast(0x00000000FFFFFFFF) };// Groups bits by 32 on, 32 off - - encode_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 1; i < MaxIterations; i++, shift <<= 1) - { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; - } - return _static_cast(decoded); + *this = create(cartesian); } -}; -template -struct MortonDecoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + // This one is defined later since it requires `static_cast_helper` specialization + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + + template + explicit operator vector() const noexcept { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = conditional_value<(Bits <= 3), uint16_t, uint16_t(1), - conditional_value<(Bits <= 6), uint16_t, uint16_t(2), - conditional_value<(Bits <= 12), uint16_t, uint16_t(3), 
uint16_t(4)>::value>::value>::value; - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1249249249249249), // Groups bits by 1 on, 2 off (also only considers 21 bits) - _static_cast(0x01C0E070381C0E07), // Groups bits by 3 on, 6 off - _static_cast(0x0FC003F000FC003F), // Groups bits by 6 on, 12 off - _static_cast(0x0000FFF000000FFF), // Groups bits by 12 on, 24 off - _static_cast(0x0000000000FFFFFF) };// Groups bits by 24 on, 48 off (40 off if you're feeling pedantic) - - encode_t decoded = encodedValue & DecodeMasks[0]; - // First iteration is special - decoded = (decoded | (decoded >> 2) | (decoded >> 4)) & DecodeMasks[1]; - [[unroll]] - for (uint16_t i = 0, shift = 6; i < MaxIterations - 1; i++, shift <<= 1) - { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 2]; - } - return _static_cast(decoded); + return _static_cast, morton::code, Bits, D>>(*this); } + + #endif }; -template -struct MortonDecoder<4, Bits, encode_t> +} //namespace morton + +// Specialize the `static_cast_helper` +namespace impl { - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) +// I must be of same signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { - NBL_CONSTEXPR_STATIC uint16_t MaxIterations = uint16_t(mpl::log2_v) + uint16_t(!mpl::is_pot_v); - - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[5] = { _static_cast(0x1111111111111111), // Groups bits by 1 on, 3 off - _static_cast(0x0303030303030303), // Groups bits by 2 on, 6 off - _static_cast(0x000F000F000F000F), // Groups bits by 4 on, 12 off - _static_cast(0x000000FF000000FF), // Groups bits by 8 on, 24 off - 
_static_cast(0x000000000000FFFF) };// Groups bits by 16 on, 48 off - - encode_t decoded = encodedValue & DecodeMasks[0]; - [[unroll]] - for (uint16_t i = 0, shift = 3; i < MaxIterations; i++, shift <<= 1) + using U = make_unsigned_t; + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + arithmetic_right_shift_operator rightShift; + vector cartesian; + for (uint16_t i = 0; i < D; i++) { - decoded = (decoded | (decoded >> shift)) & DecodeMasks[i + 1]; + cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); } - return _static_cast(decoded); + return cartesian; } }; -*/ - -} //namespace impl +} // namespace impl -// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 -// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it -template && D * Bits <= 64) -struct code -{ - using this_t = code; - NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; - using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - - - storage_t value; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - #ifndef __HLSL_VERSION - - code() = default; - - #endif -}; - -} //namespace morton } //namespace hlsl } //namespace nbl From 52323bc1f67e58b547c65be11ae9ac9d08e8e4ed Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 4 Apr 2025 23:45:39 -0300 Subject: [PATCH 17/75] Finish the rest of comparison ops and we're done! 
--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 2 + include/nbl/builtin/hlsl/functional.hlsl | 23 +- include/nbl/builtin/hlsl/morton.hlsl | 231 +++++++++++++++++- .../nbl/builtin/hlsl/portable/uint64_t.hlsl | 30 +++ .../nbl/builtin/hlsl/portable/vector_t.hlsl | 18 ++ src/nbl/builtin/CMakeLists.txt | 1 + 6 files changed, 294 insertions(+), 11 deletions(-) create mode 100644 include/nbl/builtin/hlsl/portable/uint64_t.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 0053008aa4..a106cec440 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include #include #include #include @@ -329,6 +330,7 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 3cf24193a4..e5486e2727 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -195,7 +195,7 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +template struct ternary_operator { using type_t = T; @@ -206,7 +206,7 @@ struct ternary_operator } }; -template +template struct left_shift_operator { using type_t = T; @@ -217,7 +217,7 @@ struct left_shift_operator } }; -template +template struct arithmetic_right_shift_operator { using type_t = T; @@ -228,9 +228,20 @@ struct arithmetic_right_shift_operator } }; -// Declare template, but left unimplemented by default -template -struct logical_right_shift_operator; +template 
+struct logical_right_shift_operator +{ + using type_t = T; + using unsigned_type_t = make_unsigned_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + arithmetic_right_shift_operator arithmeticRightShift; + return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); + } +}; + + } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 07aa21b821..499deb1db4 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -266,6 +266,47 @@ struct MortonDecoder<4, Bits, encode_t> } }; +// ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- +// Here because no partial specialization of methods + +template +struct Equals; + +template +struct Equals +{ + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + vector retVal; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + retVal[i] = (_value & rhs[i]) == rhs[i]; + } + return retVal; + } +}; + +template +struct Equals +{ + template + NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > + operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + { + using U = make_unsigned_t; + vector interleaved; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); + } + Equals equals; + return equals(_value, interleaved); + } +}; + } //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 @@ -274,10 +315,10 @@ template; + using this_signed_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; using storage_t = 
conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; - storage_t value; // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- @@ -325,26 +366,205 @@ struct code *this = create(cartesian); } - // This one is defined later since it requires `static_cast_helper` specialization - /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template - explicit operator vector() const noexcept + constexpr inline explicit operator vector() const noexcept { return _static_cast, morton::code, Bits, D>>(*this); } #endif + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + { + left_shift_operator leftShift; + // allOnes encodes a cartesian coordinate with all values set to 1 + this_t allOnes; + 
allOnes.value = leftShift(_static_cast(1), D) - _static_cast(1); + // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 + this_signed_t retVal; + retVal.value = (operator~() + allOnes).value; + return retVal; + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + this_t retVal; + retVal.value = _static_cast(uint64_t(0)); + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` + retVal.value |= ((value | (~leftShift(Mask, i))) + (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); + } + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + this_t retVal; + retVal.value = _static_cast(uint64_t(0)); + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + retVal.value |= ((value & leftShift(Mask, i)) - (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); + } + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator==(NBL_CONST_REF_ARG(vector) rhs) + { + impl::Equals equals; + return equals(value, rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value != rhs.value; + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator==(rhs); + } }; } //namespace morton +template +struct left_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator valueLeftShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueLeftShift(operand.value, bits * D); + return retVal; + } +}; + +template +struct arithmetic_right_shift_operator > +{ + using type_t = 
morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + arithmetic_right_shift_operator valueArithmeticRightShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueArithmeticRightShift(operand.value, bits * D); + return retVal; + } +}; + +// This one's uglier - have to unpack to get the expected behaviour +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + vector cartesian = _static_cast >(operand); + cartesian >> scalar_t(bits); + return type_t::create(cartesian); + } +}; + +#ifndef __HLSL_VERSION + +template&& D* Bits <= 64) +constexpr inline morton::code morton::code::operator<<(uint16_t bits) const +{ + left_shift_operator> leftShift; + return leftShift(*this, bits); +} + +template&& D* Bits <= 64) +constexpr inline morton::code morton::code::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator> rightShift; + return rightShift(*this, bits); +} + +#endif + // Specialize the `static_cast_helper` namespace impl { + // I must be of same signedness as the morton code, and be wide enough to hold each component template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > @@ -355,6 +575,7 @@ struct static_cast_helper, morton::code, Bits, D, _u using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; arithmetic_right_shift_operator rightShift; vector cartesian; + [[unroll]] for (uint16_t i = 0; i < D; i++) { cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); diff --git 
a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl new file mode 100644 index 0000000000..ac081234ac --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl @@ -0,0 +1,30 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_UINT_64 to force using emulated uint64 + +namespace nbl +{ +namespace hlsl +{ +template +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_UINT_64 +using portable_uint64_t = emulated_uint64_t; +#else +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +#endif + +#else +using portable_uint64_t = uint64_t; +#endif + +//static_assert(sizeof(portable_uint64_t) == sizeof(uint64_t)); + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index ace199e20b..dcaea97739 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -36,19 +36,37 @@ template using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION +// Float template using portable_float64_t2 = portable_vector_t2 >; template using portable_float64_t3 = portable_vector_t3 >; template using portable_float64_t4 = portable_vector_t4 >; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2 >; +template +using portable_uint64_t3 = portable_vector_t3 >; +template +using portable_uint64_t4 = portable_vector_t4 >; #else +// Float template using portable_float64_t2 = portable_vector_t2; template using portable_float64_t3 = portable_vector_t3; template using portable_float64_t4 = portable_vector_t4; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2; +template +using portable_uint64_t3 = portable_vector_t3; +template +using portable_uint64_t4 = portable_vector_t4; #endif } diff --git 
a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index a11a26d69a..d7005a1ed6 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -219,6 +219,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/uint64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 From b6b70030434018a9e70ea4c52c86d48c135cc94e Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 7 Apr 2025 19:41:08 -0300 Subject: [PATCH 18/75] Final Mortons --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 488 ++++++++++++++++++ .../nbl/builtin/hlsl/emulated/uint64_t.hlsl | 232 --------- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 3 +- include/nbl/builtin/hlsl/morton.hlsl | 107 +++- .../nbl/builtin/hlsl/portable/int64_t.hlsl | 31 ++ .../nbl/builtin/hlsl/portable/uint64_t.hlsl | 30 -- .../nbl/builtin/hlsl/portable/vector_t.hlsl | 17 + src/nbl/builtin/CMakeLists.txt | 4 +- 8 files changed, 641 insertions(+), 271 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/int64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/emulated/uint64_t.hlsl create mode 100644 include/nbl/builtin/hlsl/portable/int64_t.hlsl delete mode 100644 include/nbl/builtin/hlsl/portable/uint64_t.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl new file mode 100644 index 0000000000..f3269cc6ba --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -0,0 +1,488 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include 
"nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_uint64_t +{ + using storage_t = vector; + using this_t = emulated_uint64_t; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_uint64_t() = default; + + #endif + + /** + * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + this_t retVal; + retVal.data = _data; + return retVal; + } + + /** + * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `uint64` being emulated + * @param [in] lo Lowest 32 bits of the `uint64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return create(storage_t(hi, lo)); + } + + /** + * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
+ * + * @param [in] _data `uint64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) + { + return create(_static_cast(u >> 32), _static_cast(u)); + } + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data & rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data | rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(data ^ rhs.data); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal = create(~data); + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr inline this_t operator<<(uint16_t bits) const; + + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); + const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const this_t retVal = create(addResult); + return retVal; + } + + NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); + const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const this_t retVal = create(subResult); + return retVal; + } + + 
// ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return data.x == rhs.data.x && data.y == rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return data.x != rhs.data.x || data.y != rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return data.x < rhs.data.x; + else + return data.y < rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return data.x > rhs.data.x; + else + return data.y > rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator>(rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator<(rhs); + } +}; + +struct emulated_int64_t : emulated_uint64_t +{ + using base_t = emulated_uint64_t; + using base_t::storage_t; + using this_t = emulated_int64_t; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + + #ifndef __HLSL_VERSION + + emulated_int64_t() = default; + + #endif + + /** + * @brief Creates an `emulated_int64_t` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `int64_t` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + { + return _static_cast(base_t::create(_data)); + } + + /** + * @brief Creates an `emulated_int64_t` from two `uint32_t`s representing its bitpattern + * + * @param [in] hi Highest 32 bits of the `int64` being emulated + * @param [in] 
lo Lowest 32 bits of the `int64` being emulated + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + { + return _static_cast(base_t::create(hi, lo)); + } + + /** + * @brief Creates an `emulated_int64_t` from a `int64_t`. Useful for compile-time encoding. + * + * @param [in] _data `int64_t` to be unpacked into high and low bits + */ + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(int64_t) i) + { + return _static_cast(base_t::create(_static_cast(i))); + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + // Only this one needs to be redefined since it's arithmetic + constexpr inline this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + // Same as unsigned but the topmost bits are compared as signed + NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return _static_cast(data.x) < _static_cast(rhs.data.x); + else + return data.y < rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + if (data.x != rhs.data.x) + return _static_cast(data.x) > _static_cast(rhs.data.x); + else + return data.y > rhs.data.y; + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator>(rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return !operator<(rhs); + } +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t 
shift = ComponentBitWidth - _bits; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct left_shift_operator +{ + using type_t = emulated_int64_t; + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator leftShift; + return _static_cast(leftShift(_static_cast(operand), bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + if (!bits) + return operand; + const uint32_t _bits = uint32_t(bits); + const uint32_t shift = ComponentBitWidth - _bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + // Also the right shift *only* in the top bits happens as a signed arithmetic 
right shift + const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; + return emulated_int64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +namespace impl +{ + +template<> +struct static_cast_helper +{ + using To = emulated_uint64_t; + using From = emulated_int64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + To retVal; + retVal.data = i.data; + return retVal; + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_int64_t; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + To retVal; + retVal.data = u.data; + return retVal; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = Unsigned; + using From = emulated_uint64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return _static_cast(u.data.y); + } +}; + +template<> +struct static_cast_helper +{ + using To = uint64_t; + using From = emulated_uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + const To highBits = _static_cast(u.data.x) << To(32); + return highBits | _static_cast(u.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && 
(sizeof(Unsigned) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +{ + using To = emulated_uint64_t; + using From = Unsigned; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_uint64_t; + using From = uint64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + { + return emulated_uint64_t::create(u); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +{ + using To = Signed; + using From = emulated_int64_t; + + // Return only the lowest bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return _static_cast(i.data.y); + } +}; + +template<> +struct static_cast_helper +{ + using To = int64_t; + using From = emulated_int64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + const To highBits = _static_cast(i.data.x) << To(32); + return highBits | _static_cast(i.data.y); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) +struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +{ + using To = emulated_int64_t; + using From = Signed; + + // Set only lower bits + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return emulated_int64_t::create(uint32_t(0), _static_cast(i)); + } +}; + +template<> +struct static_cast_helper +{ + using To = emulated_int64_t; + using From = int64_t; + + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + { + return emulated_int64_t::create(i); + } +}; + +} //namespace impl + +} //namespace nbl +} //namespace hlsl + +#ifndef __HLSL_VERSION +#define NBL_ADD_STD std:: +#else +#define NBL_ADD_STD nbl::hlsl:: +#endif + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> 
+struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +#undef NBL_ADD_STD + + + +#endif diff --git a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl b/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl deleted file mode 100644 index ab08e1ff38..0000000000 --- a/include/nbl/builtin/hlsl/emulated/uint64_t.hlsl +++ /dev/null @@ -1,232 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_EMULATED_UINT64_T_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/functional.hlsl" -#include "nbl/builtin/hlsl/concepts/core.hlsl" - -namespace nbl -{ -namespace hlsl -{ - -struct emulated_uint64_t -{ - using storage_t = vector; - using this_t = emulated_uint64_t; - - storage_t data; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - - - #ifndef __HLSL_VERSION - - emulated_uint64_t() = default; - - #endif - - /** - * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - this_t retVal; - retVal.data = _data; - return retVal; - } - - /** - * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern - * - * @param [in] hi Highest 32 bits of the `uint64` being emulated - * @param [in] lo Lowest 32 bits of the `uint64` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) - { - return create(storage_t(hi, lo)); - } - - /** - * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. 
- * - * @param [in] _data `uint64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) - { - return create(_static_cast(u >> 32), _static_cast(u)); - } - - // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data & rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data | rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data ^ rhs.data); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(~data); - return retVal; - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - constexpr inline this_t operator<<(uint16_t bits) const; - - constexpr inline this_t operator>>(uint16_t bits) const; - - #endif - - // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); - const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; - const this_t retVal = create(addResult); - return retVal; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); - const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; - const this_t retVal = create(subResult); - return retVal; - } - -}; 
- -template<> -struct left_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t higherBitsMask = (~uint32_t(0)) << shift; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << _bits) | ((operand.data.y & higherBitsMask) >> shift), operand.data.y << _bits }; - return emulated_uint64_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - const uint32_t lowerBitsMask = ~uint32_t(0) >> shift; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> _bits, ((operand.data.x & lowerBitsMask) << shift) | (operand.data.y >> _bits) }; - return emulated_uint64_t::create(retValData); - } -}; - -#ifndef __HLSL_VERSION - -constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const -{ - left_shift_operator leftShift; - return leftShift(*this, bits); -} - -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -#endif - -namespace impl -{ - -template 
NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > -{ - using To = Unsigned; - using From = emulated_uint64_t; - - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return _static_cast(u.data.y); - } -}; - -template<> -struct static_cast_helper -{ - using To = uint64_t; - using From = emulated_uint64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - const To highBits = _static_cast(u.data.x) << To(32); - return highBits | _static_cast(u.data.y); - } -}; - -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > -{ - using To = emulated_uint64_t; - using From = Unsigned; - - // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); - } -}; - -template<> -struct static_cast_helper -{ - using To = emulated_uint64_t; - using From = uint64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) - { - return emulated_uint64_t::create(u); - } -}; - -} //namespace impl - -} //namespace nbl -} //namespace hlsl - - - -#endif diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index a106cec440..65a97bbe68 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,7 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include -#include +#include #include #include #include @@ -331,6 +331,7 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) 
DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 499deb1db4..9c834424a8 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -5,7 +5,7 @@ #include "nbl/builtin/hlsl/concepts/core.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" -#include "nbl/builtin/hlsl/emulated/uint64_t.hlsl" +#include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" namespace nbl @@ -275,14 +275,15 @@ struct Equals; template struct Equals { - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; vector retVal; [[unroll]] for (uint16_t i = 0; i < D; i++) { - retVal[i] = (_value & rhs[i]) == rhs[i]; + retVal[i] = (value & leftShift(Mask, i)) == leftShift(rhs[i], i); } return retVal; } @@ -293,7 +294,7 @@ struct Equals { template NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) _value, NBL_CONST_REF_ARG(vector) rhs) + operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using U = make_unsigned_t; vector interleaved; @@ -303,10 +304,77 @@ struct Equals interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); } Equals equals; - return equals(_value, interleaved); + return equals(value, interleaved); + } +}; + +template +struct BaseComparison; + +// Aux method for extracting highest bit, used by the comparison below +template +NBL_CONSTEXPR_INLINE_FUNC storage_t extractHighestBit(storage_t value, uint16_t coord) +{ + // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for 
representation, then the highest index of these + // bits is `bits(coord) - 1` + const uint16_t coordHighestBitIdx = Bits / D - ((coord < Bits % D) ? uint16_t(0) : uint16_t(1)); + // This is the index of that bit as an index in the encoded value + const uint16_t shift = coordHighestBitIdx * D + coord; + left_shift_operator leftShift; + return value & leftShift(_static_cast(uint16_t(1)), shift); +} + +template +struct BaseComparison +{ + NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + left_shift_operator leftShift; + vector retVal; + ComparisonOp comparison; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + storage_t thisCoord = value & leftShift(Mask, i); + storage_t rhsCoord = leftShift(rhs[i], i); + // If coordinate is negative, we add 1s in every bit not corresponding to coord + if (extractHighestBit(thisCoord) != _static_cast(uint64_t(0))) + thisCoord = thisCoord | ~leftShift(Mask, i); + if (extractHighestBit(rhsCoord) != _static_cast(uint64_t(0))) + rhsCoord = rhsCoord | ~leftShift(Mask, i); + retVal[i] = comparison(thisCoord, rhsCoord); + } + return retVal; + } +}; + +template +struct BaseComparison +{ + template + NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > + operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using U = make_unsigned_t; + vector interleaved; + [[unroll]] + for (uint16_t i = 0; i < D; i++) + { + interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); + } + BaseComparison baseComparison; + return baseComparison(value, interleaved); } }; +template +struct LessThan : BaseComparison > {}; + +template +struct LessEquals : BaseComparison > {}; + + } //namespace impl // Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 @@ -490,8 +558,35 @@ struct code template 
enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) { - return !operator==(rhs); + return !operator== (rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<(NBL_CONST_REF_ARG(vector) rhs) + { + impl::LessThan lessThan; + return lessThan(value, rhs); } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<=(NBL_CONST_REF_ARG(vector) rhs) + { + impl::LessEquals lessEquals; + return lessEquals(value, rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator<= (rhs); + } + + template + enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>=(NBL_CONST_REF_ARG(vector) rhs) + { + return !operator< (rhs); + } + }; } //namespace morton diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl new file mode 100644 index 0000000000..6929e160fa --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -0,0 +1,31 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_INT_64 to force using emulated int64 types + +namespace nbl +{ +namespace hlsl +{ +template +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_INT_64 +using portable_uint64_t = emulated_uint64_t; +using portable_int64_t = emulated_int64_t; +#else +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; +#endif + +#else +using portable_uint64_t = uint64_t; +using portable_int64_t = int64_t; +#endif + +} +} + +#endif \ No newline at end of file diff --git 
a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl b/include/nbl/builtin/hlsl/portable/uint64_t.hlsl deleted file mode 100644 index ac081234ac..0000000000 --- a/include/nbl/builtin/hlsl/portable/uint64_t.hlsl +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ -#define _NBL_BUILTIN_HLSL_PORTABLE_UINT64_T_INCLUDED_ - -#include -#include - -// define NBL_FORCE_EMULATED_UINT_64 to force using emulated uint64 - -namespace nbl -{ -namespace hlsl -{ -template -#ifdef __HLSL_VERSION -#ifdef NBL_FORCE_EMULATED_UINT_64 -using portable_uint64_t = emulated_uint64_t; -#else -using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; -#endif - -#else -using portable_uint64_t = uint64_t; -#endif - -//static_assert(sizeof(portable_uint64_t) == sizeof(uint64_t)); - -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index dcaea97739..16d5b40f81 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -3,6 +3,7 @@ #include #include +#include namespace nbl { @@ -51,6 +52,14 @@ template using portable_uint64_t3 = portable_vector_t3 >; template using portable_uint64_t4 = portable_vector_t4 >; + +//Int +template +using portable_int64_t2 = portable_vector_t2 >; +template +using portable_int64_t3 = portable_vector_t3 >; +template +using portable_int64_t4 = portable_vector_t4 >; #else // Float template @@ -67,6 +76,14 @@ template using portable_uint64_t3 = portable_vector_t3; template using portable_uint64_t4 = portable_vector_t4; + +// Int +template +using portable_int64_t2 = portable_vector_t2; +template +using portable_int64_t3 = portable_vector_t3; +template +using portable_int64_t4 = portable_vector_t4; #endif } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index d7005a1ed6..f03d8ae22c 100644 --- 
a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -214,12 +214,12 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/uint64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/uint64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 From 60ff99a4dadfdecc5abf59e4fb2d95e62d6ed929 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 7 Apr 2025 23:20:42 -0300 Subject: [PATCH 19/75] Clean up the emulated int code, fix some constant creation in the morton code --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 317 ++++++++---------- include/nbl/builtin/hlsl/morton.hlsl | 15 +- 2 files changed, 161 insertions(+), 171 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index f3269cc6ba..cad10242f2 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -5,31 +5,35 @@ #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" +// Didn't bother with operator*, operator/, implement if you need them. 
Multiplication is pretty straightforward, division requires switching on signs +// and whether the topmost bits of the divisor are equal to 0 +// - Francisco + namespace nbl { namespace hlsl { -struct emulated_uint64_t +template +struct emulated_int64_base { - using storage_t = vector; - using this_t = emulated_uint64_t; + using storage_t = vector; + using this_t = emulated_int64_base; - storage_t data; + storage_t data; // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - #ifndef __HLSL_VERSION - emulated_uint64_t() = default; + emulated_int64_base() = default; #endif /** - * @brief Creates an `emulated_uint64_t` from a vector of two `uint32_t`s representing its bitpattern + * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t` being emulated + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { @@ -39,10 +43,10 @@ struct emulated_uint64_t } /** - * @brief Creates an `emulated_uint64_t` from two `uint32_t`s representing its bitpattern + * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern * - * @param [in] hi Highest 32 bits of the `uint64` being emulated - * @param [in] lo Lowest 32 bits of the `uint64` being emulated + * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated + * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) { @@ -50,9 +54,9 @@ struct emulated_uint64_t } /** - * @brief Creates an `emulated_uint64_t` from a `uint64_t`. Useful for compile-time encoding. + * @brief Creates an `emulated_int64_base` from a `uint64_t` with its bitpattern. 
Useful for compile-time encoding. * - * @param [in] _data `uint64_t` to be unpacked into high and low bits + * @param [in] u `uint64_t` to be unpacked into high and low bits */ NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) { @@ -126,7 +130,15 @@ struct emulated_uint64_t NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { if (data.x != rhs.data.x) - return data.x < rhs.data.x; + { + // If signed, compare topmost bits as signed + NBL_IF_CONSTEXPR(Signed) + return _static_cast(data.x) < _static_cast(rhs.data.x); + // If unsigned, compare them as-is + else + return data.x < rhs.data.x; + } + // Lower bits are positive in both signed and unsigned else return data.y < rhs.data.y; } @@ -134,7 +146,14 @@ struct emulated_uint64_t NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { if (data.x != rhs.data.x) - return data.x > rhs.data.x; + { + // If signed, compare topmost bits as signed + NBL_IF_CONSTEXPR(Signed) + return _static_cast(data.x) > _static_cast(rhs.data.x); + // If unsigned, compare them as-is + else + return data.x > rhs.data.x; + } else return data.y > rhs.data.y; } @@ -150,94 +169,15 @@ struct emulated_uint64_t } }; -struct emulated_int64_t : emulated_uint64_t -{ - using base_t = emulated_uint64_t; - using base_t::storage_t; - using this_t = emulated_int64_t; - - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- - +using emulated_uint64_t = emulated_int64_base; +using emulated_int64_t = emulated_int64_base; - #ifndef __HLSL_VERSION - - emulated_int64_t() = default; - - #endif +// ---------------------- Functional operatos ------------------------ - /** - * @brief Creates an `emulated_int64_t` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `int64_t` being emulated - */ - 
NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - return _static_cast(base_t::create(_data)); - } - - /** - * @brief Creates an `emulated_int64_t` from two `uint32_t`s representing its bitpattern - * - * @param [in] hi Highest 32 bits of the `int64` being emulated - * @param [in] lo Lowest 32 bits of the `int64` being emulated - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) - { - return _static_cast(base_t::create(hi, lo)); - } - - /** - * @brief Creates an `emulated_int64_t` from a `int64_t`. Useful for compile-time encoding. - * - * @param [in] _data `int64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(int64_t) i) - { - return _static_cast(base_t::create(_static_cast(i))); - } - - // Only valid in CPP - #ifndef __HLSL_VERSION - - // Only this one needs to be redefined since it's arithmetic - constexpr inline this_t operator>>(uint16_t bits) const; - - #endif - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - - // Same as unsigned but the topmost bits are compared as signed - NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - if (data.x != rhs.data.x) - return _static_cast(data.x) < _static_cast(rhs.data.x); - else - return data.y < rhs.data.y; - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - if (data.x != rhs.data.x) - return _static_cast(data.x) > _static_cast(rhs.data.x); - else - return data.y > rhs.data.y; - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator>(rhs); - } - - NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator<(rhs); - } -}; - -template<> -struct left_shift_operator 
+template +struct left_shift_operator > { - using type_t = emulated_uint64_t; + using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) @@ -248,7 +188,7 @@ struct left_shift_operator const uint32_t shift = ComponentBitWidth - _bits; // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; - return emulated_uint64_t::create(retValData); + return type_t::create(retValData); } }; @@ -270,18 +210,6 @@ struct arithmetic_right_shift_operator } }; -template<> -struct left_shift_operator -{ - using type_t = emulated_int64_t; - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - left_shift_operator leftShift; - return _static_cast(leftShift(_static_cast(operand), bits)); - } -}; - template<> struct arithmetic_right_shift_operator { @@ -303,7 +231,8 @@ struct arithmetic_right_shift_operator #ifndef __HLSL_VERSION -constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint16_t bits) const +template +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint16_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); @@ -356,113 +285,163 @@ struct static_cast_helper } }; -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { - using To = Unsigned; - using From = emulated_uint64_t; + 
using To = I; + using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - return _static_cast(u.data.y); + return _static_cast(val.data.y); } }; -template<> -struct static_cast_helper +template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) +struct static_cast_helper NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > { - using To = uint64_t; - using From = emulated_uint64_t; + using To = I; + using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - const To highBits = _static_cast(u.data.x) << To(32); - return highBits | _static_cast(u.data.y); + const To highBits = _static_cast(val.data.x) << To(32); + return highBits | _static_cast(val.data.y); } }; -template NBL_PARTIAL_REQ_TOP(concepts::UnsignedIntegralScalar && (sizeof(Unsigned) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Unsigned) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) > { - using To = emulated_uint64_t; - using From = Unsigned; + using To = emulated_int64_base; + using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return emulated_uint64_t::create(uint32_t(0), _static_cast(u)); + return To::create(uint32_t(0), _static_cast(i)); } }; -template<> -struct static_cast_helper +template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > { - using To = emulated_uint64_t; - using From = uint64_t; + using 
To = emulated_int64_base; + using From = I; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return emulated_uint64_t::create(u); + return To::create(_static_cast(i)); } }; -template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > +} //namespace impl + +// ---------------------- STD arithmetic operators ------------------------ +// Specializations of the structs found in functional.hlsl +// These all have to be specialized because of the identity that can't be initialized inside the struct definition + +template +struct plus > { - using To = Signed; - using From = emulated_int64_t; + using type_t = emulated_int64_base; - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return _static_cast(i.data.y); + return lhs + rhs; } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template<> -struct static_cast_helper +template +struct minus > { - using To = int64_t; - using From = emulated_int64_t; + using type_t = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - const To highBits = _static_cast(i.data.x) << To(32); - return highBits | _static_cast(i.data.y); + return lhs - rhs; } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template NBL_PARTIAL_REQ_TOP(concepts::SignedIntegralScalar && (sizeof(Signed) <= sizeof(uint32_t))) -struct static_cast_helper && (sizeof(Signed) <= sizeof(uint32_t))) > -{ - using To = emulated_int64_t; - using From = 
Signed; +#ifdef __HLSL_VERSION +template<> +NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +template<> +NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +#endif - // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) +// --------------------------------- Compound assignment operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl + +template +struct plus_assign > +{ + using type_t = emulated_int64_base; + using base_t = plus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return emulated_int64_t::create(uint32_t(0), _static_cast(i)); + lhs = baseOp(lhs, rhs); } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -template<> -struct static_cast_helper +template +struct minus_assign > { - using To = emulated_int64_t; - using From = int64_t; - - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + using type_t = emulated_int64_base; + using base_t = minus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { - return emulated_int64_t::create(i); + lhs = baseOp(lhs, rhs); } + + #ifndef __HLSL_VERSION + NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; + #else + NBL_CONSTEXPR_STATIC_INLINE type_t identity; + #endif }; -} //namespace impl +#ifdef __HLSL_VERSION +template<> +NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +template<> +NBL_CONSTEXPR emulated_int64_t 
minus_assign::identity = minus::identity; +#endif } //namespace nbl } //namespace hlsl +// Declare them as signed/unsigned versions of each other + #ifndef __HLSL_VERSION #define NBL_ADD_STD std:: #else diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9c834424a8..e2ae3d8b0a 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -39,17 +39,28 @@ NBL_CONSTEXPR T decode_mask_v = decode_mask::value; // --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- // Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y +#ifndef __HLSL_VERSION + #define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ {\ NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ }; -#ifndef __HLSL_VERSION - #define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) #else +#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +{\ + NBL_CONSTEXPR_STATIC_INLINE T value;\ +};\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint16_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint32_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ +template<>\ +NBL_CONSTEXPR_STATIC_INLINE uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ + #define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ {\ NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ From 55601628733ca20218f0c13d481e0c1df29bed1a Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 8 Apr 2025 19:44:15 -0300 Subject: [PATCH 20/75] Addressing latest PR review. 
Generic overloads for of different functional structs blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7325 --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 218 +++++++++--------- include/nbl/builtin/hlsl/functional.hlsl | 102 ++++++++ include/nbl/builtin/hlsl/morton.hlsl | 2 + .../nbl/builtin/hlsl/portable/int64_t.hlsl | 7 +- 4 files changed, 218 insertions(+), 111 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index cad10242f2..45cb82ed78 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -4,6 +4,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" // Didn't bother with operator*, operator/, implement if you need them. Multiplication is pretty straightforward, division requires switching on signs // and whether the topmost bits of the divisor are equal to 0 @@ -35,7 +36,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { this_t retVal; retVal.data = _data; @@ -48,19 +49,9 @@ struct emulated_int64_base * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) hi, NBL_CONST_REF_ARG(uint32_t) lo) + NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) { - return create(storage_t(hi, lo)); - } - - /** - * @brief Creates an `emulated_int64_base` from a `uint64_t` with its bitpattern. Useful for compile-time encoding. 
- * - * @param [in] u `uint64_t` to be unpacked into high and low bits - */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint64_t) u) - { - return create(_static_cast(u >> 32), _static_cast(u)); + return create(storage_t(lo, hi)); } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- @@ -92,9 +83,9 @@ struct emulated_int64_base // Only valid in CPP #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint16_t bits) const; + constexpr inline this_t operator<<(this_t bits) const; - constexpr inline this_t operator>>(uint16_t bits) const; + constexpr inline this_t operator>>(this_t bits) const; #endif @@ -102,16 +93,16 @@ struct emulated_int64_base NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.y, rhs.data.y); - const storage_t addResult = { data.x + rhs.data.x + lowerAddResult.carry, lowerAddResult.result }; + const spirv::AddCarryOutput lowerAddResult = addCarry(data.x, rhs.data.x); + const storage_t addResult = { lowerAddResult.result, data.y + rhs.data.y + lowerAddResult.carry }; const this_t retVal = create(addResult); return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.y, rhs.data.y); - const storage_t subResult = { data.x - rhs.data.x - lowerSubResult.borrow, lowerSubResult.result }; + const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.x, rhs.data.x); + const storage_t subResult = { lowerSubResult.result, data.y - rhs.data.y - lowerSubResult.borrow }; const this_t retVal = create(subResult); return retVal; } @@ -172,86 +163,6 @@ struct emulated_int64_base using emulated_uint64_t = emulated_int64_base; using emulated_int64_t = emulated_int64_base; -// ---------------------- Functional operatos ------------------------ - 
-template -struct left_shift_operator > -{ - using type_t = emulated_int64_base; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << _bits) | (operand.data.y >> shift), operand.data.y << _bits }; - return type_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_uint64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; - return emulated_uint64_t::create(retValData); - } -}; - -template<> -struct arithmetic_right_shift_operator -{ - using type_t = emulated_int64_t; - NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) - { - if (!bits) - return operand; - const uint32_t _bits = uint32_t(bits); - const uint32_t shift = ComponentBitWidth - _bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - // Also the right 
shift *only* in the top bits happens as a signed arithmetic right shift - const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> _bits, (operand.data.x << shift) | (operand.data.y >> _bits) }; - return emulated_int64_t::create(retValData); - } -}; - -#ifndef __HLSL_VERSION - -template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint16_t bits) const -{ - left_shift_operator leftShift; - return leftShift(*this, bits); -} - -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint16_t bits) const -{ - arithmetic_right_shift_operator rightShift; - return rightShift(*this, bits); -} - -#endif - namespace impl { @@ -285,7 +196,7 @@ struct static_cast_helper } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { using To = I; @@ -294,25 +205,24 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con // Return only the lowest bits NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - return _static_cast(val.data.y); + return _static_cast(val.data.x); } }; -template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) -struct static_cast_helper NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > { using To = I; using From = emulated_int64_base; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) { - const To highBits = _static_cast(val.data.x) << To(32); - return highBits | _static_cast(val.data.y); + return 
bit_cast(val.data); } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t)) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > { using To = emulated_int64_base; using From = I; @@ -324,20 +234,108 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP((is_same_v || is_same_v) && (is_signed_v == Signed)) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT((is_same_v || is_same_v) && (is_signed_v == Signed)) > +template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v ) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > { using To = emulated_int64_base; using From = I; NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) { - return To::create(_static_cast(i)); + To retVal; + retVal.data = bit_cast(i); + return retVal; } }; } //namespace impl +// ---------------------- Functional operators ------------------------ + +template +struct left_shift_operator > +{ + using type_t = emulated_int64_base; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component + const vector retValData = { (operand.data.x << bits) | (operand.data.y >> shift), 
operand.data.y << bits }; + return type_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + const vector retValData = { operand.data.x >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; + return emulated_uint64_t::create(retValData); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can only be defined with `_bits` being of `type_t`, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + { + const uint32_t bits = _static_cast(_bits); + if (!bits) + return operand; + const uint32_t shift = ComponentBitWidth - bits; + // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component + // Also the right shift *only* in the top bits happens as a signed arithmetic right shift + const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; + return emulated_int64_t::create(retValData); + } +}; + +#ifndef __HLSL_VERSION + +template +constexpr inline emulated_int64_base 
emulated_int64_base::operator<<(this_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(this_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(this_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + // ---------------------- STD arithmetic operators ------------------------ // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be initialized inside the struct definition diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index e5486e2727..cc95633f44 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" namespace nbl @@ -217,6 +218,56 @@ struct left_shift_operator } }; +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct left_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand << bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(! 
(concepts::IntVector) && concepts::Vectorial) +struct left_shift_operator) && concepts::Vectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, leftShift(operand.getComponent(i), bits.getComponent(i))); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, leftShift(operand.getComponent(i), bits)); + } + return shifted; + } +}; + template struct arithmetic_right_shift_operator { @@ -228,6 +279,57 @@ struct arithmetic_right_shift_operator } }; +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand >> bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, 
rightShift(operand.getComponent(i), bits.getComponent(i))); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + shifted.setComponent(i, rightShift(operand.getComponent(i), bits)); + } + return shifted; + } +}; + +// Left unimplemented for vectorial types by default template struct logical_right_shift_operator { diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e2ae3d8b0a..ea583fddfa 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,6 +8,8 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" +// TODO: mega macro to get functional plus, minus, plus_assign, minus_assign + namespace nbl { namespace hlsl diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl index 6929e160fa..2dffa40a2d 100644 --- a/include/nbl/builtin/hlsl/portable/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -10,18 +10,23 @@ namespace nbl { namespace hlsl { -template #ifdef __HLSL_VERSION #ifdef NBL_FORCE_EMULATED_INT_64 +template using portable_uint64_t = emulated_uint64_t; +template using portable_int64_t = emulated_int64_t; #else +template using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +template using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; #endif #else +template using portable_uint64_t = uint64_t; +template using portable_int64_t = int64_t; #endif From e50c56b52e873da965804153eba64b3cb133c4a3 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 9 Apr 2025 00:23:55 -0300 Subject: [PATCH 21/75] Bunch of emulated int64 fixes regarding 
creation, comparison operators and left/right shifts --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 95 +++++++++---------- 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 45cb82ed78..98fcf2835b 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -34,7 +34,7 @@ struct emulated_int64_base /** * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) */ NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { @@ -54,6 +54,18 @@ struct emulated_int64_base return create(storage_t(lo, hi)); } + // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + + NBL_CONSTEXPR_INLINE_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + { + return data.x; + } + + NBL_CONSTEXPR_INLINE_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC + { + return data.y; + } + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -93,60 +105,42 @@ struct emulated_int64_base NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::AddCarryOutput lowerAddResult = addCarry(data.x, rhs.data.x); - const storage_t addResult = { lowerAddResult.result, data.y + rhs.data.y + lowerAddResult.carry }; - const this_t retVal = create(addResult); + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); + const 
this_t retVal = create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); return retVal; } NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(data.x, rhs.data.x); - const storage_t subResult = { lowerSubResult.result, data.y - rhs.data.y - lowerSubResult.borrow }; - const this_t retVal = create(subResult); + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + const this_t retVal = create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); return retVal; } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return data.x == rhs.data.x && data.y == rhs.data.y; + return all(data == rhs.data); } NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return data.x != rhs.data.x || data.y != rhs.data.y; + return any(data != rhs.data); } NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - if (data.x != rhs.data.x) - { - // If signed, compare topmost bits as signed - NBL_IF_CONSTEXPR(Signed) - return _static_cast(data.x) < _static_cast(rhs.data.x); - // If unsigned, compare them as-is - else - return data.x < rhs.data.x; - } - // Lower bits are positive in both signed and unsigned - else - return data.y < rhs.data.y; + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSB = Signed ? 
(_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() < rhs.__getLSB()))); } NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - if (data.x != rhs.data.x) - { - // If signed, compare topmost bits as signed - NBL_IF_CONSTEXPR(Signed) - return _static_cast(data.x) > _static_cast(rhs.data.x); - // If unsigned, compare them as-is - else - return data.x > rhs.data.x; - } - else - return data.y > rhs.data.y; + // Same reasoning as above + const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() > rhs.__getLSB()))); } NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -260,15 +254,15 @@ struct left_shift_operator > // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0` in LSB and the result of `uint32_t(1) << 32` in your architecture in MSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `x` component of the vector (which represents the higher bits of the emulated uint64) to get the `bits` higher bits of the `y` component - const vector retValData = { (operand.data.x << bits) | (operand.data.y >> shift), operand.data.y << bits }; - return type_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? 
vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + return bits ? shifted : operand; } }; @@ -280,15 +274,15 @@ struct arithmetic_right_shift_operator // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - const vector retValData = { operand.data.x >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; - return emulated_uint64_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + return bits ? 
shifted : operand; } }; @@ -300,16 +294,15 @@ struct arithmetic_right_shift_operator // Can only be defined with `_bits` being of `type_t`, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined (current impl returns `0xFFFFFFFF` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - if (!bits) - return operand; - const uint32_t shift = ComponentBitWidth - bits; - // We need the `y` component of the vector (which represents the lower bits of the emulated uint64) to get the `bits` lower bits of the `x` component - // Also the right shift *only* in the top bits happens as a signed arithmetic right shift - const vector retValData = { _static_cast(_static_cast(operand.data.x)) >> bits, (operand.data.x << shift) | (operand.data.y >> bits) }; - return emulated_int64_t::create(retValData); + const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(uint32_t(int32_t(operand.__getMSB()) >> bits), ~uint32_t(0)) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); + return bits ? 
shifted : operand; } }; From b1de9c37b2e2572ea13163f241e9fab0a044bb8e Mon Sep 17 00:00:00 2001 From: Fletterio Date: Wed, 9 Apr 2025 16:24:21 -0300 Subject: [PATCH 22/75] Fix automatic specialize macro in cpp compat intrinsics, add intrinsic and generic ternary operator that should work for all compatible types, address PR review comments --- include/nbl/builtin/hlsl/complex.hlsl | 16 -------- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 38 +++++++++++++++++-- .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 6 +++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 36 +++++++++++------- include/nbl/builtin/hlsl/functional.hlsl | 21 +++++++++- .../builtin/hlsl/spirv_intrinsics/core.hlsl | 6 +++ 6 files changed, 89 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl index 6728a9bf3d..a3a9f387d0 100644 --- a/include/nbl/builtin/hlsl/complex.hlsl +++ b/include/nbl/builtin/hlsl/complex.hlsl @@ -427,22 +427,6 @@ complex_t rotateRight(NBL_CONST_REF_ARG(complex_t) value) return retVal; } -template -struct ternary_operator< complex_t > -{ - using type_t = complex_t; - - complex_t operator()(bool condition, NBL_CONST_REF_ARG(complex_t) lhs, NBL_CONST_REF_ARG(complex_t) rhs) - { - const vector lhsVector = vector(lhs.real(), lhs.imag()); - const vector rhsVector = vector(rhs.real(), rhs.imag()); - const vector resultVector = condition ? 
lhsVector : rhsVector; - const complex_t result = { resultVector.x, resultVector.y }; - return result; - } -}; - - } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 92fc9e929b..e1ba823b9b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -75,6 +75,8 @@ template struct all_helper; template struct any_helper; +template +struct select_helper; template struct bitReverseAs_helper; template @@ -121,8 +123,8 @@ struct subBorrow_helper; // the template<> needs to be written ourselves // return type is __VA_ARGS__ to protect against `,` in templated return types #define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ -NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ -struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ using return_t = __VA_ARGS__;\ static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ @@ -143,8 +145,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, 
select, (B)(T), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -633,6 +636,35 @@ struct subBorrow_helper } }; +template +NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) +struct select_helper) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + return condition ? object1 : object2; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 1f1957dbbd..284ba564d7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -150,6 +150,12 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } +template +NBL_CONSTEXPR_INLINE_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +{ + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); +} + /** * @brief Returns x - floor(x). 
* diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 98fcf2835b..53881423e9 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -132,15 +132,19 @@ struct emulated_int64_base { // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); const bool MSB = Signed ? (_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); - return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() < rhs.__getLSB()))); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; } NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); - return any(vector(MSB, (__getMSB() == rhs.__getMSB()) && (__getLSB() > rhs.__getLSB()))); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; } NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -259,10 +263,12 @@ struct left_shift_operator > NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(0, operand.__getLSB() << shift) - : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - return bits ? 
shifted : operand; + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; @@ -279,10 +285,12 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? vector(operand.__getMSB() >> shift, 0) - : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); - return bits ? shifted : operand; + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; @@ -299,10 +307,12 @@ struct arithmetic_right_shift_operator NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) { const uint32_t bits = _static_cast(_bits); - const uint32_t shift = bits >= ComponentBitWidth ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bits >= ComponentBitWidth ? 
vector(uint32_t(int32_t(operand.__getMSB()) >> bits), ~uint32_t(0)) + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); - return bits ? shifted : operand; + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); } }; diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index cc95633f44..51ee4f4829 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -201,9 +201,26 @@ struct ternary_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) { - return condition ? 
lhs : rhs; + return select(condition, lhs, rhs); + } +}; + +template +struct ternary_operator > +{ + using type_t = T; + using traits = hlsl::vector_traits; + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + { + return select(condition, lhs, rhs); + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(vector) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + { + return select, T>(condition, lhs, rhs); } }; diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index d8d90de726..8add7a9ed3 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -12,6 +12,7 @@ #include #include #include +#include namespace nbl { @@ -335,6 +336,11 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, BooleanVector> any(BooleanVector vec); +// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) +template && (! 
concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) +[[vk::ext_instruction(spv::OpSelect)]] +ResultType select(Condition condition, ResultType object1, ResultType object2); + template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); From ea8cd43756146225058dcfbc1ddf4d254b0fd579 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 11 Apr 2025 12:39:16 -0300 Subject: [PATCH 23/75] Checkpoint: adding a bunch of operators to emulated vector types --- include/nbl/builtin/hlsl/concepts/core.hlsl | 10 + include/nbl/builtin/hlsl/concepts/vector.hlsl | 2 + include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 + .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 108 ++++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 4 +- include/nbl/builtin/hlsl/functional.hlsl | 101 ++++-- include/nbl/builtin/hlsl/morton.hlsl | 290 +++++------------- include/nbl/builtin/hlsl/type_traits.hlsl | 2 + 8 files changed, 236 insertions(+), 283 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index dcbafae8a5..4a8b848cb8 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -74,12 +74,22 @@ struct is_emulating_floating_point_scalar { NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; }; + +template +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = IntegralScalar; +}; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +//! 
Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) +template +NBL_BOOL_CONCEPT IntegralLikeScalar = impl::is_emulating_integral_scalar::value; + } } } diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 468838730a..3ea3199951 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -40,6 +40,8 @@ NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial && concepts template NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial && (is_integral_v::scalar_type>); template +NBL_BOOL_CONCEPT IntegralLikeVectorial = concepts::Vectorial && concepts::IntegralLikeScalar::scalar_type>; +template NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::SignedIntegralScalar::scalar_type>; } diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 77d9d887bd..81bdf32c19 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -11,6 +11,7 @@ #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static +#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_STATIC_FUNC constexpr static #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline @@ -45,6 +46,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static +#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_STATIC_FUNC static #define NBL_CONSTEXPR_INLINE_FUNC inline diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 
53881423e9..ca51b0060a 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -94,10 +94,8 @@ struct emulated_int64_base // Only valid in CPP #ifndef __HLSL_VERSION - - constexpr inline this_t operator<<(this_t bits) const; - - constexpr inline this_t operator>>(this_t bits) const; + constexpr inline this_t operator<<(uint32_t bits) const; + constexpr inline this_t operator>>(uint32_t bits) const; #endif @@ -256,13 +254,12 @@ struct left_shift_operator > using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0` in LSB and the result of `uint32_t(1) << 32` in your architecture in MSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
vector(0, operand.__getLSB() << shift) @@ -270,6 +267,12 @@ struct left_shift_operator > ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; template<> @@ -278,13 +281,12 @@ struct arithmetic_right_shift_operator using type_t = emulated_uint64_t; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
vector(operand.__getMSB() >> shift, 0) @@ -292,6 +294,12 @@ struct arithmetic_right_shift_operator ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; template<> @@ -300,13 +308,12 @@ struct arithmetic_right_shift_operator using type_t = emulated_int64_t; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); - // Can only be defined with `_bits` being of `type_t`, see: + // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 - // If `_bits > 63` the result is undefined (current impl returns `0xFFFFFFFF` in MSB and the result of `~uint32_t(0) >> 32` in your architecture in LSB) - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t _bits) + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { - const uint32_t bits = _static_cast(_bits); const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? 
vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) @@ -314,24 +321,30 @@ struct arithmetic_right_shift_operator ternary_operator ternary; return ternary(bool(bits), shifted, operand); } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } }; #ifndef __HLSL_VERSION template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(this_t bits) const +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint32_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); } -constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(this_t bits) const +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } -constexpr inline emulated_int64_t emulated_int64_t::operator>>(this_t bits) const +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); @@ -353,11 +366,7 @@ struct plus > return lhs + rhs; } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; template @@ -370,23 +379,17 @@ struct minus > return lhs - rhs; } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = _static_cast(uint64_t(0)); - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; -#ifdef __HLSL_VERSION template<> -NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); 
+NBL_CONSTEXPR_INLINE emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); -#endif +NBL_CONSTEXPR_INLINE emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -402,11 +405,7 @@ struct plus_assign > lhs = baseOp(lhs, rhs); } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; template @@ -420,23 +419,30 @@ struct minus_assign > lhs = baseOp(lhs, rhs); } - #ifndef __HLSL_VERSION - NBL_CONSTEXPR_STATIC_INLINE type_t identity = base_t::identity; - #else - NBL_CONSTEXPR_STATIC_INLINE type_t identity; - #endif + const static type_t identity; }; -#ifdef __HLSL_VERSION template<> -NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; -#endif +NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; + +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ +namespace impl +{ 
+template +struct is_emulating_integral_scalar > +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} +} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 65a97bbe68..4d7c3839d9 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -330,8 +330,8 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) - DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) - DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + //DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 51ee4f4829..93687bdb6a 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -207,23 +207,6 @@ struct ternary_operator } }; -template -struct ternary_operator > -{ - using type_t = T; - using traits = hlsl::vector_traits; - - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select(condition, lhs, rhs); - } - - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(vector) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select, T>(condition, lhs, rhs); - } -}; - template struct left_shift_operator { @@ -252,34 +235,68 @@ struct left_shift_operator) > } }; -template NBL_PARTIAL_REQ_TOP(! 
(concepts::IntVector) && concepts::Vectorial) -struct left_shift_operator) && concepts::Vectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::Vector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, leftShift(operand.getComponent(i), bits.getComponent(i))); + setter(shifted, i, leftShift(getter(operand, i), getter(bits, i))); } return shifted; } NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, leftShift(operand.getComponent(i), bits)); + 
setter(shifted, i, leftShift(getter(operand, i), bits)); } return shifted; } @@ -313,34 +330,68 @@ struct arithmetic_right_shift_operator NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct arithmetic_right_shift_operator) > +template NBL_PARTIAL_REQ_TOP(!concepts::Vector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, rightShift(operand.getComponent(i), bits.getComponent(i))); + setter(shifted, i, rightShift(getter(operand, i), getter(bits, i))); } return shifted; } NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + { + array_get getter; + array_set setter; NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = 
uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] for (uint16_t i = 0; i < extent; i++) { - shifted.setComponent(i, rightShift(operand.getComponent(i), bits)); + setter(shifted, i, rightShift(getter(operand, i), bits)); } return shifted; } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index ea583fddfa..9e62e40c2a 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/functional.hlsl" #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" +#include "nbl/builtin/hlsl/portable/vector_t.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -22,90 +23,67 @@ namespace impl // Valid dimension for a morton code template -NBL_BOOL_CONCEPT MortonDimension = 1 < D && D < 5; +NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; -// Basic decode masks - -template -struct decode_mask; +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- -template -struct decode_mask : integral_constant {}; +NBL_CONSTEXPR uint16_t CodingStages = 5; -template -struct decode_mask : integral_constant::value << Dim) | T(1)> {}; +template +struct coding_mask; -template -NBL_CONSTEXPR T decode_mask_v = decode_mask::value; +template +NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; -// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- -// Proper encode masks (either generic `T array[masksPerDImension]` or `morton_mask`) impossible to have until at best HLSL202y +// 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage +// mask also considers the total number of bits we're cnsidering for a code (all 
other masks operate on a bit-agnostic basis). +#define NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(DIM, BASE_VALUE) template struct coding_mask\ +{\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t KilloffMask = _Bits * DIM < 64 ? (uint64_t(1) << (_Bits * DIM)) - 1 : ~uint64_t(0);\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE) & KilloffMask;\ +}; -#ifndef __HLSL_VERSION +#define NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(DIM, STAGE, BASE_VALUE) template struct coding_mask\ +{\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE);\ +}; -#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ +// Final stage mask also counts exact number of bits, although maybe it's not necessary +#define NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template struct coding_mask\ {\ - NBL_CONSTEXPR_STATIC_INLINE T value = _static_cast(HEX_VALUE);\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ }; -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off -#else +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, uint64_t(0x30C30C30C30C30C3)) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, uint64_t(0xF00F00F00F00F00F)) // Groups bits by 4 on, 8 off 
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, uint64_t(0x00FF0000FF0000FF)) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, uint64_t(0xFFFF00000000FFFF)) // Groups bits by 16 on, 32 off -#define NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK \ -{\ - NBL_CONSTEXPR_STATIC_INLINE T value;\ -};\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint16_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint32_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ -template<>\ -NBL_CONSTEXPR_STATIC_INLINE uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE);\ - -#define NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE) template<> struct morton_mask_##DIM##_##MASK##\ -{\ - NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t value;\ -};\ -NBL_CONSTEXPR_STATIC_INLINE emulated_uint64_t morton_mask_##DIM##_##MASK##::value = _static_cast(HEX_VALUE); -#endif +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) -#define NBL_MORTON_DECODE_MASK(DIM, MASK, HEX_VALUE) template struct morton_mask_##DIM##_##MASK ;\ - NBL_MORTON_EMULATED_DECODE_MASK(DIM, MASK, HEX_VALUE)\ - NBL_MORTON_GENERIC_DECODE_MASK(DIM, MASK, HEX_VALUE)\ - template\ - NBL_CONSTEXPR T morton_mask_##DIM##_##MASK##_v = morton_mask_##DIM##_##MASK##::value; - -NBL_MORTON_DECODE_MASK(2, 0, uint64_t(0x5555555555555555)) // Groups 
bits by 1 on, 1 off -NBL_MORTON_DECODE_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off -NBL_MORTON_DECODE_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off -NBL_MORTON_DECODE_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off -NBL_MORTON_DECODE_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off -NBL_MORTON_DECODE_MASK(2, 5, uint64_t(0x00000000FFFFFFFF)) // Groups bits by 32 on, 32 off - -NBL_MORTON_DECODE_MASK(3, 0, uint64_t(0x1249249249249249)) // Groups bits by 1 on, 2 off - also limits each dimension to 21 bits -NBL_MORTON_DECODE_MASK(3, 1, uint64_t(0x01C0E070381C0E07)) // Groups bits by 3 on, 6 off -NBL_MORTON_DECODE_MASK(3, 2, uint64_t(0x0FC003F000FC003F)) // Groups bits by 6 on, 12 off -NBL_MORTON_DECODE_MASK(3, 3, uint64_t(0x0000FFF000000FFF)) // Groups bits by 12 on, 24 off -NBL_MORTON_DECODE_MASK(3, 4, uint64_t(0x0000000000FFFFFF)) // Groups bits by 24 on, 48 off - -NBL_MORTON_DECODE_MASK(4, 0, uint64_t(0x1111111111111111)) // Groups bits by 1 on, 3 off -NBL_MORTON_DECODE_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off -NBL_MORTON_DECODE_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off -NBL_MORTON_DECODE_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off -NBL_MORTON_DECODE_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off - -#undef NBL_MORTON_DECODE_MASK -#undef NBL_MORTON_EMULATED_DECODE_MASK -#undef NBL_MORTON_GENERIC_DECODE_MASK - -// ----------------------------------------------------------------- MORTON ENCODERS --------------------------------------------------- - -template -struct MortonEncoder; - -template -struct MortonEncoder<2, Bits, encode_t> +NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS + +#undef NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK + +// 
----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- + +template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +struct MortonEncoder { template NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) @@ -114,168 +92,70 @@ struct MortonEncoder<2, Bits, encode_t> encode_t encoded = _static_cast(decodedValue); NBL_IF_CONSTEXPR(Bits > 16) { - encoded = (encoded | leftShift(encoded, 16)) & morton_mask_2_4_v; + encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 8) { - encoded = (encoded | leftShift(encoded, 8)) & morton_mask_2_3_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 8 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 4) { - encoded = (encoded | leftShift(encoded, 4)) & morton_mask_2_2_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 4 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 2) { - encoded = (encoded | leftShift(encoded, 2)) & morton_mask_2_1_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 2 * (Dim - 1)); } - encoded = (encoded | leftShift(encoded, 1)) & morton_mask_2_0_v; - return encoded; - } -}; - -template -struct MortonEncoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) - { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); - NBL_IF_CONSTEXPR(Bits > 12) - { - encoded = (encoded | leftShift(encoded, 24)) & morton_mask_3_3_v; - } - NBL_IF_CONSTEXPR(Bits > 6) - { - encoded = (encoded | leftShift(encoded, 12)) & morton_mask_3_2_v; - } - NBL_IF_CONSTEXPR(Bits > 3) - { - encoded = (encoded | leftShift(encoded, 6)) & morton_mask_3_1_v; - } - encoded = (encoded | leftShift(encoded, 2) | leftShift(encoded, 4)) & morton_mask_3_0_v; - return encoded; - } -}; - -template 
-struct MortonEncoder<4, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) - { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); - NBL_IF_CONSTEXPR(Bits > 8) - { - encoded = (encoded | leftShift(encoded, 24)) & morton_mask_4_3_v; - } - NBL_IF_CONSTEXPR(Bits > 4) - { - encoded = (encoded | leftShift(encoded, 12)) & morton_mask_4_2_v; - } - NBL_IF_CONSTEXPR(Bits > 2) + NBL_IF_CONSTEXPR(Bits > 1) { - encoded = (encoded | leftShift(encoded, 6)) & morton_mask_4_1_v; + encoded = encoded & _static_cast(coding_mask_v); + encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); } - encoded = (encoded | leftShift(encoded, 3)) & morton_mask_4_0_v; - return encoded; + return encoded & _static_cast(coding_mask_v); } }; -// ----------------------------------------------------------------- MORTON DECODERS --------------------------------------------------- +// ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template -struct MortonDecoder; - -template -struct MortonDecoder<2, Bits, encode_t> +template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +struct MortonDecoder { - template + template 16), uint32_t, uint16_t> + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_2_0_v; + arithmetic_right_shift_operator > rightShift; + portable_vector_t decoded; NBL_IF_CONSTEXPR(Bits > 1) { - decoded = (decoded | rightShift(decoded, 1)) & morton_mask_2_1_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 1 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 2) { - decoded = (decoded | rightShift(decoded, 2)) & morton_mask_2_2_v; + decoded 
= decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 2 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 4) { - decoded = (decoded | rightShift(decoded, 4)) & morton_mask_2_3_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 4 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 8) { - decoded = (decoded | rightShift(decoded, 8)) & morton_mask_2_4_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 8 * (Dim - 1)); } NBL_IF_CONSTEXPR(Bits > 16) { - decoded = (decoded | rightShift(decoded, 16)) & morton_mask_2_5_v; - } - - return _static_cast(decoded); - } -}; - -template -struct MortonDecoder<3, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) - { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_3_0_v; - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = (decoded | rightShift(decoded, 2) | rightShift(decoded, 4)) & morton_mask_3_1_v; - } - NBL_IF_CONSTEXPR(Bits > 3) - { - decoded = (decoded | rightShift(decoded, 6)) & morton_mask_3_2_v; - } - NBL_IF_CONSTEXPR(Bits > 6) - { - decoded = (decoded | rightShift(decoded, 12)) & morton_mask_3_3_v; - } - NBL_IF_CONSTEXPR(Bits > 12) - { - decoded = (decoded | rightShift(decoded, 24)) & morton_mask_3_4_v; - } - - return _static_cast(decoded); - } -}; - -template -struct MortonDecoder<4, Bits, encode_t> -{ - template - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) - { - arithmetic_right_shift_operator rightShift; - encode_t decoded = encodedValue & morton_mask_4_0_v; - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = (decoded | rightShift(decoded, 3)) & morton_mask_4_1_v; - } - NBL_IF_CONSTEXPR(Bits > 2) - { - decoded = (decoded | rightShift(decoded, 6)) & morton_mask_4_2_v; - } - NBL_IF_CONSTEXPR(Bits > 4) - { - decoded = (decoded | rightShift(decoded, 12)) & morton_mask_4_3_v; - } - 
NBL_IF_CONSTEXPR(Bits > 8) - { - decoded = (decoded | rightShift(decoded, 24)) & morton_mask_4_4_v; + decoded = decoded & _static_cast(coding_mask_v); + decoded = decoded | rightShift(decoded, 16 * (Dim - 1)); } - return _static_cast(decoded); + return _static_cast(decoded & _static_cast(coding_mask_v)); } }; @@ -290,7 +170,7 @@ struct Equals { NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; vector retVal; [[unroll]] @@ -342,7 +222,7 @@ struct BaseComparison { NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; vector retVal; ComparisonOp comparison; @@ -392,7 +272,7 @@ struct LessEquals : BaseComparison && D * Bits <= 64) +template && D * Bits <= 64) struct code { using this_t = code; @@ -515,7 +395,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; this_t retVal; retVal.value = _static_cast(uint64_t(0)); @@ -536,7 +416,7 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = impl::decode_mask_v; + NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); left_shift_operator leftShift; this_t retVal; retVal.value = _static_cast(uint64_t(0)); @@ -653,14 +533,14 @@ struct arithmetic_right_shift_operator > #ifndef __HLSL_VERSION -template&& D* Bits <= 64) +template&& D* Bits <= 
64) constexpr inline morton::code morton::code::operator<<(uint16_t bits) const { left_shift_operator> leftShift; return leftShift(*this, bits); } -template&& D* Bits <= 64) +template&& D* Bits <= 64) constexpr inline morton::code morton::code::operator>>(uint16_t bits) const { arithmetic_right_shift_operator> rightShift; diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index 5bfc7ca89b..bc160de788 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -664,6 +664,8 @@ using conditional_t = typename conditional::type; // Template Variables +template +NBL_CONSTEXPR T integral_constant_v = integral_constant::value; template NBL_CONSTEXPR bool is_same_v = is_same::value; template From 53a5f6a8cd4c19718694ff701c3723bbfffcf0f5 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 11 Apr 2025 17:04:15 -0300 Subject: [PATCH 24/75] Vectorized encode/decode for better pipelining --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 152 ++++++++++-------- include/nbl/builtin/hlsl/morton.hlsl | 29 +++- 2 files changed, 106 insertions(+), 75 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 4d7c3839d9..c4938fc9c2 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -147,93 +147,107 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + val); - - return output; + #define NBL_EMULATED_VECTOR_DEFINE_OPERATOR(OP)\ + NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC this_t 
operator##OP (this_t other)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (vector other)\ + {\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) - { - this_t output; + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(&) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(|) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(^) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(+) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(-) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(*) + NBL_EMULATED_VECTOR_DEFINE_OPERATOR(/) - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other[i]); + #undef NBL_EMULATED_VECTOR_DEFINE_OPERATOR - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) + NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { - this_t output; - + component_t sum = 0; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - val); + sum = sum + CRTP::getComponent(i); - return output; + return sum; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) - { - this_t output; +}; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); +template +struct emulated_vector : CRTP +{ + using component_t = ComponentType; + using this_t = emulated_vector; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) + NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) { 
this_t output; - + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other[i]); + output.setComponent(i, other.getComponent(i)); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) + template + NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) { this_t output; - + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * val); + output.setComponent(i, ComponentType::create(other[i])); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); - return output; + #define NBL_EMULATED_VECTOR_OPERATOR(OP, ENABLE_CONDITION) NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (component_t val)\ + {\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + val);\ + return output;\ + }\ + NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ + {\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i));\ + return output;\ } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other[i]); - return output; - } + NBL_EMULATED_VECTOR_OPERATOR(&, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(|, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(^, concepts::IntegralLikeScalar) + NBL_EMULATED_VECTOR_OPERATOR(+, true) + NBL_EMULATED_VECTOR_OPERATOR(-, true) + NBL_EMULATED_VECTOR_OPERATOR(*, true) + NBL_EMULATED_VECTOR_OPERATOR(/, true) + + #undef NBL_EMULATED_VECTOR_OPERATOR - 
NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() + NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() { - component_t sum = 0; + ComponentType sum = ComponentType::create(0); + [[unroll]] for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); @@ -241,6 +255,7 @@ struct emulated_vector : CRTP } }; + #define DEFINE_OPERATORS_FOR_TYPE(...)\ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ {\ @@ -270,12 +285,13 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// TODO: some of code duplication could be avoided -template -struct emulated_vector : CRTP +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- + +template +struct emulated_vector, CRTP, false> : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; + using component_t = emulated_float64_t; + using this_t = emulated_vector; NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) { @@ -293,7 +309,7 @@ struct emulated_vector : CRTP this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); + output.setComponent(i, component_t::create(other[i])); return output; } @@ -330,8 +346,6 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) - //DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) - //DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) @@ -341,9 +355,9 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { - ComponentType sum = ComponentType::create(0); + component_t sum 
= component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9e62e40c2a..e8cb2b73bf 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -85,11 +85,12 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) struct MortonEncoder { - template + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { - left_shift_operator leftShift; - encode_t encoded = _static_cast(decodedValue); + left_shift_operator > leftShift; + portable_vector_t encoded = _static_cast >(decodedValue); NBL_IF_CONSTEXPR(Bits > 16) { encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); @@ -114,7 +115,16 @@ struct MortonEncoder encoded = encoded & _static_cast(coding_mask_v); encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); } - return encoded & _static_cast(coding_mask_v); + encoded = encoded & _static_cast(coding_mask_v); + encoded = leftShift(encoded, _static_cast >(vector(0, 1, 2, 3))); + // The `encoded` above is vectorized for each coord, here we collapse all coords into a single element + encode_t actualEncoded = _static_cast(uint64_t(0)); + array_get, encode_t> getter; + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + actualEncoded = actualEncoded | getter(encoded, i); + + return actualEncoded; } }; @@ -123,12 +133,19 @@ struct MortonEncoder template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) struct MortonDecoder { - template 16), uint32_t, uint16_t> - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(vector_traits::scalar_type) * 8 >= Bits) + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename 
vector_traits::scalar_type) * 8 >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { arithmetic_right_shift_operator > rightShift; portable_vector_t decoded; + array_set, encode_t> setter; + // Write initial values into decoded + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + setter(decoded, i, encodedValue); + decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); + NBL_IF_CONSTEXPR(Bits > 1) { decoded = decoded & _static_cast(coding_mask_v); From cf52d9cbf2d99e3ceb16495ef9049511cbde2096 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 14 Apr 2025 16:02:17 -0300 Subject: [PATCH 25/75] Adress the last of PR review changes: vectorize more operators, add a bunch of operators and functional structs for vectorial types --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 20 +- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 8 + .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 113 ++++++- include/nbl/builtin/hlsl/functional.hlsl | 28 +- include/nbl/builtin/hlsl/morton.hlsl | 319 ++++++++---------- include/nbl/builtin/hlsl/mpl.hlsl | 28 +- 6 files changed, 310 insertions(+), 206 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 81bdf32c19..f01d2d78ec 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -90,7 +90,7 @@ namespace impl template struct static_cast_helper { - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(NBL_CONST_REF_ARG(From) u) { #ifndef __HLSL_VERSION return static_cast(u); @@ -99,10 +99,26 @@ struct static_cast_helper #endif } }; + +// CPP-side, this can invoke the copy constructor if the copy is non-trivial in generic code +// HLSL-side, this enables generic conversion code between types, contemplating the case where no conversion is needed +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE_FUNC Same 
cast(NBL_CONST_REF_ARG(Same) s) + { +#ifndef __HLSL_VERSION + return static_cast(s); +#else + return s; +#endif + } +}; + } template -NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) { return impl::static_cast_helper::cast(v); } diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ca51b0060a..4f354c900e 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -431,6 +431,14 @@ NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity template<> NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- + +template<> +struct is_signed : bool_constant {}; + +template<> +struct is_unsigned : bool_constant {}; + // --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- namespace concepts { diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index c4938fc9c2..fd5f5e3c34 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -147,7 +147,7 @@ struct emulated_vector : CRTP return output; } - #define NBL_EMULATED_VECTOR_DEFINE_OPERATOR(OP)\ + #define NBL_EMULATED_VECTOR_OPERATOR(OP)\ NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ {\ this_t output;\ @@ -170,15 +170,33 @@ struct emulated_vector : CRTP return output;\ } - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(&) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(|) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(^) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(+) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(-) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(*) - NBL_EMULATED_VECTOR_DEFINE_OPERATOR(/) + 
NBL_EMULATED_VECTOR_OPERATOR(&) + NBL_EMULATED_VECTOR_OPERATOR(|) + NBL_EMULATED_VECTOR_OPERATOR(^) + NBL_EMULATED_VECTOR_OPERATOR(+) + NBL_EMULATED_VECTOR_OPERATOR(-) + NBL_EMULATED_VECTOR_OPERATOR(*) + NBL_EMULATED_VECTOR_OPERATOR(/) - #undef NBL_EMULATED_VECTOR_DEFINE_OPERATOR + #undef NBL_EMULATED_VECTOR_OPERATOR + + #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ + {\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ + } + + NBL_EMULATED_VECTOR_COMPARISON(==) + NBL_EMULATED_VECTOR_COMPARISON(!=) + NBL_EMULATED_VECTOR_COMPARISON(<) + NBL_EMULATED_VECTOR_COMPARISON(<=) + NBL_EMULATED_VECTOR_COMPARISON(>) + NBL_EMULATED_VECTOR_COMPARISON(>=) + + #undef NBL_EMULATED_VECTOR_COMPARISON NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() { @@ -222,7 +240,7 @@ struct emulated_vector : CRTP this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + val);\ + output.setComponent(i, CRTP::getComponent(i) OP val);\ return output;\ }\ NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ @@ -230,7 +248,7 @@ struct emulated_vector : CRTP this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i));\ + output.setComponent(i, CRTP::getComponent(i) OP other.getComponent(i));\ return output;\ } @@ -244,6 +262,24 @@ struct emulated_vector : CRTP #undef NBL_EMULATED_VECTOR_OPERATOR + #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ + {\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ + } + + NBL_EMULATED_VECTOR_COMPARISON(==) + 
NBL_EMULATED_VECTOR_COMPARISON(!=) + NBL_EMULATED_VECTOR_COMPARISON(<) + NBL_EMULATED_VECTOR_COMPARISON(<=) + NBL_EMULATED_VECTOR_COMPARISON(>) + NBL_EMULATED_VECTOR_COMPARISON(>=) + + #undef NBL_EMULATED_VECTOR_COMPARISON + NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() { ComponentType sum = ComponentType::create(0); @@ -442,7 +478,7 @@ namespace impl template struct static_cast_helper, vector, void> { - static inline emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t2 cast(vector vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -455,7 +491,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t3 cast(vector vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -469,7 +505,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t4 cast(vector vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -487,7 +523,7 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - static inline OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec) { array_get getter; array_set setter; @@ -500,6 +536,53 @@ struct static_cast_helper, emulated_vector_t\ +struct static_cast_helper, emulated_vector_t##N , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + for (int i = 0; i < N; ++i)\ + setter(output, i, _static_cast(getter(vec, i)));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(2) 
+NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(3) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) + +#undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST + +#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ +struct static_cast_helper, emulated_vector_t##M , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##M ;\ + NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + for (int i = 0; i < N; ++i)\ + setter(output, i, getter(vec, i));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TRUNCATION(2, 2) +NBL_EMULATED_VEC_TRUNCATION(2, 3) +NBL_EMULATED_VEC_TRUNCATION(2, 4) +NBL_EMULATED_VEC_TRUNCATION(3, 3) +NBL_EMULATED_VEC_TRUNCATION(3, 4) +NBL_EMULATED_VEC_TRUNCATION(4, 4) + +#undef NBL_EMULATED_VEC_TRUNCATION + } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 93687bdb6a..45198cbe7a 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -80,7 +80,7 @@ struct reference_wrapper : enable_if_t< // TODO: partial specializations for T being a special SPIR-V type for image ops, etc. -#define ALIAS_STD(NAME,OP) template struct NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME { \ using type_t = T; \ \ T operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \ @@ -92,7 +92,7 @@ struct reference_wrapper : enable_if_t< #else // CPP -#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; #endif @@ -136,13 +136,35 @@ ALIAS_STD(divides,/) }; +ALIAS_STD(equal_to,==) }; +ALIAS_STD(not_equal_to,!=) }; ALIAS_STD(greater,>) }; ALIAS_STD(less,<) }; ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal,<=) }; +ALIAS_STD(less_equal, <= ) }; #undef ALIAS_STD +// The above comparison operators return bool on STD. 
Here's a specialization so that they return `vector` for vectorial types +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return lhs OP rhs;\ + }\ +}; + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=) + +#undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION + // ------------------------ Compound assignment operators ---------------------- #define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e8cb2b73bf..d2fca1165f 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,6 +8,7 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" #include "nbl/builtin/hlsl/portable/vector_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -82,61 +83,65 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS // ----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- -template && (Dim * Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) struct MortonEncoder { template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) - NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + 
NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Interleaves each coordinate with `Dim - 1` zeros inbetween each bit, and left-shifts each by their coordinate index + * + * @param [in] decodedValue Cartesian coordinates to interleave and shift + */ + NBL_CONSTEXPR_STATIC_INLINE_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { + NBL_CONSTEXPR_STATIC encode_t EncodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; left_shift_operator > leftShift; - portable_vector_t encoded = _static_cast >(decodedValue); - NBL_IF_CONSTEXPR(Bits > 16) - { - encoded = encoded | leftShift(encoded, 16 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 8) - { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 8 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 4) - { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 4 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 2) - { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 2 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 1) + portable_vector_t interleaved = _static_cast >(decodedValue)& EncodeMasks[CodingStages]; + + NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; + [[unroll]] + for (uint16_t i = Stages; i > 0; i--) { - encoded = encoded & _static_cast(coding_mask_v); - encoded = encoded | leftShift(encoded, 1 * (Dim - 1)); + interleaved = interleaved | leftShift(interleaved, (uint32_t(1) << (i - 1)) * (Dim - 1)); + interleaved = interleaved & EncodeMasks[i - 1]; } - encoded = encoded & _static_cast(coding_mask_v); - encoded = leftShift(encoded, _static_cast >(vector(0, 1, 2, 3))); - // The `encoded` above is vectorized for each coord, here we collapse all coords into a 
single element - encode_t actualEncoded = _static_cast(uint64_t(0)); + + // After interleaving, shift each coordinate left by their index + return leftShift(interleaved, _static_cast >(vector(0, 1, 2, 3))); + } + + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Encodes a vector of cartesian coordinates as a Morton code + * + * @param [in] decodedValue Cartesian coordinates to encode + */ + NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + portable_vector_t interleaveShifted = interleaveShift(decodedValue); + + encode_t encoded = _static_cast(uint64_t(0)); array_get, encode_t> getter; [[unroll]] for (uint16_t i = 0; i < Dim; i++) - actualEncoded = actualEncoded | getter(encoded, i); - - return actualEncoded; + encoded = encoded | getter(interleaveShifted, i); + + return encoded; } }; // ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template && (Dim* Bits <= 64) && (sizeof(encode_t) * 8 >= Dim * Bits)) +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) struct MortonDecoder { template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && sizeof(typename vector_traits::scalar_type) * 8 >= Bits) + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { + NBL_CONSTEXPR_STATIC encode_t DecodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; arithmetic_right_shift_operator > rightShift; portable_vector_t decoded; array_set, encode_t> setter; @@ -146,38 +151,28 @@ struct MortonDecoder setter(decoded, i, 
encodedValue); decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); - NBL_IF_CONSTEXPR(Bits > 1) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 1 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 2) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 2 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 4) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 4 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 8) - { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 8 * (Dim - 1)); - } - NBL_IF_CONSTEXPR(Bits > 16) + NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; + [[unroll]] + for (uint16_t i = 0; i < Stages; i++) { - decoded = decoded & _static_cast(coding_mask_v); - decoded = decoded | rightShift(decoded, 16 * (Dim - 1)); + decoded = decoded & DecodeMasks[i]; + decoded = decoded | rightShift(decoded, (uint32_t(1) << i) * (Dim - 1)); } - return _static_cast(decoded & _static_cast(coding_mask_v)); + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated + NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) + return _static_cast(decoded); + else + return _static_cast(decoded & DecodeMasks[CodingStages]); } }; // ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- // Here because no partial specialization of methods +// `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted + +template +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::round_up_to_pot_v)); template struct Equals; @@ -185,105 +180,76 @@ struct Equals; template struct Equals 
{ - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - retVal[i] = (value & leftShift(Mask, i)) == leftShift(rhs[i], i); - } - return retVal; + NBL_CONSTEXPR portable_vector_t zeros = _static_cast >(_static_cast >(vector(0,0,0,0))); + + portable_vector_t rhsCasted = _static_cast >(rhs); + portable_vector_t xored = rhsCasted ^ value; + return xored == zeros; } }; template struct Equals { - template - NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - using U = make_unsigned_t; - vector interleaved; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); - } - Equals equals; - return equals(value, interleaved); + const portable_vector_t interleaved = MortonEncoder::interleaveShift(rhs); + return Equals::__call(value, interleaved); } }; template struct BaseComparison; -// Aux method for extracting highest bit, used by the comparison below -template -NBL_CONSTEXPR_INLINE_FUNC storage_t extractHighestBit(storage_t value, uint16_t coord) -{ - // Like above, if the number encoded in `coord` gets `bits(coord) = ceil((BitWidth - coord)/D)` bits for representation, then the highest index of these - // bits is `bits(coord) - 1` - const uint16_t coordHighestBitIdx = Bits / D - ((coord < Bits % D) ? 
uint16_t(0) : uint16_t(1)); - // This is the index of that bit as an index in the encoded value - const uint16_t shift = coordHighestBitIdx * D + coord; - left_shift_operator leftShift; - return value & leftShift(_static_cast(uint16_t(1)), shift); -} +// Aux variable that has only the sign bit for the first of D dimensions +template +NBL_CONSTEXPR uint64_t SignMask = uint64_t(1) << (D * (Bits - 1)); template struct BaseComparison { - NBL_CONSTEXPR_INLINE_FUNC vector operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; - vector retVal; + NBL_CONSTEXPR_STATIC portable_vector_t InterleaveMasks = _static_cast >(_static_cast >(vector(coding_mask_v, coding_mask_v << 1, coding_mask_v << 2, coding_mask_v << 3))); + NBL_CONSTEXPR_STATIC portable_vector_t SignMasks = _static_cast >(_static_cast >(vector(SignMask, SignMask << 1, SignMask << 2, SignMask << 3))); ComparisonOp comparison; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - storage_t thisCoord = value & leftShift(Mask, i); - storage_t rhsCoord = leftShift(rhs[i], i); - // If coordinate is negative, we add 1s in every bit not corresponding to coord - if (extractHighestBit(thisCoord) != _static_cast(uint64_t(0))) - thisCoord = thisCoord | ~leftShift(Mask, i); - if (extractHighestBit(rhsCoord) != _static_cast(uint64_t(0))) - rhsCoord = rhsCoord | ~leftShift(Mask, i); - retVal[i] = comparison(thisCoord, rhsCoord); - } - return retVal; + // Obtain a vector of deinterleaved coordinates and flip their sign bits + const portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return 
comparison(thisCoord, rhsCoord); } }; template struct BaseComparison { - template - NBL_CONSTEXPR_INLINE_FUNC enable_if_t&& is_scalar_v && (is_signed_v == Signed), vector > - operator()(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - using U = make_unsigned_t; - vector interleaved; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - interleaved[i] = impl::MortonEncoder::encode(_static_cast(rhs[i])); - } + const vector interleaved = MortonEncoder::interleaveShift(rhs); BaseComparison baseComparison; return baseComparison(value, interleaved); } }; template -struct LessThan : BaseComparison > {}; +struct LessThan : BaseComparison > > {}; template -struct LessEquals : BaseComparison > {}; +struct LessEquals : BaseComparison > > {}; +template +struct GreaterThan : BaseComparison > > {}; + +template +struct GreaterEquals : BaseComparison > > {}; } //namespace impl @@ -313,19 +279,11 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { - using U = make_unsigned_t; - left_shift_operator leftShift; - storage_t encodedCartesian = _static_cast(uint64_t(0)); - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - encodedCartesian = encodedCartesian | leftShift(impl::MortonEncoder::encode(_static_cast(cartesian[i])), i); - } this_t retVal; - retVal.value = encodedCartesian; + retVal.value = impl::MortonEncoder::encode(cartesian); return retVal; } @@ -337,8 +295,7 @@ struct code * * @param [in] cartesian Coordinates to encode */ - - template + template= Bits) explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); @@ -347,11 +304,8 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template - constexpr inline explicit operator vector() const noexcept - { - return _static_cast, morton::code, Bits, D>>(*this); - } + template= Bits) + constexpr inline explicit operator vector() const noexcept; #endif @@ -398,14 +352,13 @@ struct code NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC { - left_shift_operator leftShift; - // allOnes encodes a cartesian coordinate with all values set to 1 - this_t allOnes; - allOnes.value = leftShift(_static_cast(1), D) - _static_cast(1); - // Using 2's complement property that arithmetic negation can be obtained by bitwise negation then adding 1 - this_signed_t retVal; - retVal.value = (operator~() + allOnes).value; - return retVal; + this_t zero; + zero.value = _static_cast(0); + #ifndef __HLSL_VERSION + return zero - *this; + #else + return zero - this; + #endif } // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS 
------------------------------------------------- @@ -453,48 +406,51 @@ struct code return value == rhs.value; } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator==(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector equals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::Equals equals; - return equals(value, rhs); - } + return impl::Equals::__call(value, rhs); + } NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return value != rhs.value; } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator!=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector notEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator== (rhs); + return !equals(rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector less(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::LessThan lessThan; - return lessThan(value, rhs); + return impl::LessThan::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator<=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector lessEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - impl::LessEquals lessEquals; - return lessEquals(value, rhs); + return impl::LessEquals::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && BitsAlreadySpread), vector > operator>(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector greater(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator<= (rhs); + return impl::GreaterThan::__call(value, rhs); } - template - enable_if_t<(is_signed_v == Signed) || (is_same_v && 
BitsAlreadySpread), vector > operator>=(NBL_CONST_REF_ARG(vector) rhs) + template) + NBL_CONSTEXPR_INLINE_FUNC vector greaterEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !operator< (rhs); + return impl::GreaterEquals::__call(value, rhs); } }; @@ -571,26 +527,29 @@ namespace impl { // I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (8 * sizeof(I) >= Bits)) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > { NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { - using U = make_unsigned_t; using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; - arithmetic_right_shift_operator rightShift; - vector cartesian; - [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - cartesian[i] = _static_cast(morton::impl::MortonDecoder::template decode(rightShift(val.value, i))); - } - return cartesian; + return morton::impl::MortonDecoder::decode(val.value); } }; } // namespace impl +#ifndef __HLSL_VERSION + +template && D* Bits <= 64) +template = Bits) +constexpr inline morton::code::operator vector() const noexcept +{ + return _static_cast, morton::code, Bits, D>>(*this); +} + +#endif + } //namespace hlsl } //namespace nbl diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 2015b05b3d..67f6445324 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -43,13 +43,23 @@ struct countl_zero : impl::countl_zero template NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +template +struct is_pot : bool_constant< (N > 0 && !(N & 
(N - 1))) > {}; +template +NBL_CONSTEXPR bool is_pot_v = is_pot::value; + template struct log2 { NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint64_t log2_v = log2::value; +NBL_CONSTEXPR uint16_t log2_v = log2::value; + +template +struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; +template +NBL_CONSTEXPR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -79,11 +89,6 @@ struct align_up template NBL_CONSTEXPR uint64_t align_up_v = align_up::value; -template -struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; -template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; - template struct max { @@ -99,6 +104,17 @@ struct min }; template NBL_CONSTEXPR T min_v = min::value; + +template +struct round_up_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; + +template +struct round_down_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_down_to_pot_v = round_down_to_pot::value; + } } } From f954522001947a4f7f4c74696b71571924a5c590 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 24 Apr 2025 15:57:18 -0300 Subject: [PATCH 26/75] Removed `NBL_CONSTEXPR_INLINE_FUNC` macro, replaced all usages with `NBL_CONSTEXPR_FUNC` Adds `OpUndef` to spirv `intrinsics.hlsl` and `cpp_compat.hlsl` Adds an explicit `truncate` function for vectors and emulated vectors Adds a bunch of specializations for vectorial types in `functional.hlsl` Bugfixes and changes to Morton codes, very close to them working properly with emulated ints --- include/nbl/builtin/hlsl/algorithm.hlsl | 18 +- include/nbl/builtin/hlsl/cpp_compat.hlsl | 3 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 27 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 17 +- .../hlsl/cpp_compat/impl/vector_impl.hlsl | 35 -- .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 12 +- .../nbl/builtin/hlsl/cpp_compat/promote.hlsl | 12 +- 
.../nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 76 ++++ .../nbl/builtin/hlsl/emulated/float64_t.hlsl | 16 +- .../builtin/hlsl/emulated/float64_t_impl.hlsl | 16 +- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 160 ++++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 423 ++++++++++-------- include/nbl/builtin/hlsl/functional.hlsl | 144 ++++-- include/nbl/builtin/hlsl/ieee754.hlsl | 16 +- include/nbl/builtin/hlsl/ieee754/impl.hlsl | 16 +- include/nbl/builtin/hlsl/morton.hlsl | 358 ++++++++------- .../builtin/hlsl/spirv_intrinsics/core.hlsl | 7 +- include/nbl/builtin/hlsl/type_traits.hlsl | 2 + src/nbl/builtin/CMakeLists.txt | 2 +- 19 files changed, 798 insertions(+), 562 deletions(-) delete mode 100644 include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl create mode 100644 include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl index 3a7c4963c2..0178673f4e 100644 --- a/include/nbl/builtin/hlsl/algorithm.hlsl +++ b/include/nbl/builtin/hlsl/algorithm.hlsl @@ -18,7 +18,7 @@ namespace impl // TODO: use structs template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { T tmp = lhs; lhs = rhs; @@ -26,7 +26,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -34,7 +34,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -42,7 +42,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, 
NBL_REF_ARG(uint64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -50,7 +50,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -58,7 +58,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -66,7 +66,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -74,7 +74,7 @@ namespace impl } #else template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { std::swap(lhs, rhs); } @@ -82,7 +82,7 @@ namespace impl } template -NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) +NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { impl::swap(lhs, rhs); } diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index cb06447aa1..03d47864fb 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -5,8 +5,9 @@ // it includes vector and matrix #include #include +#include // Had to push some stuff here to avoid circular dependencies -#include +#include #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index f01d2d78ec..0985af6eb3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -14,8 +14,6 @@ #define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_STATIC_INLINE constexpr 
static inline #define NBL_CONSTEXPR_STATIC_FUNC constexpr static -#define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_STATIC_INLINE_FUNC constexpr static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) @@ -44,13 +42,11 @@ namespace nbl::hlsl #define ARROW .arrow(). #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_FUNC inline #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static -#define NBL_CONSTEXPR_STATIC_FUNC static -#define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_STATIC_INLINE_FUNC static inline +#define NBL_CONSTEXPR_STATIC_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) @@ -90,7 +86,7 @@ namespace impl template struct static_cast_helper { - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(NBL_CONST_REF_ARG(From) u) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) u) { #ifndef __HLSL_VERSION return static_cast(u); @@ -100,25 +96,10 @@ struct static_cast_helper } }; -// CPP-side, this can invoke the copy constructor if the copy is non-trivial in generic code -// HLSL-side, this enables generic conversion code between types, contemplating the case where no conversion is needed -template -struct static_cast_helper -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC Same cast(NBL_CONST_REF_ARG(Same) s) - { -#ifndef __HLSL_VERSION - return static_cast(s); -#else - return s; -#endif - } -}; - } template -NBL_CONSTEXPR_INLINE_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) +NBL_CONSTEXPR_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) { return impl::static_cast_helper::cast(v); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl 
b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index e1ba823b9b..4f7c7370bc 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -109,6 +109,8 @@ template struct addCarry_helper; template struct subBorrow_helper; +template +struct undef_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -172,6 +174,7 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, // Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(undef_helper, undef, (T), , T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(bitCount_helper, bitCount, (T), (T), BITCOUNT_HELPER_RETRUN_TYPE) @@ -640,7 +643,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) struct select_helper) > { - NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { return condition ? 
object1 : object2; } @@ -650,7 +653,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > { - NBL_CONSTEXPR_STATIC_INLINE_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { using traits = hlsl::vector_traits; array_get conditionGetter; @@ -665,6 +668,16 @@ struct select_helper&& concepts::V } }; +template +struct undef_helper +{ + NBL_CONSTEXPR_STATIC_FUNC T __call() + { + T t; + return t; + } +}; + #endif // C++ only specializations // C++ and HLSL specializations diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl deleted file mode 100644 index 524d1fa45e..0000000000 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/vector_impl.hlsl +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_CPP_COMPAT_IMPL_VECTOR_IMPL_INCLUDED_ - -#include -#include -#include - -// To prevent implicit truncation warnings -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ - -template NBL_PARTIAL_REQ_TOP(N <= M) -struct static_cast_helper, vector NBL_PARTIAL_REQ_BOT(N <= M) > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(vector) val) - { - vector retVal; - [[unroll]] - for (uint16_t i = 0; i < N; i++) - { - retVal[i] = val[i]; - } - return retVal; - } -}; - -} -} -} - -#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 284ba564d7..c511042c27 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -23,6 +23,12 @@ 
namespace nbl namespace hlsl { +template +NBL_CONSTEXPR_FUNC T undef() +{ + return cpp_compat_intrinsics_impl::undef_helper::__call(); +} + template inline typename cpp_compat_intrinsics_impl::bitCount_helper::return_t bitCount(NBL_CONST_REF_ARG(T) val) { @@ -151,7 +157,7 @@ inline bool any(Vector vec) } template -NBL_CONSTEXPR_INLINE_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) { return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); } @@ -224,13 +230,13 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A } template -NBL_CONSTEXPR_INLINE_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +NBL_CONSTEXPR_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) { return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); } template -NBL_CONSTEXPR_INLINE_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +NBL_CONSTEXPR_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) { return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); } diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 51ca73f6d3..0afe214de7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - T operator()(U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } @@ -26,7 +26,7 @@ struct Promote template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > 
operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v)}; return promoted; @@ -36,7 +36,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v)}; return promoted; @@ -46,7 +46,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -56,7 +56,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -68,7 +68,7 @@ struct Promote, U> } template -T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) { impl::Promote _promote; return _promote(v); diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl new file mode 100644 index 0000000000..a95df183be --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -0,0 +1,76 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ + +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace impl +{ + +template +struct Truncate +{ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + { + return T(v); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar) +struct 
Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1]}; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2], v[3] }; + return truncated; + } +}; + +} //namespace impl + +template +NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) +{ + impl::Truncate _truncate; + return _truncate(v); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index a0cde90df9..2dfc52c957 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -412,25 +412,25 @@ inline int extractExponent(__VA_ARGS__ x)\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +NBL_CONSTEXPR_FUNC 
__VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ {\ return extractNormalizeMantissa(x.data);\ }\ @@ -577,10 +577,10 @@ namespace ieee754 { namespace impl { -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } } IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 44b881345d..df785e3e8f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -41,7 +41,7 @@ namespace hlsl { namespace emulated_float64_t_impl { -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_FUNC uint64_t2 
shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -74,7 +74,7 @@ inline uint64_t castFloat32ToStorageType(float32_t val) } }; -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +NBL_CONSTEXPR_FUNC bool isZero(uint64_t val) { return (val << 1) == 0ull; } @@ -137,18 +137,18 @@ inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t value) +NBL_CONSTEXPR_FUNC uint64_t flushDenormToZero(uint64_t value) { const uint64_t biasBits = value & ieee754::traits::exponentMask; return biasBits ? value : (value & ieee754::traits::signMask); } -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted | expShifted | mantissa; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -156,18 +156,18 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs | rhs) << 1); } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs) << 1) && (lhs == rhs); } template -NBL_CONSTEXPR_INLINE_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) { if (!FastMath) { diff --git 
a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 4f354c900e..8a3fd42faf 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -20,6 +20,7 @@ struct emulated_int64_base { using storage_t = vector; using this_t = emulated_int64_base; + using this_signed_t = emulated_int64_base; storage_t data; @@ -29,6 +30,12 @@ struct emulated_int64_base emulated_int64_base() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_int64_base(const I& toEmulate); + + constexpr explicit emulated_int64_base(const emulated_int64_base& other) : data(other.data) {} + #endif /** @@ -36,7 +43,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { this_t retVal; retVal.data = _data; @@ -46,47 +53,57 @@ struct emulated_int64_base /** * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern * - * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated + * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) + NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) { return create(storage_t(lo, hi)); } + // ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- + // GLM requires these for vector casts + + #ifndef __HLSL_VERSION + + 
template + constexpr explicit operator I() const noexcept; + + #endif + // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC { return data.x; } - NBL_CONSTEXPR_INLINE_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC { return data.y; } // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal = create(data & rhs.data); return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal = create(data | rhs.data); return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal = create(data ^ rhs.data); return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { this_t retVal = create(~data); return retVal; @@ -101,56 +118,62 @@ struct emulated_int64_base // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + { + vector negated = -data; + return this_signed_t::create(_static_cast(negated)); + } + + 
NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); - const this_t retVal = create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); - return retVal; + return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); - const this_t retVal = create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); - return retVal; + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return all(data == rhs.data); + equal_to equals; + return all(equals(data, rhs.data)); } - NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - return any(data != rhs.data); + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); } - NBL_CONSTEXPR_INLINE_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less // (lower bits are always positive in both unsigned and 2's complement so comparison 
can happen as-is) const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (_static_cast(__getMSB()) < _static_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); const bool LSB = __getLSB() < rhs.__getLSB(); return MSBEqual ? LSB : MSB; } - NBL_CONSTEXPR_INLINE_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { // Same reasoning as above const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (_static_cast(__getMSB()) > _static_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); const bool LSB = __getLSB() > rhs.__getLSB(); return MSBEqual ? LSB : MSB; } - NBL_CONSTEXPR_INLINE_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return !operator>(rhs); } - NBL_CONSTEXPR_INLINE_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return !operator<(rhs); } @@ -162,32 +185,16 @@ using emulated_int64_t = emulated_int64_base; namespace impl { -template<> -struct static_cast_helper -{ - using To = emulated_uint64_t; - using From = emulated_int64_t; - - // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) - { - To retVal; - retVal.data = i.data; - return retVal; - } -}; - -template<> -struct static_cast_helper +template +struct static_cast_helper, emulated_int64_base > { - using To = emulated_int64_t; - using From = emulated_uint64_t; + using To = emulated_int64_base; + using From = emulated_int64_base; - // Return only the lowest bits - 
NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From u) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) other) { To retVal; - retVal.data = u.data; + retVal.data = other.data; return retVal; } }; @@ -199,19 +206,19 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) { return _static_cast(val.data.x); } }; -template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) +struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > { using To = I; using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From val) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } @@ -224,28 +231,53 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) { - return To::create(uint32_t(0), _static_cast(i)); + return To::create(_static_cast(i), uint32_t(0)); } }; -template NBL_PARTIAL_REQ_TOP(is_same_v || is_same_v ) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(is_same_v || is_same_v) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) +struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > { using To = emulated_int64_base; using From = I; - NBL_CONSTEXPR_STATIC_INLINE_FUNC To cast(From i) + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) { + // `bit_cast` blocked by GLM vectors using a union + #ifndef __HLSL_VERSION + return To::create(_static_cast(i), _static_cast(i >> 32)); + 
#else To retVal; - retVal.data = bit_cast(i); + retVal.data = bit_cast >(i); return retVal; + #endif } }; } //namespace impl +// Define constructor and conversion operators + +#ifndef __HLSL_VERSION + +template +template +constexpr emulated_int64_base::emulated_int64_base(const I& toEmulate) +{ + *this = _static_cast>(toEmulate); +} + +template +template +constexpr emulated_int64_base::operator I() const noexcept +{ + return _static_cast(*this); +} + +#endif + // ---------------------- Functional operators ------------------------ template @@ -258,7 +290,7 @@ struct left_shift_operator > //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; @@ -269,7 +301,7 @@ struct left_shift_operator > } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -285,7 +317,7 @@ struct arithmetic_right_shift_operator //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? 
bits - ComponentBitWidth : ComponentBitWidth - bits; @@ -296,7 +328,7 @@ struct arithmetic_right_shift_operator } // If `_bits > 63` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -312,18 +344,18 @@ struct arithmetic_right_shift_operator //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), ~uint32_t(0)) + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? 
~uint32_t(0) : uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index fd5f5e3c34..3780ce001b 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -24,7 +24,7 @@ struct _2_component_vec static_assert(sizeof(T) <= 8); - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -32,7 +32,7 @@ struct _2_component_vec y = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -40,9 +40,10 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; @@ -56,7 +57,7 @@ struct _3_component_vec T z; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void 
setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -66,7 +67,7 @@ struct _3_component_vec z = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -76,9 +77,10 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; @@ -92,7 +94,7 @@ struct _4_component_vec T z; T w; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -104,7 +106,7 @@ struct _4_component_vec w = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -116,184 +118,210 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); 
+ return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; }; -template ::value> -struct emulated_vector : CRTP -{ - using this_t = emulated_vector; - using component_t = ComponentType; +template +struct emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - CRTP output; +// Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - } - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other[i]); - - return output; - } - - #define NBL_EMULATED_VECTOR_OPERATOR(OP)\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (component_t val)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP val);\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (this_t other)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC this_t operator##OP (vector other)\ - {\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP other[i]);\ - return output;\ - } +#define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP() NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i).operator##OP());\ + return output;\ +} - NBL_EMULATED_VECTOR_OPERATOR(&) - NBL_EMULATED_VECTOR_OPERATOR(|) - NBL_EMULATED_VECTOR_OPERATOR(^) - NBL_EMULATED_VECTOR_OPERATOR(+) - NBL_EMULATED_VECTOR_OPERATOR(-) - NBL_EMULATED_VECTOR_OPERATOR(*) - NBL_EMULATED_VECTOR_OPERATOR(/) +#define 
NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ +} - #undef NBL_EMULATED_VECTOR_OPERATOR +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP(vector other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ +} - #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ - {\ - vector output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ - return output;\ - } +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ +} - NBL_EMULATED_VECTOR_COMPARISON(==) - NBL_EMULATED_VECTOR_COMPARISON(!=) - NBL_EMULATED_VECTOR_COMPARISON(<) - NBL_EMULATED_VECTOR_COMPARISON(<=) - NBL_EMULATED_VECTOR_COMPARISON(>) - NBL_EMULATED_VECTOR_COMPARISON(>=) +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC vector operator##OP (vector other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for 
(uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other[i];\ + return output;\ +} - #undef NBL_EMULATED_VECTOR_COMPARISON +#define NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +using this_t = emulated_vector;\ +using component_t = ComponentType;\ +NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other)\ +{\ + CRTP output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other.getComponent(i));\ +}\ +NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ +{\ + component_t sum = CRTP::getComponent(0);\ + [[unroll]]\ + for (uint32_t i = 1u; i < CRTP::Dimension; ++i)\ + sum = sum + CRTP::getComponent(i);\ + return sum;\ +} - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() - { - component_t sum = 0; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other)\ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other[i]);\ + return output;\ +} - return sum; - } +// Fundamental, integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) 
+ NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) }; -template -struct emulated_vector : CRTP +// Fundamental, not integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - this_t output; - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - - return output; - } - - template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); - - return output; - } - - #define NBL_EMULATED_VECTOR_OPERATOR(OP, ENABLE_CONDITION) NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (component_t val)\ - {\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) OP val);\ - return output;\ - }\ - NBL_CONSTEXPR_INLINE_FUNC enable_if_t< ENABLE_CONDITION , this_t> operator##OP (this_t other)\ - {\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) OP other.getComponent(i));\ - return output;\ - } - - NBL_EMULATED_VECTOR_OPERATOR(&, concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(|, concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(^, 
concepts::IntegralLikeScalar) - NBL_EMULATED_VECTOR_OPERATOR(+, true) - NBL_EMULATED_VECTOR_OPERATOR(-, true) - NBL_EMULATED_VECTOR_OPERATOR(*, true) - NBL_EMULATED_VECTOR_OPERATOR(/, true) - - #undef NBL_EMULATED_VECTOR_OPERATOR - - #define NBL_EMULATED_VECTOR_COMPARISON(OP) NBL_CONSTEXPR_INLINE_FUNC vector operator##OP (this_t other)\ - {\ - vector output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ - return output;\ - } - - NBL_EMULATED_VECTOR_COMPARISON(==) - NBL_EMULATED_VECTOR_COMPARISON(!=) - NBL_EMULATED_VECTOR_COMPARISON(<) - NBL_EMULATED_VECTOR_COMPARISON(<=) - NBL_EMULATED_VECTOR_COMPARISON(>) - NBL_EMULATED_VECTOR_COMPARISON(>=) - - #undef NBL_EMULATED_VECTOR_COMPARISON + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() - { - ComponentType sum = ComponentType::create(0); - [[unroll]] - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +// Not fundamental, integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector && concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + 
NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - return sum; - } +// Not fundamental, not integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) }; +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR + +// 
----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- #define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -302,7 +330,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -311,7 +339,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -321,15 +349,14 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- template -struct emulated_vector, CRTP, false> : CRTP +struct emulated_vector, CRTP> : CRTP { using component_t = emulated_float64_t; - using this_t = emulated_vector; + using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other) { this_t output; @@ -340,7 +367,7 @@ struct emulated_vector, CRTP, fa } template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other) { this_t output; @@ -350,7 +377,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC { 
this_t output; @@ -359,7 +386,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -368,7 +395,7 @@ struct emulated_vector, CRTP, fa return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -391,7 +418,7 @@ struct emulated_vector, CRTP, fa DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() + NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC { component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) @@ -478,7 +505,7 @@ namespace impl template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -491,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -505,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_INLINE emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -523,12 +550,13 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) 
vec) { array_get getter; array_set setter; OutputVecType output; + [[unroll]] for (int i = 0; i < N; ++i) setter(output, i, _static_cast(getter(vec, i))); @@ -541,11 +569,12 @@ struct static_cast_helper, emulated_vecto {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ OutputVecType output;\ + [[unroll]]\ for (int i = 0; i < N; ++i)\ setter(output, i, _static_cast(getter(vec, i)));\ return output;\ @@ -558,16 +587,38 @@ NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) #undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST +#define NBL_EMULATED_VEC_PROMOTION(N) template\ +struct Promote, ComponentType>\ +{\ + using VecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ + {\ + array_set setter;\ + VecType promoted;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(promoted, i, v);\ + return promoted;\ + }\ +}; + +NBL_EMULATED_VEC_PROMOTION(2) +NBL_EMULATED_VEC_PROMOTION(3) +NBL_EMULATED_VEC_PROMOTION(4) + +#undef NBL_EMULATED_VEC_PROMOTION + #define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ -struct static_cast_helper, emulated_vector_t##M , void>\ +struct Truncate, emulated_vector_t##M >\ {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##M ;\ - NBL_CONSTEXPR_STATIC_INLINE OutputVecType cast(InputVecType vec)\ + NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ OutputVecType output;\ + [[unroll]]\ for (int i = 0; i < N; ++i)\ setter(output, i, getter(vec, i));\ return output;\ @@ -583,7 +634,7 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) #undef NBL_EMULATED_VEC_TRUNCATION -} +} //namespace impl } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl 
b/include/nbl/builtin/hlsl/functional.hlsl index 45198cbe7a..76b527f6bd 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -91,7 +91,6 @@ struct reference_wrapper : enable_if_t< #else // CPP - #define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; @@ -135,18 +134,69 @@ ALIAS_STD(divides,/) NBL_CONSTEXPR_STATIC_INLINE T identity = T(1); }; +#ifndef __HLSL_VERSION + +template +struct bit_not : std::bit_not +{ + using type_t = T; +}; + +#else + +template +struct bit_not +{ + using type_t = T; + + T operator()(NBL_CONST_REF_ARG(T) operand) + { + return ~operand; + } +}; + +// The default version above only works for fundamental scalars, vectors and matrices. This is because you can't call `~x` unless `x` is one of the former. +// Similarly, calling `x.operator~()` is not valid for the aforementioned, and only for types overriding this operator. So, we need a specialization. +template NBL_PARTIAL_REQ_TOP(!(concepts::Scalar || concepts::Vector || concepts::Matrix)) +struct bit_not || concepts::Vector || concepts::Matrix)) > +{ + using type_t = T; + + T operator()(NBL_CONST_REF_ARG(T) operand) + { + return operand.operator~(); + } +}; + +#endif -ALIAS_STD(equal_to,==) }; -ALIAS_STD(not_equal_to,!=) }; -ALIAS_STD(greater,>) }; -ALIAS_STD(less,<) }; -ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal, <= ) }; +ALIAS_STD(equal_to, ==) }; +ALIAS_STD(not_equal_to, !=) }; +ALIAS_STD(greater, >) }; +ALIAS_STD(less, <) }; +ALIAS_STD(greater_equal, >=) }; +ALIAS_STD(less_equal, <=) }; #undef ALIAS_STD -// The above comparison operators return bool on STD. 
Here's a specialization so that they return `vector` for vectorial types -#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +// The above comparison operators return bool on STD, but in HLSL they're supposed to yield bool vectors, so here's a specialization so that they return `vector` for vectorial types + +// GLM doesn't have operators on vectors +#ifndef __HLSL_VERSION + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return glm::GLM_OP (lhs, rhs);\ + }\ +}; + +#else + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ struct NAME ) >\ {\ using type_t = T;\ @@ -156,16 +206,18 @@ struct NAME ) >\ }\ }; -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=) -NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=) +#endif + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==, equal) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=, notEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >, greaterThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <, lessThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=, greaterThanEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) #undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION -// ------------------------ Compound assignment operators ---------------------- +// ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- #define 
COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ @@ -186,7 +238,7 @@ COMPOUND_ASSIGN(divides) #undef COMPOUND_ASSIGN -// ----------------- End of compound assignment ops ---------------- +// ---------------------------------------------------------------- MIN, MAX, TERNARY ------------------------------------------------------------------------- // Min, Max, and Ternary and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs(condition, lhs, rhs); } }; +// ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- + template struct left_shift_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand << bits; } @@ -246,28 +300,28 @@ struct left_shift_operator) > using type_t = T; using scalar_t = scalar_type_t; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand << bits; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { return operand << bits; } }; -template NBL_PARTIAL_REQ_TOP(!concepts::Vector && concepts::IntegralLikeVectorial) -struct left_shift_operator && concepts::IntegralLikeVectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + 
NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -278,11 +332,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -293,11 +347,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -308,11 +362,11 @@ struct left_shift_operator && concept return shifted; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint32_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -329,7 +383,7 @@ struct arithmetic_right_shift_operator { using type_t = T; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, 
NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand >> bits; } @@ -341,28 +395,28 @@ struct arithmetic_right_shift_operator; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { return operand >> bits; } - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) { return operand >> bits; } }; -template NBL_PARTIAL_REQ_TOP(!concepts::Vector&& concepts::IntegralLikeVectorial) -struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > { using type_t = T; using scalar_t = typename vector_traits::scalar_type; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -373,11 +427,11 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -388,11 +442,11 @@ struct arithmetic_right_shift_operator::Dimension>) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) { array_get getter; array_set setter; - 
NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -403,11 +457,11 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC_INLINE uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -426,7 +480,7 @@ struct logical_right_shift_operator using type_t = T; using unsigned_type_t = make_unsigned_t; - NBL_CONSTEXPR_INLINE_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { arithmetic_right_shift_operator arithmeticRightShift; return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 8d9c78a9f0..e81ff08c7b 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -89,7 +89,7 @@ inline int extractExponent(T x) } template -NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +NBL_CONSTEXPR_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { using AsFloat = typename float_of_size::type; return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); @@ -97,20 +97,20 @@ NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) +NBL_CONSTEXPR_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_INLINE_FUNC typename 
unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -118,21 +118,21 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::signMask; } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) +NBL_CONSTEXPR_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) { using AsUint = typename unsigned_integer_of_size::type; @@ -143,7 +143,7 @@ NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) +NBL_CONSTEXPR_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) { using AsFloat = typename float_of_size::type; using AsUint = typename unsigned_integer_of_size::type; diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl 
b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index ad8a3f9228..69fba9795f 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -15,25 +15,25 @@ namespace ieee754 namespace impl { template -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) +NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template -NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) +NBL_CONSTEXPR_FUNC T castBackToFloatType(T x) { using AsFloat = typename float_of_size::type; return bit_cast(x); } -template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } } } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d2fca1165f..650d9ce6ba 
100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -8,7 +8,6 @@ #include "nbl/builtin/hlsl/emulated/int64_t.hlsl" #include "nbl/builtin/hlsl/mpl.hlsl" #include "nbl/builtin/hlsl/portable/vector_t.hlsl" -#include "nbl/builtin/hlsl/mpl.hlsl" // TODO: mega macro to get functional plus, minus, plus_assign, minus_assign @@ -33,8 +32,30 @@ NBL_CONSTEXPR uint16_t CodingStages = 5; template struct coding_mask; -template -NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; +template +NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); + +template +NBL_CONSTEXPR portable_vector_t InterleaveMasks = _static_cast >( + truncate >( + vector(coding_mask_v, + coding_mask_v << 1, + coding_mask_v << 2, + coding_mask_v << 3))); + +template +struct sign_mask : integral_constant {}; + +template +NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); + +template +NBL_CONSTEXPR portable_vector_t SignMasks = _static_cast >( + truncate >( + vector(sign_mask_v, + sign_mask_v << 1, + sign_mask_v << 2, + sign_mask_v << 3))); // 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage // mask also considers the total number of bits we're cnsidering for a code (all other masks operate on a bit-agnostic basis). 
@@ -57,23 +78,23 @@ NBL_CONSTEXPR uint64_t coding_mask_v = coding_mask::value; NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ }; -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555) // Groups bits by 1 on, 1 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, uint64_t(0x3333333333333333)) // Groups bits by 2 on, 2 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, uint64_t(0x0F0F0F0F0F0F0F0F)) // Groups bits by 4 on, 4 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, uint64_t(0x00FF00FF00FF00FF)) // Groups bits by 8 on, 8 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, uint64_t(0x0000FFFF0000FFFF)) // Groups bits by 16 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555ull) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, 0x3333333333333333ull) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, 0x0F0F0F0F0F0F0F0Full) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, 0x00FF00FF00FF00FFull) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, 0x0000FFFF0000FFFFull) // Groups bits by 16 on, 16 off -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249) // Groups bits by 1 on, 2 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, uint64_t(0x30C30C30C30C30C3)) // Groups bits by 2 on, 4 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, uint64_t(0xF00F00F00F00F00F)) // Groups bits by 4 on, 8 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, uint64_t(0x00FF0000FF0000FF)) // Groups bits by 8 on, 16 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, uint64_t(0xFFFF00000000FFFF)) // Groups bits by 16 on, 32 off +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249ull) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, 0x30C30C30C30C30C3ull) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, 0xF00F00F00F00F00Full) // Groups bits by 4 
on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, 0x00FF0000FF0000FFull) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, 0xFFFF00000000FFFFull) // Groups bits by 16 on, 32 off -NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111) // Groups bits by 1 on, 3 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, uint64_t(0x0303030303030303)) // Groups bits by 2 on, 6 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, uint64_t(0x000F000F000F000F)) // Groups bits by 4 on, 12 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, uint64_t(0x000000FF000000FF)) // Groups bits by 8 on, 24 off -NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, uint64_t(0x000000000000FFFF)) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111ull) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, 0x0303030303030303ull) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, 0x000F000F000F000Full) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, 0x000000FF000000FFull) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, 0x000000000000FFFFull) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS @@ -81,10 +102,9 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS #undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK #undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK -// ----------------------------------------------------------------- MORTON ENCODER --------------------------------------------------- - -template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) -struct MortonEncoder +// ----------------------------------------------------------------- MORTON TRANSCODER --------------------------------------------------- +template && Dim * 
Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) +struct Transcoder { template 16), vector, vector > NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) @@ -93,22 +113,26 @@ struct MortonEncoder * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ - NBL_CONSTEXPR_STATIC_INLINE_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { - NBL_CONSTEXPR_STATIC encode_t EncodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; left_shift_operator > leftShift; - portable_vector_t interleaved = _static_cast >(decodedValue)& EncodeMasks[CodingStages]; + portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; - NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; - [[unroll]] - for (uint16_t i = Stages; i > 0; i--) - { - interleaved = interleaved | leftShift(interleaved, (uint32_t(1) << (i - 1)) * (Dim - 1)); - interleaved = interleaved & EncodeMasks[i - 1]; + #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\ + interleaved = interleaved & coding_mask_v;\ } + ENCODE_LOOP_ITERATION(4) + ENCODE_LOOP_ITERATION(3) + ENCODE_LOOP_ITERATION(2) + ENCODE_LOOP_ITERATION(1) + ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION // After interleaving, shift each coordinate left by their index - return leftShift(interleaved, _static_cast >(vector(0, 1, 2, 3))); + return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } template 16), vector, vector > @@ -118,52 +142,58 @@ struct MortonEncoder * * @param [in] decodedValue Cartesian coordinates to encode */ - 
NBL_CONSTEXPR_STATIC_INLINE_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { - portable_vector_t interleaveShifted = interleaveShift(decodedValue); + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); - encode_t encoded = _static_cast(uint64_t(0)); array_get, encode_t> getter; + encode_t encoded = getter(interleaveShifted, 0); + [[unroll]] - for (uint16_t i = 0; i < Dim; i++) + for (uint16_t i = 1; i < Dim; i++) encoded = encoded | getter(interleaveShifted, i); return encoded; } -}; - -// ----------------------------------------------------------------- MORTON DECODER --------------------------------------------------- -template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::round_up_to_pot_v) -struct MortonDecoder -{ template 16), vector, vector > NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) - NBL_CONSTEXPR_STATIC_INLINE_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + /** + * @brief Decodes a Morton code back to a vector of cartesian coordinates + * + * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) + */ + NBL_CONSTEXPR_STATIC_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { - NBL_CONSTEXPR_STATIC encode_t DecodeMasks[CodingStages + 1] = { _static_cast(coding_mask_v), _static_cast(coding_mask_v), _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) , _static_cast(coding_mask_v) }; - arithmetic_right_shift_operator > rightShift; + arithmetic_right_shift_operator encodedRightShift; portable_vector_t decoded; array_set, encode_t> setter; // Write initial values into decoded [[unroll]] for (uint16_t i = 0; i < Dim; i++) - setter(decoded, i, encodedValue); - decoded = rightShift(decoded, _static_cast >(vector(0, 1, 2, 3))); + setter(decoded, i, 
encodedRightShift(encodedValue, i)); - NBL_CONSTEXPR_STATIC uint16_t Stages = mpl::log2_ceil_v; - [[unroll]] - for (uint16_t i = 0; i < Stages; i++) - { - decoded = decoded & DecodeMasks[i]; - decoded = decoded | rightShift(decoded, (uint32_t(1) << i) * (Dim - 1)); + arithmetic_right_shift_operator > rightShift; + + #define DECODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + decoded = decoded & coding_mask_v;\ + decoded = decoded | rightShift(decoded, (uint16_t(1) << I) * (Dim - 1));\ } + DECODE_LOOP_ITERATION(0) + DECODE_LOOP_ITERATION(1) + DECODE_LOOP_ITERATION(2) + DECODE_LOOP_ITERATION(3) + DECODE_LOOP_ITERATION(4) + + #undef DECODE_LOOP_ITERATION + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) return _static_cast(decoded); else - return _static_cast(decoded & DecodeMasks[CodingStages]); + return _static_cast(decoded & coding_mask_v); } }; @@ -172,7 +202,7 @@ struct MortonDecoder // `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted template -NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::round_up_to_pot_v)); +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); template struct Equals; @@ -181,13 +211,14 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR portable_vector_t zeros = _static_cast 
>(_static_cast >(vector(0,0,0,0))); + const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); - portable_vector_t rhsCasted = _static_cast >(rhs); - portable_vector_t xored = rhsCasted ^ value; - return xored == zeros; + const portable_vector_t rhsCasted = _static_cast >(rhs); + const portable_vector_t xored = rhsCasted ^ value; + equal_to > equal; + return equal(xored, zeros); } }; @@ -195,10 +226,11 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - const portable_vector_t interleaved = MortonEncoder::interleaveShift(rhs); - return Equals::__call(value, interleaved); + using right_sign_t = conditional_t, make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return Equals::template __call(value, interleaved); } }; @@ -213,17 +245,28 @@ template { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { - NBL_CONSTEXPR_STATIC portable_vector_t InterleaveMasks = _static_cast >(_static_cast >(vector(coding_mask_v, coding_mask_v << 1, coding_mask_v << 2, coding_mask_v << 3))); - NBL_CONSTEXPR_STATIC portable_vector_t SignMasks = _static_cast >(_static_cast >(vector(SignMask, SignMask << 1, SignMask << 2, SignMask << 3))); ComparisonOp comparison; - // Obtain a vector of deinterleaved coordinates and flip their sign bits - const portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; - // rhs already deinterleaved, just have to cast type and flip sign - const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + NBL_IF_CONSTEXPR(Signed) + { + // 
Obtain a vector of deinterleaved coordinates and flip their sign bits + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return comparison(thisCoord, rhsCoord); + } + else + { + // Obtain a vector of deinterleaved coordinates + portable_vector_t thisCoord = InterleaveMasks & value; + // rhs already deinterleaved, just have to cast type + const portable_vector_t rhsCoord = _static_cast >(rhs); - return comparison(thisCoord, rhsCoord); + return comparison(thisCoord, rhsCoord); + } + } }; @@ -231,11 +274,11 @@ template { template) - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { - const vector interleaved = MortonEncoder::interleaveShift(rhs); - BaseComparison baseComparison; - return baseComparison(value, interleaved); + using right_sign_t = conditional_t, make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return BaseComparison::template __call(value, interleaved); } }; @@ -283,7 +326,7 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - retVal.value = impl::MortonEncoder::encode(cartesian); + retVal.value = impl::Transcoder::encode(cartesian); return retVal; } @@ -296,7 +339,7 @@ struct code * @param [in] cartesian Coordinates to encode */ template= Bits) - explicit code(NBL_CONST_REF_ARG(vector) cartesian) + inline explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); } @@ -304,35 +347,35 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template= Bits) - constexpr inline explicit operator vector() const noexcept; + template= Bits && is_signed_v == Signed) + 
constexpr explicit operator vector() const noexcept; #endif // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value & rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value | rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = value ^ rhs.value; return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC { this_t retVal; retVal.value = ~value; @@ -342,15 +385,15 @@ struct code // Only valid in CPP #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint16_t bits) const; + constexpr this_t operator<<(uint16_t bits) const; - constexpr inline this_t operator>>(uint16_t bits) const; + constexpr this_t operator>>(uint16_t bits) const; #endif // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC { this_t zero; zero.value = _static_cast(0); @@ -363,107 +406,135 @@ struct code // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC this_t 
operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. `static_cast>(glm::ivec3(1,0,0))` + NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; + bit_not > bitnot; + // For each coordinate, leave its bits intact and turn every other bit ON + const portable_vector_t counterMaskedValue = bitnot(impl::InterleaveMasks) | value; + // For each coordinate in rhs, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & impl::InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; this_t retVal; - retVal.value = _static_cast(uint64_t(0)); + retVal.value = getter(interleaveShiftedResult, 0); [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - // put 1 bits everywhere in the bits the current axis is not using - // then extract just the axis bits for the right hand coordinate - // carry-1 will propagate the bits across the already set bits - // then clear out the bits not belonging to current axis - // Note: Its possible to clear on `this` and fill on `rhs` but that will - // disable optimizations, we expect the compiler to optimize a lot if the - // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` - retVal.value |= ((value | (~leftShift(Mask, i))) + (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); - } + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { - NBL_CONSTEXPR_STATIC storage_t Mask = _static_cast(impl::coding_mask_v); - left_shift_operator leftShift; + // For each coordinate, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedValue = impl::InterleaveMasks & value; + // Do the same for each coordinate in rhs + const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & impl::InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; this_t retVal; - retVal.value = _static_cast(uint64_t(0)); + retVal.value = getter(interleaveShiftedResult, 0); [[unroll]] - for (uint16_t i = 0; i < D; i++) - { - // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate - retVal.value |= ((value & leftShift(Mask, i)) - (rhs.value & leftShift(Mask, i))) & leftShift(Mask, i); - } + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; } // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_INLINE_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) 
NBL_CONST_MEMBER_FUNC { return value == rhs.value; } template) - NBL_CONSTEXPR_INLINE_FUNC vector equals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::Equals::__call(value, rhs); + return impl::Equals::template __call(value, rhs); } - NBL_CONSTEXPR_INLINE_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { return value != rhs.value; } template) - NBL_CONSTEXPR_INLINE_FUNC vector notEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector notEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return !equals(rhs); + return !equal(rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector less(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessThan::__call(value, rhs); + return impl::LessThan::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector lessEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessEquals::__call(value, rhs); + return impl::LessEquals::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector greater(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterThan::__call(value, rhs); + return impl::GreaterThan::template __call(value, rhs); } template) - NBL_CONSTEXPR_INLINE_FUNC vector greaterEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::GreaterEquals::__call(value, rhs); + return 
impl::GreaterEquals::template __call(value, rhs); } }; } //namespace morton +// Specialize the `static_cast_helper` +namespace impl +{ + +// I must be of same signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > +{ + NBL_CONSTEXPR_STATIC_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + { + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + return morton::impl::Transcoder::decode(val.value); + } +}; + +} // namespace impl + template struct left_shift_operator > { using type_t = morton::code; using storage_t = typename type_t::storage_t; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { left_shift_operator valueLeftShift; type_t retVal; @@ -479,7 +550,7 @@ struct arithmetic_right_shift_operator > using type_t = morton::code; using storage_t = typename type_t::storage_t; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { arithmetic_right_shift_operator valueArithmeticRightShift; type_t retVal; @@ -496,10 +567,10 @@ struct arithmetic_right_shift_operator > using type_t = morton::code; using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; - NBL_CONSTEXPR_INLINE_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { vector cartesian = _static_cast >(operand); - cartesian >> scalar_t(bits); + cartesian >>= scalar_t(bits); return type_t::create(cartesian); } }; @@ -507,45 +578,24 @@ struct 
arithmetic_right_shift_operator > #ifndef __HLSL_VERSION template&& D* Bits <= 64) -constexpr inline morton::code morton::code::operator<<(uint16_t bits) const +constexpr morton::code morton::code::operator<<(uint16_t bits) const { left_shift_operator> leftShift; return leftShift(*this, bits); } template&& D* Bits <= 64) -constexpr inline morton::code morton::code::operator>>(uint16_t bits) const +constexpr morton::code morton::code::operator>>(uint16_t bits) const { arithmetic_right_shift_operator> rightShift; return rightShift(*this, bits); } -#endif - -// Specialize the `static_cast_helper` -namespace impl -{ - -// I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > -{ - NBL_CONSTEXPR_STATIC_INLINE_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) - { - using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; - return morton::impl::MortonDecoder::decode(val.value); - } -}; - -} // namespace impl - -#ifndef __HLSL_VERSION - template && D* Bits <= 64) -template = Bits) -constexpr inline morton::code::operator vector() const noexcept +template = Bits && is_signed_v == Signed) +constexpr morton::code::operator vector() const noexcept { - return _static_cast, morton::code, Bits, D>>(*this); + return _static_cast, morton::code>(*this); } #endif diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 8add7a9ed3..901a8e419a 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -114,7 +114,12 @@ NBL_CONSTEXPR_STATIC_INLINE bool is_bda_pointer_v = is_bda_pointer::value; //! General Operations - + +//! 
Miscellaneous Instructions +template +[[vk::ext_instruction(spv::OpUndef)]] +T undef(); + // template [[vk::ext_instruction(spv::OpAccessChain)]] diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index bc160de788..a6b3db6708 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -684,6 +684,8 @@ template NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; template NBL_CONSTEXPR uint64_t extent_v = extent::value; +template +NBL_CONSTEXPR bool is_fundamental_v = is_fundamental::value; // Overlapping definitions diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index f03d8ae22c..c57eec4e61 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -248,9 +248,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/basic.h") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/intrinsics.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/truncate.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/vector_impl.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") From 2d0ffbadf914f84e4f7d5bfc8fec3b860121f655 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 28 Apr 2025 15:16:08 -0300 Subject: [PATCH 27/75] Fix the last of the operators --- include/nbl/builtin/hlsl/morton.hlsl | 70 +++++++++++++++++----------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl 
b/include/nbl/builtin/hlsl/morton.hlsl index 650d9ce6ba..d570e249c8 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -35,13 +35,16 @@ struct coding_mask; template NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); -template -NBL_CONSTEXPR portable_vector_t InterleaveMasks = _static_cast >( - truncate >( - vector(coding_mask_v, - coding_mask_v << 1, - coding_mask_v << 2, - coding_mask_v << 3))); +// It's a complete cointoss whether template variables work or not, since it's a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work, +// but this one will only work for some parameters and not for others. Therefore, this was made into a macro to inline where used + +#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\ + truncate >(\ + vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 1,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 2,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 3))) + template struct sign_mask : integral_constant {}; @@ -49,13 +52,12 @@ struct sign_mask : integral_constant NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); -template -NBL_CONSTEXPR portable_vector_t SignMasks = _static_cast >( - truncate >( - vector(sign_mask_v, - sign_mask_v << 1, - sign_mask_v << 2, - sign_mask_v << 3))); +#define NBL_MORTON_SIGN_MASKS(STORAGE_T, DIM, BITS) _static_cast >(\ + truncate >(\ + vector(sign_mask_v< DIM, BITS >,\ + sign_mask_v< DIM, BITS > << 1,\ + sign_mask_v< DIM, BITS > << 2,\ + sign_mask_v< DIM, BITS > << 3))) // 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage // mask also considers the total number of bits we're cnsidering for a code (all other masks operate on a bit-agnostic basis). 
@@ -213,10 +215,11 @@ struct Equals template) NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); const portable_vector_t rhsCasted = _static_cast >(rhs); - const portable_vector_t xored = rhsCasted ^ value; + const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); equal_to > equal; return equal(xored, zeros); } @@ -247,20 +250,22 @@ struct BaseComparison template) NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); ComparisonOp comparison; NBL_IF_CONSTEXPR(Signed) { // Obtain a vector of deinterleaved coordinates and flip their sign bits - portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; // rhs already deinterleaved, just have to cast type and flip sign - const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; return comparison(thisCoord, rhsCoord); } else { // Obtain a vector of deinterleaved coordinates - portable_vector_t thisCoord = InterleaveMasks & value; + portable_vector_t thisCoord = InterleaveMasks & value; // rhs already deinterleaved, just have to cast type const portable_vector_t rhsCoord = _static_cast >(rhs); @@ -415,13 +420,14 @@ struct code // value of `rhs` is known at compile time, e.g. 
`static_cast>(glm::ivec3(1,0,0))` NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); bit_not > bitnot; // For each coordinate, leave its bits intact and turn every other bit ON - const portable_vector_t counterMaskedValue = bitnot(impl::InterleaveMasks) | value; + const portable_vector_t counterMaskedValue = bitnot(InterleaveMasks) | value; // For each coordinate in rhs, leave its bits intact and turn every other bit OFF - const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF - const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & impl::InterleaveMasks; + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & InterleaveMasks; // Re-encode the result array_get, storage_t> getter; this_t retVal; @@ -429,19 +435,19 @@ struct code [[unroll]] for (uint16_t i = 1; i < D; i++) retVal.value = retVal.value | getter(interleaveShiftedResult, i); - return retVal; } // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); // For each coordinate, leave its bits intact and turn every other bit OFF - const portable_vector_t maskedValue = impl::InterleaveMasks & value; + const portable_vector_t maskedValue = InterleaveMasks & value; // Do the same for each coordinate in rhs - const portable_vector_t maskedRhsValue = impl::InterleaveMasks & rhs.value; + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; // Subtract these coordinate-wise, 
then turn all bits not belonging to the current coordinate OFF - const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & impl::InterleaveMasks; + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & InterleaveMasks; // Re-encode the result array_get, storage_t> getter; this_t retVal; @@ -540,6 +546,10 @@ struct left_shift_operator > type_t retVal; // Shift every coordinate by `bits` retVal.value = valueLeftShift(operand.value, bits * D); + // Previous shift might move bits to positions that storage has available but the morton code does not use + // Un-decoding the resulting morton is still fine and produces expected results, but some operations such as equality expect these unused bits to be 0 so we mask them off + const uint64_t UsedBitsMask = Bits * D < 64 ? (uint64_t(1) << (Bits * D)) - 1 : ~uint64_t(0); + retVal.value = retVal.value & _static_cast(UsedBitsMask); return retVal; } }; @@ -570,7 +580,12 @@ struct arithmetic_right_shift_operator > NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) { vector cartesian = _static_cast >(operand); - cartesian >>= scalar_t(bits); + // To avoid branching, we left-shift each coordinate to put the MSB (of the encoded Morton) at the position of the MSB (of the `scalar_t` used for the decoded coordinate), + // then right-shift again to get correct sign on each coordinate + // The number of bits we shift by to put MSB of Morton at MSB of `scalar_t` is the difference between the bitwidth of `scalar_t` and Bits + const scalar_t ShiftFactor = scalar_t(8 * sizeof(scalar_t) - Bits); + cartesian <<= ShiftFactor; + cartesian >>= ShiftFactor + scalar_t(bits); return type_t::create(cartesian); } }; @@ -600,6 +615,9 @@ constexpr morton::code::operator vector() cons #endif +#undef NBL_MORTON_INTERLEAVE_MASKS +#undef NBL_MORTON_SIGN_MASKS + } //namespace hlsl } //namespace nbl From 68edc322f2ba9c19ab0bd8068da2bae2390d7182 Mon Sep 17 00:00:00 2001 
From: Fletterio Date: Mon, 28 Apr 2025 15:19:48 -0300 Subject: [PATCH 28/75] Change examples test submodule for master merge --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index f2ea51d0b3..f4cc4cd22e 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit f2ea51d0b3e3388c0f9bae03602ec3b1f658c124 +Subproject commit f4cc4cd22ee4bd5506d794e63caafddf974ed7a4 From 977c7dddb9300e830432df96a77d58121063775c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:44:51 +0700 Subject: [PATCH 29/75] Add constexpr to _static_cast --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 57 +++++++++++---------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 87baa1f0d6..f871e2a23d 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -3,34 +3,6 @@ #include -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper -{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - -} -} #ifndef __HLSL_VERSION #include @@ -102,4 +74,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ + return impl::static_cast_helper::cast(v); +} + +} +} + #endif From 3294d0451c367aaa5963eebf3ce3ec7f850f852c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:55:22 +0700 Subject: [PATCH 30/75] Change NBL_CONSTEXPR_STATIC_FUNC to NBL_CONSTEXPR_STATIC --- 
.../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 6 +++--- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 18 +++++++++--------- include/nbl/builtin/hlsl/morton.hlsl | 18 +++++++++--------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 4566e2097b..242e30dfbe 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -673,7 +673,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) struct select_helper) > { - NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { return condition ? object1 : object2; } @@ -683,7 +683,7 @@ template NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > { - NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) { using traits = hlsl::vector_traits; array_get conditionGetter; @@ -701,7 +701,7 @@ struct select_helper&& concepts::V template struct undef_helper { - NBL_CONSTEXPR_STATIC_FUNC T __call() + NBL_CONSTEXPR_STATIC T __call() { T t; return t; diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 3780ce001b..47eb573359 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -192,7 +192,7 @@ NBL_CONSTEXPR_FUNC vector operator##OP (vector;\ using 
component_t = ComponentType;\ -NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other)\ +NBL_CONSTEXPR_STATIC this_t create(this_t other)\ {\ CRTP output;\ [[unroll]]\ @@ -209,7 +209,7 @@ NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ } #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ -NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other)\ +NBL_CONSTEXPR_STATIC this_t create(vector other)\ {\ this_t output;\ [[unroll]]\ @@ -356,7 +356,7 @@ struct emulated_vector, CRTP> : using component_t = emulated_float64_t; using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other) + NBL_CONSTEXPR_STATIC this_t create(this_t other) { this_t output; @@ -367,7 +367,7 @@ struct emulated_vector, CRTP> : } template - NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other) + NBL_CONSTEXPR_STATIC this_t create(vector other) { this_t output; @@ -505,7 +505,7 @@ namespace impl template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) + NBL_CONSTEXPR_STATIC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -518,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) + NBL_CONSTEXPR_STATIC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -532,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) + NBL_CONSTEXPR_STATIC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -550,7 +550,7 @@ struct static_cast_helper, 
emulated_vector_t; using InputVecType = emulated_vector_t; - NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) { array_get getter; array_set setter; @@ -569,7 +569,7 @@ struct static_cast_helper, emulated_vecto {\ using OutputVecType = emulated_vector_t##N ;\ using InputVecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ {\ array_get getter;\ array_set setter;\ diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d570e249c8..35ce511359 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -115,7 +115,7 @@ struct Transcoder * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ - NBL_CONSTEXPR_STATIC_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { left_shift_operator > leftShift; portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; @@ -144,7 +144,7 @@ struct Transcoder * * @param [in] decodedValue Cartesian coordinates to encode */ - NBL_CONSTEXPR_STATIC_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) { const portable_vector_t interleaveShifted = interleaveShift(decodedValue); @@ -165,7 +165,7 @@ struct Transcoder * * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) */ - NBL_CONSTEXPR_STATIC_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + NBL_CONSTEXPR_STATIC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) { arithmetic_right_shift_operator encodedRightShift; 
portable_vector_t decoded; @@ -213,7 +213,7 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); @@ -229,7 +229,7 @@ template struct Equals { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); @@ -248,7 +248,7 @@ template { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); @@ -279,7 +279,7 @@ template { template) - NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); @@ -327,7 +327,7 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> + NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; @@ -525,7 +525,7 @@ namespace impl template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > { - NBL_CONSTEXPR_STATIC_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + NBL_CONSTEXPR_STATIC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; return morton::impl::Transcoder::decode(val.value); From e2401c6ff03dd39c58751a4e10d0a5d65065c23d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:55:51 +0700 Subject: [PATCH 31/75] Add template<> to signify specialization --- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 8a3fd42faf..7f52638c61 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -370,12 +370,14 @@ constexpr inline emulated_int64_base emulated_int64_base::operat return leftShift(*this, bits); } +template<> constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } +template<> constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; From 07f7a4acf59637f0641ed5ac485a55c28befd07b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 
20 Nov 2025 15:56:13 +0700 Subject: [PATCH 32/75] Remove duplicate partial specialization. --- include/nbl/builtin/hlsl/type_traits.hlsl | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index fb05e11fe2..bf2a35ede9 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -855,9 +855,6 @@ struct float_of_size<8> template using float_of_size_t = typename float_of_size::type; -template -struct extent, 0> : integral_constant {}; - template struct extent, 0> : integral_constant {}; From 42baa6c7eb7d78234d78ae12aba5d82ccdc32447 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:56:54 +0700 Subject: [PATCH 33/75] Change NBL_CONSTEXPR_STATIC_FUNC to NBL_CONSTEXPR_STATIC --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 7f52638c61..89c9e2e733 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -43,7 +43,7 @@ struct emulated_int64_base * * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (first component are the lower 32 bits) */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data) + NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) { this_t retVal; retVal.data = _data; @@ -56,7 +56,7 @@ struct emulated_int64_base * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated */ - NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) + NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) { return create(storage_t(lo, hi)); } @@ -114,6 +114,24 @@ struct emulated_int64_base constexpr inline this_t operator<<(uint32_t bits) const; constexpr inline this_t operator>>(uint32_t bits) const; + constexpr inline this_t& operator&=(const this_t& val) + { + data &= val.data; + return *this; + } + + constexpr inline this_t& operator|=(const this_t& val) + { + data |= val.data; + return *this; + } + + constexpr inline this_t& operator^=(const this_t& val) + { + data ^= val.data; + return *this; + } + #endif // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- @@ -191,7 +209,7 @@ struct static_cast_helper, emulated_int64_base; using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) other) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) { To retVal; retVal.data = other.data; @@ -206,7 +224,7 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using From = emulated_int64_base; // Return only the lowest bits - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { return _static_cast(val.data.x); } @@ -218,7 +236,7 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con using To = I; using From = emulated_int64_base; - 
NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } @@ -231,7 +249,7 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using From = I; // Set only lower bits - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { return To::create(_static_cast(i), uint32_t(0)); } @@ -243,7 +261,7 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con using To = emulated_int64_base; using From = I; - NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i) + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { // `bit_cast` blocked by GLM vectors using a union #ifndef __HLSL_VERSION @@ -417,13 +435,13 @@ struct minus > }; template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE emulated_int64_t minus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -457,13 +475,13 @@ struct minus_assign > }; template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR emulated_int64_t 
plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; // ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- From 22e78eb4bbd9ae1675ec3812fe6773dc34ecc508 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 15:57:53 +0700 Subject: [PATCH 34/75] Fix concatenation of 'operator' and OP with '##' since operatorOP is not a single token --- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 47eb573359..cdeddeb105 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -133,17 +133,17 @@ struct emulated_vector; // Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral #define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC this_t operator##OP() NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP() NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i).operator##OP());\ + output.setComponent(i, this_t::getComponent(i).operator OP());\ return output;\ } #define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP (component_t val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ @@ -151,7 +151,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP 
(component_t val) NBL_CONST_MEMBER_FUNC \ output.setComponent(i, this_t::getComponent(i) OP val);\ return output;\ }\ -NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ @@ -161,7 +161,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ } #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC this_t operator##OP(vector other) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC this_t operator OP(vector other) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ [[unroll]]\ @@ -170,7 +170,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP(vector othe return output;\ } -#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ {\ vector output;\ [[unroll]]\ @@ -180,7 +180,7 @@ NBL_CONSTEXPR_FUNC this_t operator##OP(vector othe } #define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC vector operator##OP (vector other) NBL_CONST_MEMBER_FUNC \ +NBL_CONSTEXPR_FUNC vector operator OP (vector other) NBL_CONST_MEMBER_FUNC \ {\ vector output;\ [[unroll]]\ From 8daf855e0a88f6d5b81b6b94b1b3426a4da211ea Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 16:31:27 +0700 Subject: [PATCH 35/75] 'equals' to 'equal' --- include/nbl/builtin/hlsl/morton.hlsl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 35ce511359..696124ae0c 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -207,10 +207,10 @@ template && 
is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); template -struct Equals; +struct Equal; template -struct Equals +struct Equal { template) NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) @@ -226,14 +226,14 @@ struct Equals }; template -struct Equals +struct Equal { template) NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); - return Equals::template __call(value, interleaved); + return Equal::template __call(value, interleaved); } }; @@ -291,13 +291,13 @@ template > > {}; template -struct LessEquals : BaseComparison > > {}; +struct LessEqual : BaseComparison > > {}; template struct GreaterThan : BaseComparison > > {}; template -struct GreaterEquals : BaseComparison > > {}; +struct GreaterEqual : BaseComparison > > {}; } //namespace impl @@ -470,7 +470,7 @@ struct code NBL_FUNC_REQUIRES(impl::Comparable) NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::Equals::template __call(value, rhs); + return impl::Equal::template __call(value, rhs); } NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC @@ -494,9 +494,9 @@ struct code template) - NBL_CONSTEXPR_FUNC vector lessThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector lessThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC { - return impl::LessEquals::template __call(value, rhs); + return impl::LessEqual::template __call(value, rhs); } template) - NBL_CONSTEXPR_FUNC vector greaterThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC vector greaterThanEqual(NBL_CONST_REF_ARG(vector) rhs) 
NBL_CONST_MEMBER_FUNC { - return impl::GreaterEquals::template __call(value, rhs); + return impl::GreaterEqual::template __call(value, rhs); } }; From 831244f2f407763511f47d9332560e5d2bf9d308 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 16:31:41 +0700 Subject: [PATCH 36/75] Pass vec by value not ref --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 0afe214de7..27461d5949 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC T operator()(const U v) { return T(v); } From a560180f8ef3962921e3060423250fbb37ecb0ea Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 20 Nov 2025 16:41:08 +0700 Subject: [PATCH 37/75] Use truncate to truncate --- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 696124ae0c..4512774b14 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -118,7 +118,7 @@ struct Transcoder NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { left_shift_operator > leftShift; - portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; + portable_vector_t interleaved = truncate >(decodedValue) & coding_mask_v; #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ {\ From e320ed8f8f620d1c133b14edf8ec7b96bfb39956 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 22 Nov 2025 12:30:57 +0700 Subject: [PATCH 38/75] Make morton compile --- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 4512774b14..696124ae0c 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -118,7 +118,7 @@ struct Transcoder NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) { left_shift_operator > leftShift; - portable_vector_t interleaved = truncate >(decodedValue) & coding_mask_v; + portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ {\ From 83d27c9b1050be68bddd5c5b4abed0f2c3fca94c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 27 Nov 2025 20:55:09 +0700 Subject: [PATCH 39/75] NBL_CONSTEXPR_INLINE_VAR for template constexpr variable --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 ++ include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 16 ++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index f871e2a23d..84d7b9d8b0 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -13,6 +13,7 @@ #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline +#define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) @@ -44,6 +45,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_INLINE_FUNC inline +#define NBL_CONSTEXPR_INLINE_VAR inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_IF_CONSTEXPR(...) 
if (__VA_ARGS__) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 89c9e2e733..2214835df9 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -435,13 +435,13 @@ struct minus > }; template<> -NBL_CONSTEXPR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -475,13 +475,13 @@ struct minus_assign > }; template<> -NBL_CONSTEXPR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR emulated_int64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; // ------------------------------------------------ TYPE TRAITS SATISFIED 
----------------------------------------------------- From c49691656ac8f54e5fbcaa298c2f75050235f682 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:07:48 +0700 Subject: [PATCH 40/75] Promote and Truncate take vector and scalar by value and the rest by reference --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 14 ++++++++------ include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 9 +++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 27461d5949..e267895ed5 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - NBL_CONSTEXPR_FUNC T operator()(const U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } @@ -26,7 +26,7 @@ struct Promote template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v)}; return promoted; @@ -36,7 +36,7 @@ struct Promote, U> template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v), Scalar(v)}; return promoted; @@ -46,7 +46,7 @@ struct Promote, U> template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -56,7 +56,7 @@ struct Promote, U> template struct Promote, U> { - NBL_CONSTEXPR_FUNC enable_if_t::value && 
is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -67,13 +67,15 @@ struct Promote, U> } +// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? template -NBL_CONSTEXPR_FUNC T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(NBL_CONST_REF_ARG(U) v) { impl::Promote _promote; return _promote(v); } + } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index a95df183be..1e6b5b0f94 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -24,7 +24,7 @@ struct Truncate template NBL_PARTIAL_REQ_TOP(concepts::Scalar) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0] }; return truncated; @@ -34,7 +34,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0], v[1]}; return truncated; @@ -44,7 +44,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0], v[1], v[2] }; return truncated; @@ -54,7 +54,7 @@ struct Truncate, 
vector NBL_PARTIAL_REQ_BOT(concept template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > { - NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + NBL_CONSTEXPR_FUNC vector operator()(const vector v) { vector truncated = { v[0], v[1], v[2], v[3] }; return truncated; @@ -63,6 +63,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } //namespace impl +// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? template NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) { From d7bd053e5c129bdad2ced9a399d182a51ba239e6 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:10:57 +0700 Subject: [PATCH 41/75] Remove promote and truncate comment about specialization --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 1 - include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 1 - 2 files changed, 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index e267895ed5..6e75a55b1b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -67,7 +67,6 @@ struct Promote, U> } -// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? template NBL_CONSTEXPR_FUNC T promote(NBL_CONST_REF_ARG(U) v) { diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index 1e6b5b0f94..63e0ab7b93 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -63,7 +63,6 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } //namespace impl -// TODO(kevinyu): Should we specialize this for vector and scalar to take argument by value instead of reference? 
template NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) { From 3f3a23e13548f0d140c1dfa86a4e877b0d29214f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:11:12 +0700 Subject: [PATCH 42/75] Add comment to rename log2 --- include/nbl/builtin/hlsl/mpl.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 4594662969..7de4983c8e 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -110,6 +110,7 @@ struct round_up_to_pot : integral_constant NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; +// TODO: should rename log2 to log2_floor template struct round_down_to_pot : integral_constant > {}; template From 8dcdfdd930a99487134e65de707bb2d675cf5446 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 17:17:54 +0700 Subject: [PATCH 43/75] Change dimension type from uint16_t to int32_t --- include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index 63e0ab7b93..38467942f9 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -21,7 +21,7 @@ struct Truncate } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) @@ -31,7 +31,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) @@ -41,7 +41,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } }; -template 
NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) @@ -51,7 +51,7 @@ struct Truncate, vector NBL_PARTIAL_REQ_BOT(concept } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > { NBL_CONSTEXPR_FUNC vector operator()(const vector v) From 92cd9e775f49d87c46a8c24228dcfbfa909e0866 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 18:21:10 +0700 Subject: [PATCH 44/75] Redefine some macro --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 84d7b9d8b0..bbb2a73ee7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -9,6 +9,7 @@ #define ARROW -> #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR +#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline @@ -16,6 +17,9 @@ #define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR constexpr +#define NBL_CONSTEXPR_OOL_MEMBER constexpr +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) namespace nbl::hlsl @@ -41,13 +45,17 @@ namespace nbl::hlsl #define ARROW .arrow(). 
#define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR +#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_INLINE_VAR inline +#define NBL_CONSTEXPR_INLINE_VAR static const #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR const +#define NBL_CONSTEXPR_OOL_MEMBER const +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) namespace nbl From 7f6d8b82bdd3825e2ad99b9c60695ef7d1ed0c6f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 18:42:41 +0700 Subject: [PATCH 45/75] use const instead of static const for local variable in hlsl --- include/nbl/builtin/hlsl/functional.hlsl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 76b527f6bd..fd23ad388c 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -321,7 +321,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -336,7 +336,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -351,7 +351,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; 
[[unroll]] @@ -366,7 +366,7 @@ struct left_shift_operator && conc { array_get getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); left_shift_operator leftShift; T shifted; [[unroll]] @@ -416,7 +416,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -431,7 +431,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -446,7 +446,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] @@ -461,7 +461,7 @@ struct arithmetic_right_shift_operator getter; array_set setter; - NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v); + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); arithmetic_right_shift_operator rightShift; T shifted; [[unroll]] From 1d9ce208cf873228ff7de438608b7b59af2506ca Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 20:26:21 +0700 Subject: [PATCH 46/75] Rename NBL_CONSTEXPR_INLINE to NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR --- include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl | 2 +- include/nbl/builtin/hlsl/cpp_compat/basic.h | 4 +- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 2 +- include/nbl/builtin/hlsl/math/functions.hlsl | 4 +- .../math/quadrature/gauss_legendre/impl.hlsl | 56 +++++++++---------- include/nbl/builtin/hlsl/mpl.hlsl | 24 ++++---- include/nbl/builtin/hlsl/numbers.hlsl | 28 
+++++----- include/nbl/builtin/hlsl/type_traits.hlsl | 26 ++++----- .../hlsl/workgroup2/arithmetic_config.hlsl | 2 +- 9 files changed, 74 insertions(+), 74 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl index b27c892abe..40f64d9cf8 100644 --- a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl @@ -406,7 +406,7 @@ template struct is_ggx : impl::is_ggx {}; template -NBL_CONSTEXPR bool is_ggx_v = is_ggx::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_ggx_v = is_ggx::value; } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index bbb2a73ee7..89c10d14fd 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -9,7 +9,6 @@ #define ARROW -> #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_INLINE constexpr inline #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline @@ -17,6 +16,7 @@ #define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR constexpr inline #define NBL_CONSTEXPR_FUNC_SCOPE_VAR constexpr #define NBL_CONSTEXPR_OOL_MEMBER constexpr #define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline @@ -45,7 +45,6 @@ namespace nbl::hlsl #define ARROW .arrow(). 
#define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static @@ -53,6 +52,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_VAR static const #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR const static #define NBL_CONSTEXPR_FUNC_SCOPE_VAR const #define NBL_CONSTEXPR_OOL_MEMBER const #define NBL_CONSTEXPR_INLINE_OOL_MEMBER const diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 242e30dfbe..7850fd7cf3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -27,7 +27,7 @@ template::type; - NBL_CONSTEXPR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; + NBL_CONSTEXPR_FUNC_SCOPE_VAR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; UnsignedInteger absVal = val & Mask; return absVal > (ieee754::traits::specialValueExp << ieee754::traits::mantissaBitCnt); } diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index 20442c467b..21f0e6ef2b 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -123,9 +123,9 @@ void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb) { #ifdef __HLSL_VERSION - NBL_CONSTEXPR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); #else - NBL_CONSTEXPR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); #endif 
const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl index 3bcfbb2388..cd402d0cd4 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl @@ -14,25 +14,25 @@ namespace float_t_namespace { -NBL_CONSTEXPR float_t xi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_2[2] = { TYPED_NUMBER(-0.5773502691896257), TYPED_NUMBER(0.5773502691896257) }; -NBL_CONSTEXPR float_t xi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_3[3] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.7745966692414833), TYPED_NUMBER(0.7745966692414833) }; -NBL_CONSTEXPR float_t xi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_4[4] = { TYPED_NUMBER(-0.3399810435848562), TYPED_NUMBER(0.3399810435848562), TYPED_NUMBER(-0.8611363115940525), TYPED_NUMBER(0.8611363115940525) }; -NBL_CONSTEXPR float_t xi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_5[5] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.5384693101056830), TYPED_NUMBER(0.5384693101056830), @@ -40,7 +40,7 @@ NBL_CONSTEXPR float_t xi_5[5] = { TYPED_NUMBER(0.9061798459386639) }; -NBL_CONSTEXPR float_t xi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_6[6] = { TYPED_NUMBER(0.6612093864662645), TYPED_NUMBER(-0.6612093864662645), TYPED_NUMBER(-0.2386191860831969), @@ -49,7 +49,7 @@ NBL_CONSTEXPR float_t xi_6[6] = { TYPED_NUMBER(0.9324695142031520) }; -NBL_CONSTEXPR float_t xi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_7[7] = { TYPED_NUMBER(0.0), TYPED_NUMBER(0.4058451513773971), TYPED_NUMBER(-0.4058451513773971), @@ -59,7 +59,7 @@ NBL_CONSTEXPR float_t xi_7[7] = { TYPED_NUMBER(0.9491079123427585) }; -NBL_CONSTEXPR float_t xi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_8[8] = { TYPED_NUMBER(-0.1834346424956498), TYPED_NUMBER(0.1834346424956498), 
TYPED_NUMBER(-0.5255324099163289), @@ -70,7 +70,7 @@ NBL_CONSTEXPR float_t xi_8[8] = { TYPED_NUMBER(0.9602898564975362) }; -NBL_CONSTEXPR float_t xi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_9[9] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.8360311073266357), TYPED_NUMBER(0.8360311073266357), @@ -82,7 +82,7 @@ NBL_CONSTEXPR float_t xi_9[9] = { TYPED_NUMBER(0.6133714327005903) }; -NBL_CONSTEXPR float_t xi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_10[10] = { TYPED_NUMBER(-0.1488743389816312), TYPED_NUMBER(0.1488743389816312), TYPED_NUMBER(-0.4333953941292471), @@ -95,7 +95,7 @@ NBL_CONSTEXPR float_t xi_10[10] = { TYPED_NUMBER(0.9739065285171717) }; -NBL_CONSTEXPR float_t xi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_11[11] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2695431559523449), TYPED_NUMBER(0.2695431559523449), @@ -109,7 +109,7 @@ NBL_CONSTEXPR float_t xi_11[11] = { TYPED_NUMBER(0.9782286581460569) }; -NBL_CONSTEXPR float_t xi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_12[12] = { TYPED_NUMBER(-0.1252334085114689), TYPED_NUMBER(0.1252334085114689), TYPED_NUMBER(-0.3678314989981801), @@ -124,7 +124,7 @@ NBL_CONSTEXPR float_t xi_12[12] = { TYPED_NUMBER(0.9815606342467192) }; -NBL_CONSTEXPR float_t xi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_13[13] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2304583159551347), TYPED_NUMBER(0.2304583159551347), @@ -140,7 +140,7 @@ NBL_CONSTEXPR float_t xi_13[13] = { TYPED_NUMBER(0.9841830547185881) }; -NBL_CONSTEXPR float_t xi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_14[14] = { TYPED_NUMBER(-0.1080549487073436), TYPED_NUMBER(0.1080549487073436), TYPED_NUMBER(-0.3191123689278897), @@ -157,7 +157,7 @@ NBL_CONSTEXPR float_t xi_14[14] = { TYPED_NUMBER(0.9862838086968123) }; -NBL_CONSTEXPR float_t xi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_15[15] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2011940939974345), 
TYPED_NUMBER(0.2011940939974345), @@ -175,25 +175,25 @@ NBL_CONSTEXPR float_t xi_15[15] = { TYPED_NUMBER(0.9879925180204854) }; -NBL_CONSTEXPR float_t wi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_2[2] = { TYPED_NUMBER(1.0000000000000000), TYPED_NUMBER(1.0000000000000000) }; -NBL_CONSTEXPR float_t wi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_3[3] = { TYPED_NUMBER(0.8888888888888888), TYPED_NUMBER(0.5555555555555555), TYPED_NUMBER(0.5555555555555555) }; -NBL_CONSTEXPR float_t wi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_4[4] = { TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.3478548451374538), TYPED_NUMBER(0.3478548451374538) }; -NBL_CONSTEXPR float_t wi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_5[5] = { TYPED_NUMBER(0.5688888888888888), TYPED_NUMBER(0.4786286704993664), TYPED_NUMBER(0.4786286704993664), @@ -201,7 +201,7 @@ NBL_CONSTEXPR float_t wi_5[5] = { TYPED_NUMBER(0.2369268850561890) }; -NBL_CONSTEXPR float_t wi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_6[6] = { TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.4679139345726910), @@ -210,7 +210,7 @@ NBL_CONSTEXPR float_t wi_6[6] = { TYPED_NUMBER(0.1713244923791703) }; -NBL_CONSTEXPR float_t wi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_7[7] = { TYPED_NUMBER(0.4179591836734693), TYPED_NUMBER(0.3818300505051189), TYPED_NUMBER(0.3818300505051189), @@ -220,7 +220,7 @@ NBL_CONSTEXPR float_t wi_7[7] = { TYPED_NUMBER(0.1294849661688696) }; -NBL_CONSTEXPR float_t wi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_8[8] = { TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3137066458778872), @@ -231,7 +231,7 @@ NBL_CONSTEXPR float_t wi_8[8] = { TYPED_NUMBER(0.1012285362903762) }; -NBL_CONSTEXPR float_t wi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_9[9] = { TYPED_NUMBER(0.3302393550012597), 
TYPED_NUMBER(0.1806481606948574), TYPED_NUMBER(0.1806481606948574), @@ -243,7 +243,7 @@ NBL_CONSTEXPR float_t wi_9[9] = { TYPED_NUMBER(0.2606106964029354) }; -NBL_CONSTEXPR float_t wi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_10[10] = { TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2692667193099963), @@ -256,7 +256,7 @@ NBL_CONSTEXPR float_t wi_10[10] = { TYPED_NUMBER(0.0666713443086881) }; -NBL_CONSTEXPR float_t wi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_11[11] = { TYPED_NUMBER(0.2729250867779006), TYPED_NUMBER(0.2628045445102466), TYPED_NUMBER(0.2628045445102466), @@ -270,7 +270,7 @@ NBL_CONSTEXPR float_t wi_11[11] = { TYPED_NUMBER(0.0556685671161736) }; -NBL_CONSTEXPR float_t wi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_12[12] = { TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2334925365383548), @@ -285,7 +285,7 @@ NBL_CONSTEXPR float_t wi_12[12] = { TYPED_NUMBER(0.0471753363865118) }; -NBL_CONSTEXPR float_t wi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_13[13] = { TYPED_NUMBER(0.2325515532308739), TYPED_NUMBER(0.2262831802628972), TYPED_NUMBER(0.2262831802628972), @@ -301,7 +301,7 @@ NBL_CONSTEXPR float_t wi_13[13] = { TYPED_NUMBER(0.0404840047653158) }; -NBL_CONSTEXPR float_t wi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_14[14] = { TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2051984637212956), @@ -318,7 +318,7 @@ NBL_CONSTEXPR float_t wi_14[14] = { TYPED_NUMBER(0.0351194603317518) }; -NBL_CONSTEXPR float_t wi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_15[15] = { TYPED_NUMBER(0.2025782419255612), TYPED_NUMBER(0.1984314853271115), TYPED_NUMBER(0.1984314853271115), diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 7de4983c8e..7734dea15f 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ 
b/include/nbl/builtin/hlsl/mpl.hlsl @@ -41,12 +41,12 @@ struct countl_zero : impl::countl_zero static_assert(is_integral::value, "countl_zero type parameter must be an integral type"); }; template -NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T countl_zero_v = countl_zero::value; template struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_pot_v = is_pot::value; template struct log2 @@ -54,12 +54,12 @@ struct log2 NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint16_t log2_v = log2::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_v = log2::value; template struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; template -NBL_CONSTEXPR uint16_t log2_ceil_v = log2_ceil::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -69,7 +69,7 @@ struct rotl NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X << r) | (X >> (N - r))) : (X >> (-r)) | (X << (N - (-r))); }; template -NBL_CONSTEXPR T rotl_v = rotl::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotl_v = rotl::value; template struct rotr @@ -79,7 +79,7 @@ struct rotr NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X >> r) | (X << (N - r))) : (X << (-r)) | (X >> (N - (-r))); }; template -NBL_CONSTEXPR T rotr_v = rotr::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotr_v = rotr::value; template struct align_up @@ -87,7 +87,7 @@ struct align_up NBL_CONSTEXPR_STATIC_INLINE uint64_t value = X ? 
(((X-1)/M+1)*M):0; }; template -NBL_CONSTEXPR uint64_t align_up_v = align_up::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t align_up_v = align_up::value; template struct max @@ -95,7 +95,7 @@ struct max NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T max_v = max::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T max_v = max::value; template struct min @@ -103,18 +103,18 @@ struct min NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T min_v = min::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T min_v = min::value; template struct round_up_to_pot : integral_constant > {}; template -NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_up_to_pot_v = round_up_to_pot::value; // TODO: should rename log2 to log2_floor template struct round_down_to_pot : integral_constant > {}; template -NBL_CONSTEXPR uint64_t round_down_to_pot_v = round_down_to_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_down_to_pot_v = round_down_to_pot::value; template struct find_lsb @@ -122,7 +122,7 @@ struct find_lsb NBL_CONSTEXPR_STATIC_INLINE uint16_t value = log2::value; }; template -NBL_CONSTEXPR uint64_t find_lsb_v = find_lsb::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t find_lsb_v = find_lsb::value; } } } diff --git a/include/nbl/builtin/hlsl/numbers.hlsl b/include/nbl/builtin/hlsl/numbers.hlsl index 6671a44756..4594596590 100644 --- a/include/nbl/builtin/hlsl/numbers.hlsl +++ b/include/nbl/builtin/hlsl/numbers.hlsl @@ -11,33 +11,33 @@ namespace numbers { template -NBL_CONSTEXPR float_t e = float_t(2.718281828459045); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t e = float_t(2.718281828459045); template -NBL_CONSTEXPR float_t log2e = float_t(1.4426950408889634); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log2e = float_t(1.4426950408889634); template -NBL_CONSTEXPR float_t log10e = float_t(0.4342944819032518); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log10e = 
float_t(0.4342944819032518); template -NBL_CONSTEXPR float_t pi = float_t(3.141592653589793); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t pi = float_t(3.141592653589793); template -NBL_CONSTEXPR float_t inv_pi = float_t(0.3183098861837907); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_pi = float_t(0.3183098861837907); template -NBL_CONSTEXPR float_t inv_sqrtpi = float_t(0.5641895835477563); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrtpi = float_t(0.5641895835477563); template -NBL_CONSTEXPR float_t ln2 = float_t(0.6931471805599453); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln2 = float_t(0.6931471805599453); template -NBL_CONSTEXPR float_t inv_ln2 = float_t(1.44269504088896); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_ln2 = float_t(1.44269504088896); template -NBL_CONSTEXPR float_t ln10 = float_t(2.302585092994046); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln10 = float_t(2.302585092994046); template -NBL_CONSTEXPR float_t sqrt2 = float_t(1.4142135623730951); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt2 = float_t(1.4142135623730951); template -NBL_CONSTEXPR float_t sqrt3 = float_t(1.7320508075688772); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt3 = float_t(1.7320508075688772); template -NBL_CONSTEXPR float_t inv_sqrt3 = float_t(0.5773502691896257); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrt3 = float_t(0.5773502691896257); template -NBL_CONSTEXPR float_t egamma = float_t(0.5772156649015329); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t egamma = float_t(0.5772156649015329); template -NBL_CONSTEXPR float_t phi = float_t(1.618033988749895); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t phi = float_t(1.618033988749895); } } diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index bf2a35ede9..b682b8da8b 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -638,25 +638,25 @@ using conditional_t = typename conditional::type; // 
Template Variables template -NBL_CONSTEXPR T integral_constant_v = integral_constant::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T integral_constant_v = integral_constant::value; template -NBL_CONSTEXPR bool is_same_v = is_same::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_same_v = is_same::value; template -NBL_CONSTEXPR bool is_unsigned_v = is_unsigned::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_unsigned_v = is_unsigned::value; template -NBL_CONSTEXPR bool is_integral_v = is_integral::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_integral_v = is_integral::value; template -NBL_CONSTEXPR bool is_floating_point_v = is_floating_point::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_floating_point_v = is_floating_point::value; template -NBL_CONSTEXPR bool is_signed_v = is_signed::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_signed_v = is_signed::value; template -NBL_CONSTEXPR bool is_scalar_v = is_scalar::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_scalar_v = is_scalar::value; template -NBL_CONSTEXPR uint64_t size_of_v = size_of::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t size_of_v = size_of::value; template -NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t alignment_of_v = alignment_of::value; template -NBL_CONSTEXPR bool is_fundamental_v = is_fundamental::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_fundamental_v = is_fundamental::value; // Overlapping definitions @@ -685,7 +685,7 @@ template struct is_vector > : bool_constant {}; template -NBL_CONSTEXPR bool is_vector_v = is_vector::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_vector_v = is_vector::value; #ifndef __HLSL_VERSION template @@ -696,7 +696,7 @@ template struct is_matrix > : bool_constant {}; template -NBL_CONSTEXPR bool is_matrix_v = is_matrix::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_matrix_v = is_matrix::value; template @@ -741,7 +741,7 @@ struct extent, I> : 
integral_constant: // Template Variables template -NBL_CONSTEXPR uint64_t extent_v = extent::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t extent_v = extent::value; template::value> diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl index 03ccd64d4e..22c93ce193 100644 --- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl +++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl @@ -225,7 +225,7 @@ template struct is_configuration > : bool_constant {}; template -NBL_CONSTEXPR bool is_configuration_v = is_configuration::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_configuration_v = is_configuration::value; } } From 1eded124d9a0f26251e6e7ad22843ac57e0f288b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 14:59:38 +0700 Subject: [PATCH 47/75] Refactor emulated_integral_64 --- .../emulated/int64_common_member_inc.hlsl | 155 ++++++++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 332 ++++++------------ include/nbl/builtin/hlsl/functional.hlsl | 2 +- 3 files changed, 261 insertions(+), 228 deletions(-) create mode 100644 include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl diff --git a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl new file mode 100644 index 0000000000..2dd7bafa41 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl @@ -0,0 +1,155 @@ + +storage_t data; + +/** +* @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern +* +* @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (first component are the lower 32 bits) +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) +{ + this_t retVal; + retVal.data = _data; + return retVal; +} + +/** +* @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern +* +* @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated +* @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) +{ + return create(storage_t(lo, hi)); +} + +// ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- +// GLM requires these for vector casts + +#ifndef __HLSL_VERSION + +template +constexpr explicit operator I() const noexcept; + +#endif + +// ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC +{ + return data.x; +} + +NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC +{ + return data.y; +} + +// ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data & rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data | rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data ^ rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(~data); + return retVal; +} + +// Only valid in CPP +#ifndef __HLSL_VERSION +constexpr inline this_t operator>>(uint32_t 
bits) const; + +constexpr inline this_t operator<<(uint32_t bits) const; + +constexpr inline this_t& operator&=(const this_t& val) +{ + data &= val.data; + return *this; +} + +constexpr inline this_t& operator|=(const this_t& val) +{ + data |= val.data; + return *this; +} + +constexpr inline this_t& operator^=(const this_t& val) +{ + data ^= val.data; + return *this; +} + +#endif + +// ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); + return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); +} + +NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); +} + +// ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- +NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + equal_to equals; + return all(equals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? 
(bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator>(rhs); +} + +NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator<(rhs); +} diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 2214835df9..ce98d5268f 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -15,199 +15,92 @@ namespace nbl namespace hlsl { -template -struct emulated_int64_base +struct emulated_int64_t; + +struct emulated_uint64_t { using storage_t = vector; - using this_t = emulated_int64_base; - using this_signed_t = emulated_int64_base; - - storage_t data; + using this_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = false; - // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + #include "int64_common_member_inc.hlsl" #ifndef __HLSL_VERSION - - emulated_int64_base() = default; - + emulated_uint64_t() = default; // GLM requires these to cast vectors because it uses a native `static_cast` template - constexpr explicit emulated_int64_base(const I& toEmulate); - - constexpr explicit emulated_int64_base(const emulated_int64_base& other) : data(other.data) {} + constexpr explicit emulated_uint64_t(const I& toEmulate); + constexpr explicit 
emulated_uint64_t(const emulated_int64_t& other); #endif +}; - /** - * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern - * - * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) - */ - NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) - { - this_t retVal; - retVal.data = _data; - return retVal; - } - - /** - * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern - * - * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated - * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated - */ - NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) - { - return create(storage_t(lo, hi)); - } - - // ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- - // GLM requires these for vector casts +struct emulated_int64_t +{ + using storage_t = vector; + using this_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = true; + + #include "int64_common_member_inc.hlsl" + #ifndef __HLSL_VERSION - + emulated_int64_t() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` template - constexpr explicit operator I() const noexcept; + constexpr explicit emulated_int64_t(const I& toEmulate); + constexpr explicit emulated_int64_t(const emulated_uint64_t& other); #endif - // ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- - - NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC { - return data.x; - } - - NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC - { - return data.y; - } - - // 
------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data & rhs.data); - return retVal; - } - - NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data | rhs.data); - return retVal; + storage_t inverted = ~data; + return create(_static_cast(inverted)) + _static_cast(1); } +}; - NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(data ^ rhs.data); - return retVal; - } +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- - NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC - { - this_t retVal = create(~data); - return retVal; - } +template<> +struct is_signed : bool_constant {}; - // Only valid in CPP - #ifndef __HLSL_VERSION - constexpr inline this_t operator<<(uint32_t bits) const; - constexpr inline this_t operator>>(uint32_t bits) const; +template<> +struct is_unsigned : bool_constant {}; - constexpr inline this_t& operator&=(const this_t& val) - { - data &= val.data; - return *this; - } +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ - constexpr inline this_t& operator|=(const this_t& val) - { - data |= val.data; - return *this; - } +template +NBL_BOOL_CONCEPT ImitationIntegral64Scalar = same_as || same_as; - constexpr inline this_t& operator^=(const this_t& val) - { - data ^= val.data; - return *this; - } - - #endif - - // ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- - - NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC - { - vector negated = -data; - return 
this_signed_t::create(_static_cast(negated)); - } - - NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); - return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); - } - - NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); - return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); - } - - // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- - NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - equal_to equals; - return all(equals(data, rhs.data)); - } - - NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - not_equal_to notEquals; - return any(notEquals(data, rhs.data)); - } +namespace impl +{ - NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less - // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) - const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); - const bool LSB = __getLSB() < rhs.__getLSB(); - return MSBEqual ? LSB : MSB; - } +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; - NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - // Same reasoning as above - const bool MSBEqual = __getMSB() == rhs.__getMSB(); - const bool MSB = Signed ? 
(bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); - const bool LSB = __getLSB() > rhs.__getLSB(); - return MSBEqual ? LSB : MSB; - } +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} - NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator>(rhs); - } - NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC - { - return !operator<(rhs); - } -}; +} -using emulated_uint64_t = emulated_int64_base; -using emulated_int64_t = emulated_int64_base; namespace impl { -template -struct static_cast_helper, emulated_int64_base > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar && concepts::ImitationIntegral64Scalar && !concepts::same_as) +struct static_cast_helper && concepts::ImitationIntegral64Scalar && !concepts::same_as) > { - using To = emulated_int64_base; - using From = emulated_int64_base; NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) { @@ -217,12 +110,9 @@ struct static_cast_helper, emulated_int64_base NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = I; - using From = emulated_int64_base; - // Return only the lowest bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { @@ -230,24 +120,18 @@ struct static_cast_helper NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) -struct static_cast_helper NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > +template 
NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = I; - using From = emulated_int64_base; - NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { return bit_cast(val.data); } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) <= sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = emulated_int64_base; - using From = I; - // Set only lower bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { @@ -255,12 +139,9 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con } }; -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) -struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && (sizeof(I) > sizeof(uint32_t))) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) +struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > { - using To = emulated_int64_base; - using From = I; - NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { // `bit_cast` blocked by GLM vectors using a union @@ -280,16 +161,30 @@ struct static_cast_helper, I NBL_PARTIAL_REQ_BOT(con #ifndef __HLSL_VERSION -template +constexpr emulated_int64_t::emulated_int64_t(const emulated_uint64_t& other) : data(other.data) {} + +constexpr emulated_uint64_t::emulated_uint64_t(const emulated_int64_t& other) : data(other.data) {} + +template +constexpr 
emulated_int64_t::emulated_int64_t(const I& toEmulate) +{ + *this = _static_cast(toEmulate); +} + template -constexpr emulated_int64_base::emulated_int64_base(const I& toEmulate) +constexpr emulated_uint64_t::emulated_uint64_t(const I& toEmulate) { - *this = _static_cast>(toEmulate); + *this = _static_cast(toEmulate); } -template template -constexpr emulated_int64_base::operator I() const noexcept +constexpr emulated_int64_t::operator I() const noexcept +{ + return _static_cast(*this); +} + +template +constexpr emulated_uint64_t::operator I() const noexcept { return _static_cast(*this); } @@ -298,28 +193,27 @@ constexpr emulated_int64_base::operator I() const noexcept // ---------------------- Functional operators ------------------------ -template -struct left_shift_operator > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct left_shift_operator) > { - using type_t = emulated_int64_base; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + const T shifted = T::create(bigShift ? 
vector(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - ternary_operator ternary; + ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, T bits) { return operator()(operand, _static_cast(bits)); } @@ -381,21 +275,24 @@ struct arithmetic_right_shift_operator #ifndef __HLSL_VERSION -template -constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint32_t bits) const +constexpr inline emulated_int64_t emulated_int64_t::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint32_t bits) const { left_shift_operator leftShift; return leftShift(*this, bits); } -template<> constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; return rightShift(*this, bits); } -template<> constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const { arithmetic_right_shift_operator rightShift; @@ -404,14 +301,15 @@ constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) co #endif + // ---------------------- STD arithmetic operators ------------------------ // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be initialized inside the struct definition -template -struct plus > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct plus) > { - using type_t = emulated_int64_base; + using type_t = T; type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { @@ -421,10 +319,10 @@ struct plus > const static type_t 
identity; }; -template -struct minus > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct minus) > { - using type_t = emulated_int64_base; + using type_t = T; type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) { @@ -446,10 +344,10 @@ NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus::identity = _s // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl -template -struct plus_assign > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct plus_assign) > { - using type_t = emulated_int64_base; + using type_t = T; using base_t = plus; base_t baseOp; void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) @@ -460,10 +358,10 @@ struct plus_assign > const static type_t identity; }; -template -struct minus_assign > +template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) +struct minus_assign) > { - using type_t = emulated_int64_base; + using type_t = T; using base_t = minus; base_t baseOp; void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) @@ -483,26 +381,6 @@ NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::iden template<> NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; -// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- - -template<> -struct is_signed : bool_constant {}; - -template<> -struct is_unsigned : bool_constant {}; - -// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- -namespace concepts -{ -namespace impl -{ -template -struct is_emulating_integral_scalar > -{ - NBL_CONSTEXPR_STATIC_INLINE bool value = true; -}; -} -} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/functional.hlsl 
b/include/nbl/builtin/hlsl/functional.hlsl index fd23ad388c..98858bae80 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -219,7 +219,7 @@ NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) // ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- -#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ +#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ using base_t = NAME ; \ base_t baseOp; \ From aa9e24daf8bb7bae9ff743f1db899234819ac17f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 18:42:11 +0700 Subject: [PATCH 48/75] Add unary_minus_operator class --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 ++ .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 36 ++++++++++++++----- include/nbl/builtin/hlsl/functional.hlsl | 10 ++++++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 89c10d14fd..b51860a399 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -8,6 +8,7 @@ #include #define ARROW -> +#define NBL_DEREF_THIS (*this) #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static @@ -44,6 +45,7 @@ namespace nbl::hlsl #else #define ARROW .arrow(). 
+#define NBL_DEREF_THIS this #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ce98d5268f..ba4facad01 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -53,11 +53,8 @@ struct emulated_int64_t constexpr explicit emulated_int64_t(const emulated_uint64_t& other); #endif - NBL_CONSTEXPR_FUNC this_t operator-() NBL_CONST_MEMBER_FUNC - { - storage_t inverted = ~data; - return create(_static_cast(inverted)) + _static_cast(1); - } + NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC; + }; // ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- @@ -196,24 +193,25 @@ constexpr emulated_uint64_t::operator I() const noexcept template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) struct left_shift_operator) > { + using type_t = T; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); // Can't do generic templated definition, see: //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, uint32_t bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) { const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; - const T shifted = T::create(bigShift ? vector(0, operand.__getLSB() << shift) + const type_t shifted = type_t::create(bigShift ? 
vector(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - ternary_operator ternary; + ternary_operator ternary; return ternary(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, T bits) + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) { return operator()(operand, _static_cast(bits)); } @@ -381,6 +379,26 @@ NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::iden template<> NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; +// --------------------------------- Unary operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl +template<> +struct unary_minus_operator +{ + using type_t = emulated_int64_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand) + { + using storage_t = type_t::storage_t; + storage_t inverted = ~operand.data; + return type_t::create(_static_cast(inverted)) + _static_cast(1); + } +}; + +NBL_CONSTEXPR_INLINE_FUNC emulated_int64_t emulated_int64_t::operator-() NBL_CONST_MEMBER_FUNC +{ + unary_minus_operator unaryMinus; + return unaryMinus(NBL_DEREF_THIS); +} } //namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 98858bae80..f0730a12d2 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -487,7 +487,17 @@ struct logical_right_shift_operator } }; +// ----------------------------------------------------------------- UNARY OPERATORS -------------------------------------------------------------------- +template +struct unary_minus_operator +{ + using type_t = T; + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) + { + return -operand; + } +}; } //namespace nbl } 
//namespace hlsl From 6683cd5a0f7965caa8484ea40b3847bab23b54a0 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 18:42:29 +0700 Subject: [PATCH 49/75] Remove commented code on emulated/vector_t.hlsl --- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index cdeddeb105..f153fb1062 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -40,9 +40,6 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr -//using TAsUint = typename unsigned_integer_of_size::type; -//TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); -//return nbl::hlsl::bit_cast(invalidComponentValue); return nbl::hlsl::undef(); } @@ -77,9 +74,6 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr -//using TAsUint = typename unsigned_integer_of_size::type; -//TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); -//return nbl::hlsl::bit_cast(invalidComponentValue); return nbl::hlsl::undef(); } @@ -118,9 +112,6 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr -//using TAsUint = typename unsigned_integer_of_size::type; -//uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); -//return nbl::hlsl::bit_cast(invalidComponentValue); return nbl::hlsl::undef(); } From cdb6ad7d3865af1e3390127af5da008e44ead6ce Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 21:00:47 +0700 Subject: [PATCH 50/75] Unify all Truncate specialization for vector type --- .../nbl/builtin/hlsl/cpp_compat/truncate.hlsl | 50 ++++++------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl 
b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl index 38467942f9..ffe3d12641 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -9,6 +9,12 @@ namespace nbl namespace hlsl { +namespace concepts +{ + template + NBL_BOOL_CONCEPT can_truncate_vector = concepts::Vectorial && concepts::Vectorial && concepts::same_as::scalar_type, typename vector_traits::scalar_type > && vector_traits::Dimension <= vector_traits::Dimension; +} + namespace impl { @@ -21,44 +27,20 @@ struct Truncate } }; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::can_truncate_vector) +struct Truncate) > { - NBL_CONSTEXPR_FUNC vector operator()(const vector v) + NBL_CONSTEXPR_FUNC To operator()(const From v) { - vector truncated = { v[0] }; - return truncated; + array_get::scalar_type> getter; + array_set::scalar_type> setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, getter(v, i)); + return output; } -}; - -template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > -{ - NBL_CONSTEXPR_FUNC vector operator()(const vector v) - { - vector truncated = { v[0], v[1]}; - return truncated; - } -}; -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > -{ - NBL_CONSTEXPR_FUNC vector operator()(const vector v) - { - vector truncated = { v[0], v[1], v[2] }; - return truncated; - } -}; - -template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) -struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > -{ - NBL_CONSTEXPR_FUNC vector operator()(const vector v) - { - vector truncated = { v[0], v[1], v[2], v[3] }; - return truncated; - } }; } //namespace impl From c365240ed060b45d535e3a9293c91da2d9f01e61 Mon Sep 17 00:00:00 
2001 From: kevyuu Date: Mon, 1 Dec 2025 23:04:34 +0700 Subject: [PATCH 51/75] Fix promote.hlsl and reduce the amount of specialization for Promote --- .../nbl/builtin/hlsl/cpp_compat/promote.hlsl | 49 ++++--------------- include/nbl/builtin/hlsl/morton.hlsl | 5 +- 2 files changed, 12 insertions(+), 42 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 6e75a55b1b..cd4ac3193c 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -21,50 +21,21 @@ struct Promote } }; -#ifdef __HLSL_VERSION - -template -struct Promote, U> +// TODO(kevinyu): Should we enable truncation from uint64_t to emulated_vector? +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && is_scalar_v && is_same_v::scalar_type, From>) +struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > { - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) + NBL_CONSTEXPR_FUNC To operator()(const From v) { - vector promoted = {Scalar(v)}; - return promoted; + array_set setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, v); + return output; } }; -template -struct Promote, U> -{ - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) - { - vector promoted = {Scalar(v), Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> -{ - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> -{ - NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(const U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -#endif - } template diff --git a/include/nbl/builtin/hlsl/morton.hlsl 
b/include/nbl/builtin/hlsl/morton.hlsl index 696124ae0c..6968d414fc 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -137,8 +137,7 @@ struct Transcoder return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } - template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + template 16), vector, vector > > /** * @brief Encodes a vector of cartesian coordinates as a Morton code * @@ -216,7 +215,7 @@ struct Equal NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); - const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); + const portable_vector_t zeros = promote>(_static_cast(0)); const portable_vector_t rhsCasted = _static_cast >(rhs); const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); From ec1d6745fb34451f9fda2f0a68a6047866630e14 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 06:09:26 +0700 Subject: [PATCH 52/75] Make promote constrainable --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index cd4ac3193c..9f2b58047f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -12,7 +12,7 @@ namespace impl { // partial specialize this for `T=matrix|vector` and `U=matrix|vector|scalar_t` -template +template struct Promote { NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) From 51e35cf27e59311b5abd586b424d63a6502fdeb3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 06:09:34 +0700 Subject: [PATCH 53/75] equal to _equal --- include/nbl/builtin/hlsl/morton.hlsl | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 6968d414fc..d03a02a09c 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -215,12 +215,12 @@ struct Equal NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) { const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); - const portable_vector_t zeros = promote>(_static_cast(0)); + const portable_vector_t zeros = promote >(_static_cast(0)); const portable_vector_t rhsCasted = _static_cast >(rhs); const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); - equal_to > equal; - return equal(xored, zeros); + equal_to > _equal; + return _equal(xored, zeros); } }; From 062ce7b632b7fc90c1b3fdaec0fdce2ddb52c1b5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 06:18:19 +0700 Subject: [PATCH 54/75] Remove some constraint in morton::code::create --- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d03a02a09c..869425b856 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -326,7 +326,7 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t> + NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; From 6c824283c493c050aa85bb7710fa3d22768b4341 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 12:40:28 +0700 Subject: [PATCH 55/75] Remove NBL_CONSTEXPR_INLINE_VAR macro --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 -- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 16 ++++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index b51860a399..a5715efa15 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -14,7 +14,6 @@ #define NBL_CONSTEXPR_STATIC constexpr static #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline -#define NBL_CONSTEXPR_INLINE_VAR constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const #define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR constexpr inline @@ -51,7 +50,6 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_STATIC const static #define NBL_CONSTEXPR_STATIC_INLINE const static #define NBL_CONSTEXPR_INLINE_FUNC inline -#define NBL_CONSTEXPR_INLINE_VAR static const #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC #define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR const static diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index ba4facad01..0b890fb2b2 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -331,13 +331,13 @@ struct minus) > }; template<>
-NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); template<> -NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl @@ -371,13 +371,13 @@ struct minus_assign }; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t plus_assign::identity = plus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus_assign::identity = plus::identity; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_uint64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus_assign::identity = minus::identity; template<> -NBL_CONSTEXPR_INLINE_VAR emulated_int64_t minus_assign::identity = minus::identity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus_assign::identity = minus::identity; // --------------------------------- Unary operators ------------------------------------------ // Specializations of the structs found in functional.hlsl From ca2ac6f5151b35f4570f9232356e40fc85cdaf64 Mon Sep 17 00:00:00 2001 From: kevyuu Date: 
Tue, 2 Dec 2025 12:41:02 +0700 Subject: [PATCH 56/75] Remove Bit count constraint on some of Transcoder method due to redundancy. --- include/nbl/builtin/hlsl/morton.hlsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 869425b856..e6deaf5be4 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -342,7 +342,7 @@ struct code * * @param [in] cartesian Coordinates to encode */ - template= Bits) + template inline explicit code(NBL_CONST_REF_ARG(vector) cartesian) { *this = create(cartesian); @@ -351,7 +351,7 @@ struct code /** * @brief Decodes this Morton code back to a set of cartesian coordinates */ - template= Bits && is_signed_v == Signed) + template == Signed) constexpr explicit operator vector() const noexcept; #endif @@ -521,8 +521,8 @@ namespace impl { // I must be of same signedness as the morton code, and be wide enough to hold each component -template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) -struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar) > { NBL_CONSTEXPR_STATIC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) { @@ -606,7 +606,7 @@ constexpr morton::code morton::code&& D* Bits <= 64) -template = Bits && is_signed_v == Signed) +template == Signed) constexpr morton::code::operator vector() const noexcept { return _static_cast, morton::code>(*this); From 4c9635d5bf6ead8d39b2775a6c12c75930732aa3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 2 Dec 2025 12:46:48 +0700 Subject: [PATCH 57/75] Use cpp syntax instead of portable macro wherever possible --- include/nbl/builtin/hlsl/functional.hlsl | 10 +++++----- 1 file 
changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index f0730a12d2..dc718e5928 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -91,7 +91,7 @@ struct reference_wrapper : enable_if_t< #else // CPP -#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; #endif @@ -136,7 +136,7 @@ ALIAS_STD(divides,/) #ifndef __HLSL_VERSION -template +template struct bit_not : std::bit_not { using type_t = T; @@ -184,11 +184,11 @@ ALIAS_STD(less_equal, <=) }; // GLM doesn't have operators on vectors #ifndef __HLSL_VERSION -#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ -struct NAME ) >\ +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template requires (concepts::Vectorial)\ +struct NAME \ {\ using type_t = T;\ - vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + vector::Dimension> operator()(const T& lhs, const T& rhs)\ {\ return glm::GLM_OP (lhs, rhs);\ }\ From 23292bd47b9ef9b5d1d4bae7f97be0fa19b68b2b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 3 Dec 2025 13:58:02 +0700 Subject: [PATCH 58/75] Fix morton code constraint --- include/nbl/builtin/hlsl/morton.hlsl | 30 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e6deaf5be4..41461a0841 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -108,18 +108,21 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) struct Transcoder { - template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename 
vector_traits::scalar_type) >= Bits) + using decode_t = conditional_t < (Bits > 16), vector, vector >; + + template ) /** * @brief Interleaves each coordinate with `Dim - 1` zeros inbetween each bit, and left-shifts each by their coordinate index * * @param [in] decodedValue Cartesian coordinates to interleave and shift */ - NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(T) decodedValue) { left_shift_operator > leftShift; portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; + // Read this to understand how interleaving and spreading bits works https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ {\ interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\ @@ -137,15 +140,15 @@ struct Transcoder return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); } - template 16), vector, vector > > + template /** * @brief Encodes a vector of cartesian coordinates as a Morton code * * @param [in] decodedValue Cartesian coordinates to encode */ - NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(T) decodedValue) { - const portable_vector_t interleaveShifted = interleaveShift(decodedValue); + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); array_get, encode_t> getter; encode_t encoded = getter(interleaveShifted, 0); @@ -157,8 +160,6 @@ struct Transcoder return encoded; } - template 16), vector, vector > - NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) /** * @brief Decodes a Morton code back to a vector of cartesian coordinates * @@ -231,7 +232,8 @@ struct Equal NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, 
NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; - const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); return Equal::template __call(value, interleaved); } }; @@ -281,7 +283,8 @@ struct BaseComparison NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) { using right_sign_t = conditional_t, make_unsigned_t >; - const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); return BaseComparison::template __call(value, interleaved); } }; @@ -309,6 +312,8 @@ struct code using this_signed_t = code; NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; + + using transcoder_t = impl::Transcoder; storage_t value; @@ -326,11 +331,12 @@ struct code * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed), this_t> + NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed && sizeof(I) == sizeof(vector_traits::scalar_type)), this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - retVal.value = impl::Transcoder::encode(cartesian); + using decode_t = typename transcoder_t::decode_t; + retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; } From 5da522e319acdf93c9f0bfd581791e2c25826354 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 3 Dec 2025 23:17:45 +0700 Subject: [PATCH 59/75] Add assert in morton code creation --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 ++ include/nbl/builtin/hlsl/morton.hlsl | 35 +++++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index a5715efa15..3ca499c567 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -21,6 +21,7 @@ #define NBL_CONSTEXPR_OOL_MEMBER constexpr #define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) +#define NBL_ASSERT(...) assert(__VA_ARGS__) namespace nbl::hlsl { @@ -57,6 +58,7 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_OOL_MEMBER const #define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) +#define NBL_ASSERT(...) 
namespace nbl { diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 41461a0841..9ee59b7e78 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -25,7 +25,31 @@ namespace impl template NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; -// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) +{ + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = (~((T(1) << Bits) - 1)); + const bool allZero = ((val & mask) == 0); + NBL_IF_CONSTEXPR(is_signed_v) + { + const bool allOne = ((val & mask) == mask); + return allZero || allOne; + } + return allZero; +} + +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegralVec(vector vec) +{ + array_get, T> getter; + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + if (!verifyAnyBitIntegral(getter(vec, i))) return false; + return true; +} + + +// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS --------------------------------------------------- NBL_CONSTEXPR uint16_t CodingStages = 5; @@ -108,7 +132,8 @@ NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) struct Transcoder { - using decode_component_t = conditional_t<(Bits > 16), uint32_t, uint16_t>; + using decode_component_t = conditional_t<(Bits > 16), uint32_t, uint16_t>; + using decode_t = vector; template ) @@ -314,6 +339,9 @@ struct code using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; using transcoder_t = impl::Transcoder; + using decode_component_t = conditional_t, + typename transcoder_t::decode_component_t>; storage_t value; @@ -331,10 +359,11 @@ struct code * @param [in] cartesian Coordinates to encode.
Signedness MUST match the signedness of this Morton code class */ template - NBL_CONSTEXPR_STATIC enable_if_t && is_scalar_v && (is_signed_v == Signed && sizeof(I) == sizeof(vector_traits::scalar_type)), this_t> + NBL_CONSTEXPR_STATIC enable_if_t , this_t> create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; + NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian) == true)); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; From 812ae7b580ef4e283dc8b8c45e331a5ac85f08d9 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 3 Dec 2025 23:30:37 +0700 Subject: [PATCH 60/75] Fix is_emulating concepts --- include/nbl/builtin/hlsl/concepts/core.hlsl | 8 ++++---- include/nbl/builtin/hlsl/morton.hlsl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index 4a8b848cb8..e3ff3f611f 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -72,23 +72,23 @@ namespace impl template struct is_emulating_floating_point_scalar { - NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; + NBL_CONSTEXPR_STATIC_INLINE bool value = false; }; template struct is_emulating_integral_scalar { - NBL_CONSTEXPR_STATIC_INLINE bool value = IntegralScalar; + NBL_CONSTEXPR_STATIC_INLINE bool value = false; }; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template -NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +NBL_BOOL_CONCEPT FloatingPointLikeScalar = FloatingPointScalar || impl::is_emulating_floating_point_scalar::value; //! 
Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) template -NBL_BOOL_CONCEPT IntegralLikeScalar = impl::is_emulating_integral_scalar::value; +NBL_BOOL_CONCEPT IntegralLikeScalar = IntegralScalar || impl::is_emulating_integral_scalar::value; } } diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 9ee59b7e78..67e83f6169 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -28,7 +28,7 @@ NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; template && concepts::Scalar) NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) { - NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = (~((T(1) << Bits) - 1)); + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((T(1) << Bits) - 1); const bool allZero = ((val & mask) == 0); NBL_IF_CONSTEXPR(is_signed_v) { From 341d6cd033969efcd214a2bb495d1612a591eb14 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:10:17 +0700 Subject: [PATCH 61/75] Move storage_t to common_inc --- include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl | 2 +- include/nbl/builtin/hlsl/emulated/int64_t.hlsl | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl index 2dd7bafa41..3818814a49 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl @@ -1,4 +1,4 @@ - +using storage_t = vector; storage_t data; /** diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 0b890fb2b2..b44709bc01 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -19,7 +19,6 @@ struct emulated_int64_t; struct emulated_uint64_t { - using storage_t = vector; using this_t = emulated_uint64_t; 
NBL_CONSTEXPR_STATIC_INLINE bool Signed = false; @@ -38,7 +37,6 @@ struct emulated_uint64_t struct emulated_int64_t { - using storage_t = vector; using this_t = emulated_int64_t; NBL_CONSTEXPR_STATIC_INLINE bool Signed = true; From 2fd2cbaaedea4d20233d0869d8f2e8125398b46f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:10:49 +0700 Subject: [PATCH 62/75] Rename ImitationIntegral64Scalar to EmulatedIntegral64Scalar --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index b44709bc01..4c950859e6 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -68,7 +68,7 @@ namespace concepts { template -NBL_BOOL_CONCEPT ImitationIntegral64Scalar = same_as || same_as; +NBL_BOOL_CONCEPT EmulatedIntegralScalar64 = same_as || same_as; namespace impl { @@ -93,8 +93,8 @@ struct is_emulating_integral_scalar namespace impl { -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar && concepts::ImitationIntegral64Scalar && !concepts::same_as) -struct static_cast_helper && concepts::ImitationIntegral64Scalar && !concepts::same_as) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64 && concepts::EmulatedIntegralScalar64 && !concepts::same_as) +struct static_cast_helper && concepts::EmulatedIntegralScalar64 && !concepts::same_as) > { NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) @@ -105,8 +105,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(To) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper 
&& (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { // Return only the lowest bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) @@ -115,8 +115,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) { @@ -124,8 +124,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { // Set only lower bits NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) @@ -134,8 +134,8 @@ struct static_cast_helper NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) -struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::ImitationIntegral64Scalar) > +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > { NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) { @@ -188,8 +188,8 @@ constexpr emulated_uint64_t::operator I() const noexcept // ---------------------- 
Functional operators ------------------------ -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct left_shift_operator) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct left_shift_operator) > { using type_t = T; NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); @@ -302,8 +302,8 @@ constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) co // Specializations of the structs found in functional.hlsl // These all have to be specialized because of the identity that can't be initialized inside the struct definition -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct plus) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus) > { using type_t = T; @@ -315,8 +315,8 @@ struct plus) > const static type_t identity; }; -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct minus) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus) > { using type_t = T; @@ -340,8 +340,8 @@ NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus::id // --------------------------------- Compound assignment operators ------------------------------------------ // Specializations of the structs found in functional.hlsl -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct plus_assign) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus_assign) > { using type_t = T; using base_t = plus; @@ -354,8 +354,8 @@ struct plus_assign) const static type_t identity; }; -template NBL_PARTIAL_REQ_TOP(concepts::ImitationIntegral64Scalar) -struct minus_assign) > +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus_assign) > { using type_t = T; using base_t = minus; From 1255d1c30e9f2f1b36c3f71bdad1ff26b5488038 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:21:36 +0700 Subject: [PATCH 63/75] Fix extent and remove 
duplicated extent specialization --- include/nbl/builtin/hlsl/type_traits.hlsl | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index b682b8da8b..257a753129 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -732,11 +732,11 @@ struct extent : integral_constant::value> {}; template struct extent : integral_constant::value> {}; -template -struct extent, 0> : integral_constant {}; +template +struct extent, I> : extent {}; template -struct extent, I> : integral_constant::value> {}; +struct extent, I> : extent {}; // Template Variables @@ -855,12 +855,6 @@ struct float_of_size<8> template using float_of_size_t = typename float_of_size::type; -template -struct extent, 0> : integral_constant {}; - -template -struct extent, 1> : integral_constant {}; - } } From 527129fa79399008977dfe730cf2d2ed11873fd1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 00:40:55 +0700 Subject: [PATCH 64/75] Remove redundant extent --- include/nbl/builtin/hlsl/concepts/vector.hlsl | 4 ---- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 3ea3199951..f132531cb9 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -46,10 +46,6 @@ NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::Signed } -template -NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct extent) > : integral_constant::Dimension> {}; - } } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index f153fb1062..82a1360b49 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ 
b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -627,6 +627,8 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) } //namespace impl +template +struct extent, I> : extent {}; } } #endif \ No newline at end of file From ed696efd6ea9e715591c2b2d7e98e7f0f1a1eada Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 01:24:34 +0700 Subject: [PATCH 65/75] Fix unary_minus_operator --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 27 ++++--------------- include/nbl/builtin/hlsl/functional.hlsl | 12 ++++++++- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 4c950859e6..30c23d8693 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -51,7 +51,11 @@ struct emulated_int64_t constexpr explicit emulated_int64_t(const emulated_uint64_t& other); #endif - NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC; + NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC + { + storage_t inverted = ~data; + return create(_static_cast(inverted)) + _static_cast(1); + } }; @@ -377,27 +381,6 @@ NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus_assign NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus_assign::identity = minus::identity; -// --------------------------------- Unary operators ------------------------------------------ -// Specializations of the structs found in functional.hlsl -template<> -struct unary_minus_operator -{ - using type_t = emulated_int64_t; - - NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand) - { - using storage_t = type_t::storage_t; - storage_t inverted = ~operand.data; - return type_t::create(_static_cast(inverted)) + _static_cast(1); - } -}; - -NBL_CONSTEXPR_INLINE_FUNC emulated_int64_t emulated_int64_t::operator-() NBL_CONST_MEMBER_FUNC -{ - unary_minus_operator unaryMinus; - return unaryMinus(NBL_DEREF_THIS); -} - } 
//namespace nbl } //namespace hlsl diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index dc718e5928..4d5889fe05 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -495,7 +495,17 @@ struct unary_minus_operator NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) { - return -operand; + return operand.operator-(); + } +}; + +template NBL_PARTIAL_REQ_TOP(is_fundamental_v) +struct unary_minus_operator) > +{ + using type_t = T; + NBL_CONSTEXPR_FUNC T operator()(const T operand) + { + return -operand; } }; From 4da1fb8ea99a06b39b4bb2c85c534bf538c3e78b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 01:24:58 +0700 Subject: [PATCH 66/75] Fix redundant extent specialization --- include/nbl/builtin/hlsl/emulated/vector_t.hlsl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 82a1360b49..4eb8b7bf06 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -491,6 +491,15 @@ DEFINE_SCALAR_OF_SPECIALIZATION(3) DEFINE_SCALAR_OF_SPECIALIZATION(4) #undef DEFINE_SCALAR_OF_SPECIALIZATION +#define DEFINE_EXTENT_SPECIALIZATION(DIMENSION)\ +template\ +struct extent, I> : extent {}; + +DEFINE_EXTENT_SPECIALIZATION(2) +DEFINE_EXTENT_SPECIALIZATION(3) +DEFINE_EXTENT_SPECIALIZATION(4) +#undef DEFINE_EXTENT_SPECIALIZATION + namespace impl { template @@ -627,8 +636,6 @@ NBL_EMULATED_VEC_TRUNCATION(4, 4) } //namespace impl -template -struct extent, I> : extent {}; } } #endif \ No newline at end of file From 402b8231a3c53090cfa5db751ed63fd2e328473f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 07:40:54 +0700 Subject: [PATCH 67/75] Replace [[unroll]] with NBL_UNROLL --- include/nbl/builtin/hlsl/morton.hlsl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) 
diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 67e83f6169..d7a781fad9 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -42,7 +42,7 @@ template vec) { array_get, T> getter; - [[unroll]] + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) if (!verifyAnyBitIntegral(getter(vec, i))) return false; return true; @@ -178,7 +178,7 @@ struct Transcoder array_get, encode_t> getter; encode_t encoded = getter(interleaveShifted, 0); - [[unroll]] + NBL_UNROLL for (uint16_t i = 1; i < Dim; i++) encoded = encoded | getter(interleaveShifted, i); @@ -196,7 +196,7 @@ struct Transcoder portable_vector_t decoded; array_set, encode_t> setter; // Write initial values into decoded - [[unroll]] + NBL_UNROLL for (uint16_t i = 0; i < Dim; i++) setter(decoded, i, encodedRightShift(encodedValue, i)); @@ -363,7 +363,7 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian) == true)); + NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; @@ -466,7 +466,7 @@ struct code array_get, storage_t> getter; this_t retVal; retVal.value = getter(interleaveShiftedResult, 0); - [[unroll]] + NBL_UNROLL for (uint16_t i = 1; i < D; i++) retVal.value = retVal.value | getter(interleaveShiftedResult, i); return retVal; @@ -486,7 +486,7 @@ struct code array_get, storage_t> getter; this_t retVal; retVal.value = getter(interleaveShiftedResult, 0); - [[unroll]] + NBL_UNROLL for (uint16_t i = 1; i < D; i++) retVal.value = retVal.value | getter(interleaveShiftedResult, i); From 99b25ffd97c9b5ace8213d5c5ca334361ea3ff75 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 08:18:24 +0700 Subject: [PATCH 68/75] Allow promote to work between emulated type and non emulated type --- 
include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 9f2b58047f..6a8476e644 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -21,8 +21,7 @@ struct Promote } }; -// TODO(kevinyu): Should we enable truncation from uint64_t to emulated_vector? -template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && is_scalar_v && is_same_v::scalar_type, From>) +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > { NBL_CONSTEXPR_FUNC To operator()(const From v) From 70a88fa975b91bad0d141e30b9b5ee9476c59f29 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 13:54:11 +0700 Subject: [PATCH 69/75] Refactor unary operator in hlsl functionals --- include/nbl/builtin/hlsl/functional.hlsl | 82 +++++++++--------------- 1 file changed, 29 insertions(+), 53 deletions(-) diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 4d5889fe05..da416a538f 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -134,41 +134,6 @@ ALIAS_STD(divides,/) NBL_CONSTEXPR_STATIC_INLINE T identity = T(1); }; -#ifndef __HLSL_VERSION - -template -struct bit_not : std::bit_not -{ - using type_t = T; -}; - -#else - -template -struct bit_not -{ - using type_t = T; - - T operator()(NBL_CONST_REF_ARG(T) operand) - { - return ~operand; - } -}; - -// The default version above only works for fundamental scalars, vectors and matrices. This is because you can't call `~x` unless `x` is one of the former. -// Similarly, calling `x.operator~()` is not valid for the aforementioned, and only for types overriding this operator. 
So, we need a specialization. -template NBL_PARTIAL_REQ_TOP(!(concepts::Scalar || concepts::Vector || concepts::Matrix)) -struct bit_not || concepts::Vector || concepts::Matrix)) > -{ - using type_t = T; - - T operator()(NBL_CONST_REF_ARG(T) operand) - { - return operand.operator~(); - } -}; - -#endif ALIAS_STD(equal_to, ==) }; ALIAS_STD(not_equal_to, !=) }; @@ -488,27 +453,38 @@ struct logical_right_shift_operator }; // ----------------------------------------------------------------- UNARY OPERATORS -------------------------------------------------------------------- -template -struct unary_minus_operator -{ - using type_t = T; - - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) - { - return operand.operator-(); - } +#ifndef __HLSL_VERSION +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME : std::NAME { \ + using type_t = T; \ }; +#else +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) \ + { \ + return operand.operator OP(); \ + } \ +}; \ +template NBL_PARTIAL_REQ_TOP(concepts::Scalar || concepts::Vector || concepts::Matrix ) \ +struct NAME || concepts::Vector || concepts::Matrix ) > \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(const T operand) \ + { \ + return (OP operand); \ + } \ +}; +#endif + +NBL_UNARY_OP_SPECIALIZATION(bit_not, ~) +NBL_UNARY_OP_SPECIALIZATION(negate, -) -template NBL_PARTIAL_REQ_TOP(is_fundamental_v) -struct unary_minus_operator) > -{ - using type_t = T; - NBL_CONSTEXPR_FUNC T operator()(const T operand) - { - return -operand; - } -}; + +#endif } //namespace nbl } //namespace hlsl From ded5d8fcd8aa348b9934f02a786b740d68c5b7a7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 15:19:21 +0700 Subject: [PATCH 70/75] Fix misplaced #endif in functional.hlsl --- include/nbl/builtin/hlsl/functional.hlsl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index da416a538f..757ad7294d 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -482,10 +482,7 @@ struct NAME || concepts::Vector || NBL_UNARY_OP_SPECIALIZATION(bit_not, ~) NBL_UNARY_OP_SPECIALIZATION(negate, -) - - -#endif } //namespace nbl } //namespace hlsl -#endif \ No newline at end of file +#endif From 17d07177d87f38e7beff6cf8881e548ee670aa29 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 17:50:22 +0700 Subject: [PATCH 71/75] Fix ternary_operation --- .../nbl/builtin/hlsl/emulated/int64_t.hlsl | 10 +++--- include/nbl/builtin/hlsl/functional.hlsl | 33 +++++++++++++++---- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl index 30c23d8693..1324998d1a 100644 --- a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -208,8 +208,7 @@ struct left_shift_operator(0, operand.__getLSB() << shift) : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); - ternary_operator ternary; - return ternary(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined @@ -235,8 +234,8 @@ struct arithmetic_right_shift_operator const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); - ternary_operator ternary; - return ternary(bool(bits), shifted, operand); + + return select(bool(bits), shifted, operand); } // If `_bits > 63` the result is undefined @@ -262,8 +261,7 @@ struct arithmetic_right_shift_operator const uint32_t shift = bigShift ? 
bits - ComponentBitWidth : ComponentBitWidth - bits; const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? ~uint32_t(0) : uint32_t(0)) : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); - ternary_operator ternary; - return ternary(bool(bits), shifted, operand); + return select(bool(bits), shifted, operand); } // If `_bits > 63` or `_bits < 0` the result is undefined diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 757ad7294d..7531c5cdb9 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -235,16 +235,35 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +#ifndef __HLSL_VERSION +template requires(is_same_v, std::invoke_result_t()> ) struct ternary_operator { - using type_t = T; - - NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) - { - return select(condition, lhs, rhs); - } + using type_t = std::invoke_result_t; + + constexpr inline type_t operator()(const bool condition, const F1& lhs, const F2& rhs) + { + if (condition) + return std::invoke(lhs); + else + return std::invoke(rhs); + } }; +#else +template()()),decltype(experimental::declval()())> ) +struct ternary_operator +{ + using type_t = decltype(experimental::declval().operator()); + + NBL_CONSTEXPR_FUNC type_t operator()(const bool condition, NBL_CONST_REF_ARG(F1) lhs, NBL_CONST_REF_ARG(F2) rhs) + { + if (condition) + return lhs(); + else + return rhs(); + } +}; +#endif // ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- From 791b2b917d785f51616a13ea24510a5ae30b602e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 
Dec 2025 17:59:08 +0700 Subject: [PATCH 72/75] Improve some comment --- include/nbl/builtin/hlsl/morton.hlsl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index d7a781fad9..e72ec9a76b 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -59,9 +59,7 @@ struct coding_mask; template NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); -// It's a complete cointoss whether template variables work or not, since it's a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work, -// but this one will only work for some parameters and not for others. Therefore, this was made into a macro to inline where used - +// constexpr vector is not supported since it is not a fundamental type, which means it cannot be stored or leaked outside of constexpr context, it can only exist transiently. So the only way to return vector is to make the function consteval. Thus, we use macro to inline where it is used. #define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\ truncate >(\ vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\ From ac2070e0998a977d87ac524412f63efa6f560ea4 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 19:38:54 +0700 Subject: [PATCH 73/75] Remove NBL_ASSERT --- include/nbl/builtin/hlsl/cpp_compat/basic.h | 2 -- include/nbl/builtin/hlsl/morton.hlsl | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 3ca499c567..a5715efa15 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -21,7 +21,6 @@ #define NBL_CONSTEXPR_OOL_MEMBER constexpr #define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) -#define NBL_ASSERT(...) 
assert(__VA_ARGS__) namespace nbl::hlsl { @@ -58,7 +57,6 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_OOL_MEMBER const #define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) -#define NBL_ASSERT(...) namespace nbl { diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index e72ec9a76b..08b2b1ccfb 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -362,6 +362,7 @@ struct code { this_t retVal; NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); + assert((impl::verifyAnyBitIntegralVec(cartesian))); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian)); return retVal; From a4dabdf9f267c93da1a340aa51600ed9443004e2 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 4 Dec 2025 20:45:21 +0700 Subject: [PATCH 74/75] Simplify mix helper by using select_helper in some specialization --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 50 ++----------------- 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 7850fd7cf3..67a9f67d8f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -268,20 +268,6 @@ struct mix_helper) > } }; -template -NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable) -struct mix_helper) > -{ - using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; - // for a component of a that is false, the corresponding component of x is returned - // for a component of a that is true, the corresponding component of y is returned - // so we make sure this is correct when calling the operation - static inline return_t __call(const T x, const T y, const U a) - { - return spirv::select(a, y, x); - } -}; - template 
NBL_PARTIAL_REQ_TOP(matrix_traits::Square) struct determinant_helper::Square) > { @@ -980,43 +966,13 @@ struct mix_helper -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::BooleanScalar) -struct mix_helper && concepts::BooleanScalar) > +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && concepts::BooleanScalar) +struct mix_helper && concepts::BooleanScalar) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) { - using traitsT = hlsl::vector_traits; - array_get getterT; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), a)); - - return output; - } -}; - -template -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) -struct mix_helper && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) > -{ - using return_t = T; - static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) - { - using traitsT = hlsl::vector_traits; - using traitsU = hlsl::vector_traits; - array_get getterT; - array_get getterU; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), getterU(a, i))); - - return output; + return select_helper(a, y, x); } }; From 1a32ed257d717b57ba4a51b1a7f529a21c3cec1c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 8 Dec 2025 20:58:54 +0700 Subject: [PATCH 75/75] Remove NBL_ASSERT usage --- include/nbl/builtin/hlsl/morton.hlsl | 1 - 1 file changed, 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl index 08b2b1ccfb..4e90fd4c91 100644 --- a/include/nbl/builtin/hlsl/morton.hlsl +++ 
b/include/nbl/builtin/hlsl/morton.hlsl @@ -361,7 +361,6 @@ struct code create(NBL_CONST_REF_ARG(vector) cartesian) { this_t retVal; - NBL_ASSERT((impl::verifyAnyBitIntegralVec(cartesian))); assert((impl::verifyAnyBitIntegralVec(cartesian))); using decode_t = typename transcoder_t::decode_t; retVal.value = transcoder_t::encode(_static_cast(cartesian));