diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl index 7eca7d51df..66442a11a1 100644 --- a/include/nbl/builtin/hlsl/algorithm.hlsl +++ b/include/nbl/builtin/hlsl/algorithm.hlsl @@ -19,7 +19,7 @@ namespace impl // TODO: use structs template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { T tmp = lhs; lhs = rhs; @@ -27,7 +27,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -35,7 +35,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -43,7 +43,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -51,7 +51,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -59,7 +59,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -67,7 +67,7 @@ namespace impl } template<> - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs) { lhs ^= rhs; rhs ^= lhs; @@ -75,7 +75,7 @@ namespace impl } #else template - NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { std::swap(lhs, rhs); } @@ -83,7 +83,7 @@ namespace impl } template -NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) +NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs) { impl::swap(lhs, rhs); } diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl index 0e2e9d1291..58f697e19c 100644 --- a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl @@ -406,7 +406,7 @@ template struct is_ggx : impl::is_ggx {}; template -NBL_CONSTEXPR bool is_ggx_v = is_ggx::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_ggx_v = is_ggx::value; } } diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl index da04c49b51..7f59d103fa 100644 --- a/include/nbl/builtin/hlsl/complex.hlsl +++ b/include/nbl/builtin/hlsl/complex.hlsl @@ -436,22 +436,6 @@ complex_t rotateRight(NBL_CONST_REF_ARG(complex_t) value) return retVal; } -template -struct ternary_operator< complex_t > -{ - using type_t = complex_t; - - complex_t operator()(bool condition, NBL_CONST_REF_ARG(complex_t) lhs, NBL_CONST_REF_ARG(complex_t) rhs) - { - const vector lhsVector = vector(lhs.real(), lhs.imag()); - const vector rhsVector = vector(rhs.real(), rhs.imag()); - const vector resultVector = condition ? lhsVector : rhsVector; - const complex_t result = { resultVector.x, resultVector.y }; - return result; - } -}; - - } } diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl index dcbafae8a5..e3ff3f611f 100644 --- a/include/nbl/builtin/hlsl/concepts/core.hlsl +++ b/include/nbl/builtin/hlsl/concepts/core.hlsl @@ -72,13 +72,23 @@ namespace impl template struct is_emulating_floating_point_scalar { - NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar; + NBL_CONSTEXPR_STATIC_INLINE bool value = false; +}; + +template +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = false; }; } //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t) template -NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar::value; +NBL_BOOL_CONCEPT FloatingPointLikeScalar = FloatingPointScalar || impl::is_emulating_floating_point_scalar::value; + +//! Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t) +template +NBL_BOOL_CONCEPT IntegralLikeScalar = IntegralScalar || impl::is_emulating_integral_scalar::value; } } diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl index 468838730a..f132531cb9 100644 --- a/include/nbl/builtin/hlsl/concepts/vector.hlsl +++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl @@ -40,14 +40,12 @@ NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial && concepts template NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial && (is_integral_v::scalar_type>); template +NBL_BOOL_CONCEPT IntegralLikeVectorial = concepts::Vectorial && concepts::IntegralLikeScalar::scalar_type>; +template NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial && concepts::SignedIntegralScalar::scalar_type>; } -template -NBL_PARTIAL_REQ_TOP(concepts::Vectorial) -struct extent) > : integral_constant::Dimension> {}; - } } #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl index 175a3e76c1..03d47864fb 100644 --- a/include/nbl/builtin/hlsl/cpp_compat.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl @@ -5,5 +5,9 @@ // it includes vector and matrix #include #include +#include + +// Had to push some stuff here to avoid circular dependencies +#include #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h index 87baa1f0d6..a5715efa15 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/basic.h +++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h @@ -3,39 +3,12 @@ #include -namespace nbl -{ -namespace hlsl -{ -namespace impl -{ -template -struct static_cast_helper -{ - static inline To cast(From u) - { -#ifndef __HLSL_VERSION - return static_cast(u); -#else - return To(u); -#endif - } -}; -} - -template -inline To _static_cast(From v) -{ - return impl::static_cast_helper::cast(v); -} - -} -} #ifndef __HLSL_VERSION #include #define ARROW -> +#define NBL_DEREF_THIS (*this) #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC constexpr #define NBL_CONSTEXPR_STATIC constexpr static @@ -43,6 +16,10 @@ inline To _static_cast(From v) #define NBL_CONSTEXPR_INLINE_FUNC constexpr inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr #define NBL_CONST_MEMBER_FUNC const +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR constexpr inline +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR constexpr +#define NBL_CONSTEXPR_OOL_MEMBER constexpr +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER constexpr inline #define NBL_IF_CONSTEXPR(...) if constexpr (__VA_ARGS__) namespace nbl::hlsl @@ -67,6 +44,7 @@ namespace nbl::hlsl #else #define ARROW .arrow(). +#define NBL_DEREF_THIS this #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR #define NBL_CONSTEXPR_FUNC #define NBL_CONSTEXPR_STATIC const static @@ -74,6 +52,10 @@ namespace nbl::hlsl #define NBL_CONSTEXPR_INLINE_FUNC inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline #define NBL_CONST_MEMBER_FUNC +#define NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR const static +#define NBL_CONSTEXPR_FUNC_SCOPE_VAR const +#define NBL_CONSTEXPR_OOL_MEMBER const +#define NBL_CONSTEXPR_INLINE_OOL_MEMBER const #define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) namespace nbl @@ -102,4 +84,33 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_INLINE To cast(From u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; +} + +template +NBL_CONSTEXPR_INLINE_FUNC To _static_cast(From v) +{ + return impl::static_cast_helper::cast(v); +} + +} +} + #endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index cd89ce45d1..67a9f67d8f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -27,7 +27,7 @@ template::type; - NBL_CONSTEXPR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; + NBL_CONSTEXPR_FUNC_SCOPE_VAR UnsignedInteger Mask = (UnsignedInteger(0) - 1) >> 1; UnsignedInteger absVal = val & Mask; return absVal > (ieee754::traits::specialValueExp << ieee754::traits::mantissaBitCnt); } @@ -90,6 +90,8 @@ template struct all_helper; template struct any_helper; +template +struct select_helper; template struct bitReverseAs_helper; template @@ -119,6 +121,12 @@ struct nMax_helper; template struct nClamp_helper; template +struct addCarry_helper; +template +struct subBorrow_helper; +template +struct undef_helper; +template struct fma_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -133,8 +141,8 @@ struct fma_helper; // the template<> needs to be written ourselves // return type is __VA_ARGS__ to protect against `,` in templated return types #define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ -NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ -struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ using return_t = __VA_ARGS__;\ static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ @@ -156,8 +164,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, any, (T), (T), bool) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), bool) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), bool) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (B)(T), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -179,6 +188,10 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +// Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(undef_helper, undef, (T), , T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fma_helper, fma, (T), (T)(T)(T), T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> @@ -255,20 +268,6 @@ struct mix_helper) > } }; -template -NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable) -struct mix_helper) > -{ - using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; - // for a component of a that is false, the corresponding component of x is returned - // for a component of a that is true, the corresponding component of y is returned - // so we make sure this is correct when calling the operation - static inline return_t __call(const T x, const T y, const U a) - { - return spirv::select(a, y, x); - } -}; - template NBL_PARTIAL_REQ_TOP(matrix_traits::Square) struct determinant_helper::Square) > { @@ -629,6 +628,72 @@ struct nClamp_helper } }; +// Once again no need to restrict the two below with concepts for same reason as HLSL version +template +struct addCarry_helper +{ + using return_t = spirv::AddCarryOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = operand1 + operand2; + retVal.carry = T(retVal.result < operand1); + return retVal; + } +}; + +template +struct subBorrow_helper +{ + using return_t = spirv::SubBorrowOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = static_cast(operand1 - operand2); + retVal.borrow = T(operand1 < operand2); + return retVal; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) +struct select_helper) > +{ + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + return condition ? object1 : object2; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > +{ + NBL_CONSTEXPR_STATIC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + +template +struct undef_helper +{ + NBL_CONSTEXPR_STATIC T __call() + { + T t; + return t; + } +}; + template requires concepts::FloatingPointScalar struct fma_helper @@ -901,43 +966,13 @@ struct mix_helper -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::BooleanScalar) -struct mix_helper && concepts::BooleanScalar) > -{ - using return_t = T; - static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) - { - using traitsT = hlsl::vector_traits; - array_get getterT; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), a)); - - return output; - } -}; - -template -NBL_PARTIAL_REQ_TOP(VECTOR_SPECIALIZATION_CONCEPT && !impl::MixCallingBuiltins && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) -struct mix_helper && concepts::Boolean && concepts::Vectorial && vector_traits::Dimension == vector_traits::Dimension) > +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && concepts::BooleanScalar) +struct mix_helper && concepts::BooleanScalar) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) { - using traitsT = hlsl::vector_traits; - using traitsU = hlsl::vector_traits; - array_get getterT; - array_get getterU; - array_set setter; - - return_t output; - for (uint32_t i = 0; i < traitsT::Dimension; ++i) - setter(output, i, mix_helper::__call(getterT(x, i), getterT(y, i), getterU(a, i))); - - return output; + return select_helper(a, y, x); } }; diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index 7198bae563..27518222b3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -23,6 +23,12 @@ namespace nbl namespace hlsl { +template +NBL_CONSTEXPR_FUNC T undef() +{ + return cpp_compat_intrinsics_impl::undef_helper::__call(); +} + template inline typename cpp_compat_intrinsics_impl::bitCount_helper::return_t bitCount(NBL_CONST_REF_ARG(T) val) { @@ -150,6 +156,12 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } +template +NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +{ + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); +} + /** * @brief Returns x - floor(x). * @@ -217,6 +229,19 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A return cpp_compat_intrinsics_impl::refract_helper::__call(I, N, eta); } +template +NBL_CONSTEXPR_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); +} + +template +NBL_CONSTEXPR_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); +} + + #ifdef __HLSL_VERSION #define NAMESPACE spirv #else diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 51ca73f6d3..6a8476e644 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -12,68 +12,39 @@ namespace impl { // partial specialize this for `T=matrix|vector` and `U=matrix|vector|scalar_t` -template +template struct Promote { - T operator()(U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } }; -#ifdef __HLSL_VERSION - -template -struct Promote, U> -{ - enable_if_t::value && is_scalar::value, vector > operator()(U v) - { - vector promoted = {Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> +template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) +struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC To operator()(const From v) { - vector promoted = {Scalar(v), Scalar(v)}; - return promoted; + array_set setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, v); + return output; } }; -template -struct Promote, U> -{ - enable_if_t::value && is_scalar::value, vector > operator()(U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -template -struct Promote, U> -{ - enable_if_t::value && is_scalar::value, vector > operator()(U v) - { - vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; - return promoted; - } -}; - -#endif - } template -T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(NBL_CONST_REF_ARG(U) v) { impl::Promote _promote; return _promote(v); } + } } diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl new file mode 100644 index 0000000000..ffe3d12641 --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -0,0 +1,58 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ + +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace concepts +{ + template + NBL_BOOL_CONCEPT can_truncate_vector = concepts::Vectorial && concepts::Vectorial && concepts::same_as::scalar_type, typename vector_traits::scalar_type > && vector_traits::Dimension <= vector_traits::Dimension; +} + +namespace impl +{ + +template +struct Truncate +{ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + { + return T(v); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::can_truncate_vector) +struct Truncate) > +{ + NBL_CONSTEXPR_FUNC To operator()(const From v) + { + array_get::scalar_type> getter; + array_set::scalar_type> setter; + To output; + [[unroll]] + for (int i = 0; i < vector_traits::Dimension; ++i) + setter(output, i, getter(v, i)); + return output; + } + +}; + +} //namespace impl + +template +NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) +{ + impl::Truncate _truncate; + return _truncate(v); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index 9872675e3a..da32fab7b0 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -471,25 +471,25 @@ inline int extractExponent(__VA_ARGS__ x)\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ {\ return extractNormalizeMantissa(x.data);\ }\ @@ -636,10 +636,10 @@ namespace ieee754 { namespace impl { -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } } IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 44b881345d..df785e3e8f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -41,7 +41,7 @@ namespace hlsl { namespace emulated_float64_t_impl { -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -74,7 +74,7 @@ inline uint64_t castFloat32ToStorageType(float32_t val) } }; -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +NBL_CONSTEXPR_FUNC bool isZero(uint64_t val) { return (val << 1) == 0ull; } @@ -137,18 +137,18 @@ inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t value) +NBL_CONSTEXPR_FUNC uint64_t flushDenormToZero(uint64_t value) { const uint64_t biasBits = value & ieee754::traits::exponentMask; return biasBits ? value : (value & ieee754::traits::signMask); } -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted | expShifted | mantissa; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -156,18 +156,18 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs | rhs) << 1); } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs) << 1) && (lhs == rhs); } template -NBL_CONSTEXPR_INLINE_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) { if (!FastMath) { diff --git a/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl new file mode 100644 index 0000000000..3818814a49 --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_common_member_inc.hlsl @@ -0,0 +1,155 @@ +using storage_t = vector; +storage_t data; + +/** +* @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern +* +* @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. Stored as little endian (first component are the lower 32 bits) +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(storage_t) _data) +{ + this_t retVal; + retVal.data = _data; + return retVal; +} + +/** +* @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern +* +* @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated +* @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated +*/ +NBL_CONSTEXPR_STATIC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi) +{ + return create(storage_t(lo, hi)); +} + +// ------------------------------------------------------- CONVERSION OPERATORS--------------------------------------------------------------- +// GLM requires these for vector casts + +#ifndef __HLSL_VERSION + +template +constexpr explicit operator I() const noexcept; + +#endif + +// ------------------------------------------------------- INTERNAL GETTERS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC +{ + return data.x; +} + +NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC +{ + return data.y; +} + +// ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data & rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data | rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(data ^ rhs.data); + return retVal; +} + +NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC +{ + this_t retVal = create(~data); + return retVal; +} + +// Only valid in CPP +#ifndef __HLSL_VERSION +constexpr inline this_t operator>>(uint32_t bits) const; + +constexpr inline this_t operator<<(uint32_t bits) const; + +constexpr inline this_t& operator&=(const this_t& val) +{ + data &= val.data; + return *this; +} + +constexpr inline this_t& operator|=(const this_t& val) +{ + data |= val.data; + return *this; +} + +constexpr inline this_t& operator^=(const this_t& val) +{ + data ^= val.data; + return *this; +} + +#endif + +// ------------------------------------------------------- ARITHMETIC OPERATORS ------------------------------------------------- + +NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::AddCarryOutput lowerAddResult = addCarry(__getLSB(), rhs.__getLSB()); + return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry); +} + +NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + const spirv::SubBorrowOutput lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB()); + return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow); +} + +// ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- +NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + equal_to equals; + return all(equals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + not_equal_to notEquals; + return any(notEquals(data, rhs.data)); +} + +NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`, or they're equal and the lower bits are less + // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is) + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? (bit_cast(__getMSB()) < bit_cast(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB()); + const bool LSB = __getLSB() < rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + // Same reasoning as above + const bool MSBEqual = __getMSB() == rhs.__getMSB(); + const bool MSB = Signed ? (bit_cast(__getMSB()) > bit_cast(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB()); + const bool LSB = __getLSB() > rhs.__getLSB(); + return MSBEqual ? LSB : MSB; +} + +NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator>(rhs); +} + +NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC +{ + return !operator<(rhs); +} diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl new file mode 100644 index 0000000000..1324998d1a --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -0,0 +1,409 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" + +// Didn't bother with operator*, operator/, implement if you need them. Multiplication is pretty straightforward, division requires switching on signs +// and whether the topmost bits of the divisor are equal to 0 +// - Francisco + +namespace nbl +{ +namespace hlsl +{ + +struct emulated_int64_t; + +struct emulated_uint64_t +{ + using this_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = false; + + #include "int64_common_member_inc.hlsl" + + #ifndef __HLSL_VERSION + emulated_uint64_t() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_uint64_t(const I& toEmulate); + + constexpr explicit emulated_uint64_t(const emulated_int64_t& other); + #endif +}; + + +struct emulated_int64_t +{ + using this_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC_INLINE bool Signed = true; + + #include "int64_common_member_inc.hlsl" + + #ifndef __HLSL_VERSION + emulated_int64_t() = default; + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_int64_t(const I& toEmulate); + + constexpr explicit emulated_int64_t(const emulated_uint64_t& other); + #endif + + NBL_CONSTEXPR_FUNC emulated_int64_t operator-() NBL_CONST_MEMBER_FUNC + { + storage_t inverted = ~data; + return create(_static_cast(inverted)) + _static_cast(1); + } + +}; + +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- + +template<> +struct is_signed : bool_constant {}; + +template<> +struct is_unsigned : bool_constant {}; + +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ + +template +NBL_BOOL_CONCEPT EmulatedIntegralScalar64 = same_as || same_as; + +namespace impl +{ + +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; + +template<> +struct is_emulating_integral_scalar +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} + + +} + + +namespace impl +{ + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64 && concepts::EmulatedIntegralScalar64 && !concepts::same_as) +struct static_cast_helper && concepts::EmulatedIntegralScalar64 && !concepts::same_as) > +{ + + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) other) + { + To retVal; + retVal.data = other.data; + return retVal; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(To) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + // Return only the lowest bits + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) + { + return _static_cast(val.data.x); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(To) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) val) + { + return bit_cast(val.data); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) <= sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + // Set only lower bits + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) + { + return To::create(_static_cast(i), uint32_t(0)); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) +struct static_cast_helper && (sizeof(From) > sizeof(uint32_t)) && concepts::EmulatedIntegralScalar64) > +{ + NBL_CONSTEXPR_STATIC To cast(NBL_CONST_REF_ARG(From) i) + { + // `bit_cast` blocked by GLM vectors using a union + #ifndef __HLSL_VERSION + return To::create(_static_cast(i), _static_cast(i >> 32)); + #else + To retVal; + retVal.data = bit_cast >(i); + return retVal; + #endif + } +}; + +} //namespace impl + +// Define constructor and conversion operators + +#ifndef __HLSL_VERSION + +constexpr emulated_int64_t::emulated_int64_t(const emulated_uint64_t& other) : data(other.data) {} + +constexpr emulated_uint64_t::emulated_uint64_t(const emulated_int64_t& other) : data(other.data) {} + +template +constexpr emulated_int64_t::emulated_int64_t(const I& toEmulate) +{ + *this = _static_cast(toEmulate); +} + +template +constexpr emulated_uint64_t::emulated_uint64_t(const I& toEmulate) +{ + *this = _static_cast(toEmulate); +} + +template +constexpr emulated_int64_t::operator I() const noexcept +{ + return _static_cast(*this); +} + +template +constexpr emulated_uint64_t::operator I() const noexcept +{ + return _static_cast(*this); +} + +#endif + +// ---------------------- Functional operators ------------------------ + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct left_shift_operator) > +{ + using type_t = T; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + return select(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + + return select(bool(bits), shifted, operand); + } + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? ~uint32_t(0) : uint32_t(0)) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); + return select(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +#ifndef __HLSL_VERSION + +constexpr inline emulated_int64_t emulated_int64_t::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + + +// ---------------------- STD arithmetic operators ------------------------ +// Specializations of the structs found in functional.hlsl +// These all have to be specialized because of the identity that can't be initialized inside the struct definition + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus) > +{ + using type_t = T; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs + rhs; + } + + const static type_t identity; +}; + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus) > +{ + using type_t = T; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs - rhs; + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus::identity = _static_cast(int64_t(0)); +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus::identity = _static_cast(int64_t(0)); + +// --------------------------------- Compound assignment operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct plus_assign) > +{ + using type_t = T; + using base_t = plus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template NBL_PARTIAL_REQ_TOP(concepts::EmulatedIntegralScalar64) +struct minus_assign) > +{ + using type_t = T; + using base_t = minus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_uint64_t minus_assign::identity = minus::identity; +template<> +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR emulated_int64_t minus_assign::identity = minus::identity; + +} //namespace nbl +} //namespace hlsl + +// Declare them as signed/unsigned versions of each other + +#ifndef __HLSL_VERSION +#define NBL_ADD_STD std:: +#else +#define NBL_ADD_STD nbl::hlsl:: +#endif + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +#undef NBL_ADD_STD + + + +#endif diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 0053008aa4..4eb8b7bf06 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include #include #include #include @@ -23,7 +24,7 @@ struct _2_component_vec static_assert(sizeof(T) <= 8); - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -31,7 +32,7 @@ struct _2_component_vec y = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -39,9 +40,7 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; @@ -55,7 +54,7 @@ struct _3_component_vec T z; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -65,7 +64,7 @@ struct _3_component_vec z = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -75,9 +74,7 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; @@ -91,7 +88,7 @@ struct _4_component_vec T z; T w; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -103,7 +100,7 @@ struct _4_component_vec w = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -115,133 +112,207 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; }; -template ::value> -struct emulated_vector : CRTP -{ - using this_t = emulated_vector; - using component_t = ComponentType; - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - CRTP output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - } - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other[i]); - - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + val); +template +struct emulated_vector; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other[i]); - - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) - { - this_t output; +// Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - val); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); +#define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP() NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i).operator OP());\ + return output;\ +} - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) - { - this_t output; +#define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC this_t operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other[i]); +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator OP(vector other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ +} - return output; - } +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ +} - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC vector operator OP (vector other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other[i];\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * val); +#define NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +using this_t = emulated_vector;\ +using component_t = ComponentType;\ +NBL_CONSTEXPR_STATIC this_t create(this_t other)\ +{\ + CRTP output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other.getComponent(i));\ +}\ +NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ +{\ + component_t sum = CRTP::getComponent(0);\ + [[unroll]]\ + for (uint32_t i = 1u; i < CRTP::Dimension; ++i)\ + sum = sum + CRTP::getComponent(i);\ + return sum;\ +} - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +NBL_CONSTEXPR_STATIC this_t create(vector other)\ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other[i]);\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); +// Fundamental, integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) - { - this_t output; +// Fundamental, not integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other[i]); +// Not fundamental, integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector && concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } +// Not fundamental, not integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() - { - component_t sum = 0; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR - return sum; - } -}; +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- #define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -250,7 +321,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -259,7 +330,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -269,14 +340,14 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// TODO: some of code duplication could be avoided -template -struct emulated_vector : CRTP + +template +struct emulated_vector, CRTP> : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; + using component_t = emulated_float64_t; + using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) + NBL_CONSTEXPR_STATIC this_t create(this_t other) { this_t output; @@ -287,17 +358,17 @@ struct emulated_vector : CRTP } template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) + NBL_CONSTEXPR_STATIC this_t create(vector other) { this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); + output.setComponent(i, component_t::create(other[i])); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -306,7 +377,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -315,7 +386,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -338,9 +409,9 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC { - ComponentType sum = ComponentType::create(0); + component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); @@ -420,12 +491,21 @@ DEFINE_SCALAR_OF_SPECIALIZATION(3) DEFINE_SCALAR_OF_SPECIALIZATION(4) #undef DEFINE_SCALAR_OF_SPECIALIZATION +#define DEFINE_EXTENT_SPECIALIZATION(DIMENSION)\ +template\ +struct extent, I> : extent {}; + +DEFINE_EXTENT_SPECIALIZATION(2) +DEFINE_EXTENT_SPECIALIZATION(3) +DEFINE_EXTENT_SPECIALIZATION(4) +#undef DEFINE_EXTENT_SPECIALIZATION + namespace impl { template struct static_cast_helper, vector, void> { - static inline emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -438,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC emulated_vector_t3 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -452,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -470,12 +550,13 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - static inline OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) { array_get getter; array_set setter; OutputVecType output; + [[unroll]] for (int i = 0; i < N; ++i) setter(output, i, _static_cast(getter(vec, i))); @@ -483,7 +564,77 @@ struct static_cast_helper, emulated_vector_t\ +struct static_cast_helper, emulated_vector_t##N , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_STATIC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(output, i, _static_cast(getter(vec, i)));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(2) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(3) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) + +#undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST + +#define NBL_EMULATED_VEC_PROMOTION(N) template\ +struct Promote, ComponentType>\ +{\ + using VecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ + {\ + array_set setter;\ + VecType promoted;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(promoted, i, v);\ + return promoted;\ + }\ +}; + +NBL_EMULATED_VEC_PROMOTION(2) +NBL_EMULATED_VEC_PROMOTION(3) +NBL_EMULATED_VEC_PROMOTION(4) + +#undef NBL_EMULATED_VEC_PROMOTION + +#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ +struct Truncate, emulated_vector_t##M >\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##M ;\ + NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(output, i, getter(vec, i));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TRUNCATION(2, 2) +NBL_EMULATED_VEC_TRUNCATION(2, 3) +NBL_EMULATED_VEC_TRUNCATION(2, 4) +NBL_EMULATED_VEC_TRUNCATION(3, 3) +NBL_EMULATED_VEC_TRUNCATION(3, 4) +NBL_EMULATED_VEC_TRUNCATION(4, 4) + +#undef NBL_EMULATED_VEC_TRUNCATION + +} //namespace impl } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 25d822a940..7531c5cdb9 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" namespace nbl @@ -79,7 +80,7 @@ struct reference_wrapper : enable_if_t< // TODO: partial specializations for T being a special SPIR-V type for image ops, etc. -#define ALIAS_STD(NAME,OP) template struct NAME { \ +#define ALIAS_STD(NAME,OP) template struct NAME { \ using type_t = T; \ \ T operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \ @@ -90,7 +91,6 @@ struct reference_wrapper : enable_if_t< #else // CPP - #define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \ using type_t = T; @@ -135,16 +135,56 @@ ALIAS_STD(divides,/) }; -ALIAS_STD(greater,>) }; -ALIAS_STD(less,<) }; -ALIAS_STD(greater_equal,>=) }; -ALIAS_STD(less_equal,<=) }; +ALIAS_STD(equal_to, ==) }; +ALIAS_STD(not_equal_to, !=) }; +ALIAS_STD(greater, >) }; +ALIAS_STD(less, <) }; +ALIAS_STD(greater_equal, >=) }; +ALIAS_STD(less_equal, <=) }; #undef ALIAS_STD -// ------------------------ Compound assignment operators ---------------------- +// The above comparison operators return bool on STD, but in HLSL they're supposed to yield bool vectors, so here's a specialization so that they return `vector` for vectorial types + +// GLM doesn't have operators on vectors +#ifndef __HLSL_VERSION + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template requires (concepts::Vectorial)\ +struct NAME \ +{\ + using type_t = T;\ + vector::Dimension> operator()(const T& lhs, const T& rhs)\ + {\ + return glm::GLM_OP (lhs, rhs);\ + }\ +}; + +#else + +#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\ +struct NAME ) >\ +{\ + using type_t = T;\ + vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\ + {\ + return lhs OP rhs;\ + }\ +}; + +#endif + +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==, equal) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=, notEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >, greaterThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <, lessThan) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=, greaterThanEqual) +NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual) + +#undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION -#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ +// ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS -------------------------------------------------------------------- + +#define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \ using type_t = T; \ using base_t = NAME ; \ base_t baseOp; \ @@ -163,9 +203,9 @@ COMPOUND_ASSIGN(divides) #undef COMPOUND_ASSIGN -// ----------------- End of compound assignment ops ---------------- +// ---------------------------------------------------------------- MIN, MAX, TERNARY ------------------------------------------------------------------------- -// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD +// Min, Max, and Ternary and Shift operators don't use ALIAS_STD because they don't exist in STD // TODO: implement as mix(rhs struct minimum @@ -195,18 +235,273 @@ struct maximum NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components` }; -template +#ifndef __HLSL_VERSION +template requires(is_same_v, std::invoke_result_t()> ) +struct ternary_operator +{ + using type_t = std::invoke_result_t; + + constexpr inline type_t operator()(const bool condition, const F1& lhs, const F2& rhs) + { + if (condition) + return std::invoke(lhs); + else + return std::invoke(rhs); + } +}; +#else +template()()),decltype(experimental::declval()())> ) struct ternary_operator +{ + using type_t = decltype(experimental::declval().operator()); + + NBL_CONSTEXPR_FUNC type_t operator()(const bool condition, NBL_CONST_REF_ARG(F1) lhs, NBL_CONST_REF_ARG(F2) rhs) + { + if (condition) + return lhs(); + else + return rhs(); + } +}; +#endif + +// ----------------------------------------------------------------- SHIFT OPERATORS -------------------------------------------------------------------- + +template +struct left_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand << bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct left_shift_operator) > { using type_t = T; + using scalar_t = scalar_type_t; - T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) { - return condition ? lhs : rhs; + return operand << bits; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand << bits; } }; -} -} +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial) +struct left_shift_operator && concepts::IntegralLikeVectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), getter(bits, i))); + } + return shifted; + } -#endif \ No newline at end of file + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + left_shift_operator leftShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, leftShift(getter(operand, i), bits)); + } + return shifted; + } +}; + +template +struct arithmetic_right_shift_operator +{ + using type_t = T; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::IntVector) +struct arithmetic_right_shift_operator) > +{ + using type_t = T; + using scalar_t = scalar_type_t; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + return operand >> bits; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + return operand >> bits; + } +}; + +template NBL_PARTIAL_REQ_TOP(!concepts::IntVector&& concepts::IntegralLikeVectorial) +struct arithmetic_right_shift_operator&& concepts::IntegralLikeVectorial) > +{ + using type_t = T; + using scalar_t = typename vector_traits::scalar_type; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), getter(bits, i))); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits[i])); + } + return shifted; + } + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits) + { + array_get getter; + array_set setter; + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint16_t extent = uint16_t(extent_v); + arithmetic_right_shift_operator rightShift; + T shifted; + [[unroll]] + for (uint16_t i = 0; i < extent; i++) + { + setter(shifted, i, rightShift(getter(operand, i), bits)); + } + return shifted; + } +}; + +// Left unimplemented for vectorial types by default +template +struct logical_right_shift_operator +{ + using type_t = T; + using unsigned_type_t = make_unsigned_t; + + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits) + { + arithmetic_right_shift_operator arithmeticRightShift; + return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits))); + } +}; + +// ----------------------------------------------------------------- UNARY OPERATORS -------------------------------------------------------------------- +#ifndef __HLSL_VERSION +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME : std::NAME { \ + using type_t = T; \ +}; +#else +#define NBL_UNARY_OP_SPECIALIZATION(NAME, OP) template \ +struct NAME \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand) \ + { \ + return operand.operator OP(); \ + } \ +}; \ +template NBL_PARTIAL_REQ_TOP(concepts::Scalar || concepts::Vector || concepts::Matrix ) \ +struct NAME || concepts::Vector || concepts::Matrix ) > \ +{ \ + using type_t = T; \ + NBL_CONSTEXPR_FUNC T operator()(const T operand) \ + { \ + return (OP operand); \ + } \ +}; +#endif + +NBL_UNARY_OP_SPECIALIZATION(bit_not, ~) +NBL_UNARY_OP_SPECIALIZATION(negate, -) + +} //namespace nbl +} //namespace hlsl + +#endif diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 307a11101f..29c48a79d1 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -90,7 +90,7 @@ inline int extractExponent(T x) } template -NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) +NBL_CONSTEXPR_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp) { using AsFloat = typename float_of_size::type; return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt)); @@ -98,20 +98,20 @@ NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) +NBL_CONSTEXPR_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -119,21 +119,21 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::signMask; } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) +NBL_CONSTEXPR_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) { using AsUint = typename unsigned_integer_of_size::type; @@ -240,13 +240,13 @@ struct flipSignIfRHSNegative_helper -NBL_CONSTEXPR_INLINE_FUNC T flipSign(T val, U flip) +NBL_CONSTEXPR_FUNC T flipSign(T val, U flip) { return impl::flipSign_helper::__call(val, flip); } template -NBL_CONSTEXPR_INLINE_FUNC T flipSignIfRHSNegative(T val, T flip) +NBL_CONSTEXPR_FUNC T flipSignIfRHSNegative(T val, T flip) { return impl::flipSignIfRHSNegative_helper::__call(val, flip); } diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index ad8a3f9228..69fba9795f 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -15,25 +15,25 @@ namespace ieee754 namespace impl { template -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) +NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template -NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) +NBL_CONSTEXPR_FUNC T castBackToFloatType(T x) { using AsFloat = typename float_of_size::type; return bit_cast(x); } -template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; } +template<> NBL_CONSTEXPR_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; } } } diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index 6eee1fae6e..d3f5b167f6 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -123,9 +123,9 @@ void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb) { #ifdef __HLSL_VERSION - NBL_CONSTEXPR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); #else - NBL_CONSTEXPR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); + NBL_CONSTEXPR_FUNC_SCOPE_VAR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); #endif const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; diff --git a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl index 3bcfbb2388..cd402d0cd4 100644 --- a/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl +++ b/include/nbl/builtin/hlsl/math/quadrature/gauss_legendre/impl.hlsl @@ -14,25 +14,25 @@ namespace float_t_namespace { -NBL_CONSTEXPR float_t xi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_2[2] = { TYPED_NUMBER(-0.5773502691896257), TYPED_NUMBER(0.5773502691896257) }; -NBL_CONSTEXPR float_t xi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_3[3] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.7745966692414833), TYPED_NUMBER(0.7745966692414833) }; -NBL_CONSTEXPR float_t xi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_4[4] = { TYPED_NUMBER(-0.3399810435848562), TYPED_NUMBER(0.3399810435848562), TYPED_NUMBER(-0.8611363115940525), TYPED_NUMBER(0.8611363115940525) }; -NBL_CONSTEXPR float_t xi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_5[5] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.5384693101056830), TYPED_NUMBER(0.5384693101056830), @@ -40,7 +40,7 @@ NBL_CONSTEXPR float_t xi_5[5] = { TYPED_NUMBER(0.9061798459386639) }; -NBL_CONSTEXPR float_t xi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_6[6] = { TYPED_NUMBER(0.6612093864662645), TYPED_NUMBER(-0.6612093864662645), TYPED_NUMBER(-0.2386191860831969), @@ -49,7 +49,7 @@ NBL_CONSTEXPR float_t xi_6[6] = { TYPED_NUMBER(0.9324695142031520) }; -NBL_CONSTEXPR float_t xi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_7[7] = { TYPED_NUMBER(0.0), TYPED_NUMBER(0.4058451513773971), TYPED_NUMBER(-0.4058451513773971), @@ -59,7 +59,7 @@ NBL_CONSTEXPR float_t xi_7[7] = { TYPED_NUMBER(0.9491079123427585) }; -NBL_CONSTEXPR float_t xi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_8[8] = { TYPED_NUMBER(-0.1834346424956498), TYPED_NUMBER(0.1834346424956498), TYPED_NUMBER(-0.5255324099163289), @@ -70,7 +70,7 @@ NBL_CONSTEXPR float_t xi_8[8] = { TYPED_NUMBER(0.9602898564975362) }; -NBL_CONSTEXPR float_t xi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_9[9] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.8360311073266357), TYPED_NUMBER(0.8360311073266357), @@ -82,7 +82,7 @@ NBL_CONSTEXPR float_t xi_9[9] = { TYPED_NUMBER(0.6133714327005903) }; -NBL_CONSTEXPR float_t xi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_10[10] = { TYPED_NUMBER(-0.1488743389816312), TYPED_NUMBER(0.1488743389816312), TYPED_NUMBER(-0.4333953941292471), @@ -95,7 +95,7 @@ NBL_CONSTEXPR float_t xi_10[10] = { TYPED_NUMBER(0.9739065285171717) }; -NBL_CONSTEXPR float_t xi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_11[11] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2695431559523449), TYPED_NUMBER(0.2695431559523449), @@ -109,7 +109,7 @@ NBL_CONSTEXPR float_t xi_11[11] = { TYPED_NUMBER(0.9782286581460569) }; -NBL_CONSTEXPR float_t xi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_12[12] = { TYPED_NUMBER(-0.1252334085114689), TYPED_NUMBER(0.1252334085114689), TYPED_NUMBER(-0.3678314989981801), @@ -124,7 +124,7 @@ NBL_CONSTEXPR float_t xi_12[12] = { TYPED_NUMBER(0.9815606342467192) }; -NBL_CONSTEXPR float_t xi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_13[13] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2304583159551347), TYPED_NUMBER(0.2304583159551347), @@ -140,7 +140,7 @@ NBL_CONSTEXPR float_t xi_13[13] = { TYPED_NUMBER(0.9841830547185881) }; -NBL_CONSTEXPR float_t xi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_14[14] = { TYPED_NUMBER(-0.1080549487073436), TYPED_NUMBER(0.1080549487073436), TYPED_NUMBER(-0.3191123689278897), @@ -157,7 +157,7 @@ NBL_CONSTEXPR float_t xi_14[14] = { TYPED_NUMBER(0.9862838086968123) }; -NBL_CONSTEXPR float_t xi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t xi_15[15] = { TYPED_NUMBER(0.0), TYPED_NUMBER(-0.2011940939974345), TYPED_NUMBER(0.2011940939974345), @@ -175,25 +175,25 @@ NBL_CONSTEXPR float_t xi_15[15] = { TYPED_NUMBER(0.9879925180204854) }; -NBL_CONSTEXPR float_t wi_2[2] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_2[2] = { TYPED_NUMBER(1.0000000000000000), TYPED_NUMBER(1.0000000000000000) }; -NBL_CONSTEXPR float_t wi_3[3] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_3[3] = { TYPED_NUMBER(0.8888888888888888), TYPED_NUMBER(0.5555555555555555), TYPED_NUMBER(0.5555555555555555) }; -NBL_CONSTEXPR float_t wi_4[4] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_4[4] = { TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.6521451548625461), TYPED_NUMBER(0.3478548451374538), TYPED_NUMBER(0.3478548451374538) }; -NBL_CONSTEXPR float_t wi_5[5] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_5[5] = { TYPED_NUMBER(0.5688888888888888), TYPED_NUMBER(0.4786286704993664), TYPED_NUMBER(0.4786286704993664), @@ -201,7 +201,7 @@ NBL_CONSTEXPR float_t wi_5[5] = { TYPED_NUMBER(0.2369268850561890) }; -NBL_CONSTEXPR float_t wi_6[6] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_6[6] = { TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.3607615730481386), TYPED_NUMBER(0.4679139345726910), @@ -210,7 +210,7 @@ NBL_CONSTEXPR float_t wi_6[6] = { TYPED_NUMBER(0.1713244923791703) }; -NBL_CONSTEXPR float_t wi_7[7] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_7[7] = { TYPED_NUMBER(0.4179591836734693), TYPED_NUMBER(0.3818300505051189), TYPED_NUMBER(0.3818300505051189), @@ -220,7 +220,7 @@ NBL_CONSTEXPR float_t wi_7[7] = { TYPED_NUMBER(0.1294849661688696) }; -NBL_CONSTEXPR float_t wi_8[8] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_8[8] = { TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3626837833783619), TYPED_NUMBER(0.3137066458778872), @@ -231,7 +231,7 @@ NBL_CONSTEXPR float_t wi_8[8] = { TYPED_NUMBER(0.1012285362903762) }; -NBL_CONSTEXPR float_t wi_9[9] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_9[9] = { TYPED_NUMBER(0.3302393550012597), TYPED_NUMBER(0.1806481606948574), TYPED_NUMBER(0.1806481606948574), @@ -243,7 +243,7 @@ NBL_CONSTEXPR float_t wi_9[9] = { TYPED_NUMBER(0.2606106964029354) }; -NBL_CONSTEXPR float_t wi_10[10] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_10[10] = { TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2955242247147528), TYPED_NUMBER(0.2692667193099963), @@ -256,7 +256,7 @@ NBL_CONSTEXPR float_t wi_10[10] = { TYPED_NUMBER(0.0666713443086881) }; -NBL_CONSTEXPR float_t wi_11[11] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_11[11] = { TYPED_NUMBER(0.2729250867779006), TYPED_NUMBER(0.2628045445102466), TYPED_NUMBER(0.2628045445102466), @@ -270,7 +270,7 @@ NBL_CONSTEXPR float_t wi_11[11] = { TYPED_NUMBER(0.0556685671161736) }; -NBL_CONSTEXPR float_t wi_12[12] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_12[12] = { TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2491470458134027), TYPED_NUMBER(0.2334925365383548), @@ -285,7 +285,7 @@ NBL_CONSTEXPR float_t wi_12[12] = { TYPED_NUMBER(0.0471753363865118) }; -NBL_CONSTEXPR float_t wi_13[13] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_13[13] = { TYPED_NUMBER(0.2325515532308739), TYPED_NUMBER(0.2262831802628972), TYPED_NUMBER(0.2262831802628972), @@ -301,7 +301,7 @@ NBL_CONSTEXPR float_t wi_13[13] = { TYPED_NUMBER(0.0404840047653158) }; -NBL_CONSTEXPR float_t wi_14[14] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_14[14] = { TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2152638534631577), TYPED_NUMBER(0.2051984637212956), @@ -318,7 +318,7 @@ NBL_CONSTEXPR float_t wi_14[14] = { TYPED_NUMBER(0.0351194603317518) }; -NBL_CONSTEXPR float_t wi_15[15] = { +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t wi_15[15] = { TYPED_NUMBER(0.2025782419255612), TYPED_NUMBER(0.1984314853271115), TYPED_NUMBER(0.1984314853271115), diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl new file mode 100644 index 0000000000..4e90fd4c91 --- /dev/null +++ b/include/nbl/builtin/hlsl/morton.hlsl @@ -0,0 +1,656 @@ +#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/emulated/int64_t.hlsl" +#include "nbl/builtin/hlsl/mpl.hlsl" +#include "nbl/builtin/hlsl/portable/vector_t.hlsl" + +// TODO: mega macro to get functional plus, minus, plus_assign, minus_assign + +namespace nbl +{ +namespace hlsl +{ +namespace morton +{ + +namespace impl +{ + +// Valid dimension for a morton code +template +NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5; + +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegral(T val) +{ + NBL_CONSTEXPR_FUNC_SCOPE_VAR T mask = ~((T(1) << Bits) - 1); + const bool allZero = ((val & mask) == 0); + NBL_IF_CONSTEXPR(is_signed_v) + { + const bool allOne = ((val & mask) == mask); + return allZero || allOne; + } + return allZero; +} + +template && concepts::Scalar) +NBL_CONSTEXPR_FUNC bool verifyAnyBitIntegralVec(vector vec) +{ + array_get, T> getter; + NBL_UNROLL + for (uint16_t i = 0; i < Dim; i++) + if (!verifyAnyBitIntegral(getter(vec, i))) return false; + return true; +} + + +// --------------------------------------------------------- MORTON ENCOE/DECODE MASKS --------------------------------------------------- + +NBL_CONSTEXPR uint16_t CodingStages = 5; + +template +struct coding_mask; + +template +NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value); + +// constexpr vector is not supported since it is not a fundamental type, which means it cannot be stored or leaked outside of constexpr context, it can only exist transiently. So the only way to return vector is to make the function consteval. Thus, we use macro to inline where it is used. +#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\ + truncate >(\ + vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 1,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 2,\ + NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 3))) + + +template +struct sign_mask : integral_constant {}; + +template +NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value); + +#define NBL_MORTON_SIGN_MASKS(STORAGE_T, DIM, BITS) _static_cast >(\ + truncate >(\ + vector(sign_mask_v< DIM, BITS >,\ + sign_mask_v< DIM, BITS > << 1,\ + sign_mask_v< DIM, BITS > << 2,\ + sign_mask_v< DIM, BITS > << 3))) + +// 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only gets the relevant bits out of a morton code, the 0th stage +// mask also considers the total number of bits we're cnsidering for a code (all other masks operate on a bit-agnostic basis). +#define NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(DIM, BASE_VALUE) template struct coding_mask\ +{\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t KilloffMask = _Bits * DIM < 64 ? (uint64_t(1) << (_Bits * DIM)) - 1 : ~uint64_t(0);\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE) & KilloffMask;\ +}; + +#define NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(DIM, STAGE, BASE_VALUE) template struct coding_mask\ +{\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE);\ +}; + +// Final stage mask also counts exact number of bits, although maybe it's not necessary +#define NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template struct coding_mask\ +{\ + enum : uint64_t { _Bits = Bits };\ + NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\ +}; + +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555ull) // Groups bits by 1 on, 1 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, 0x3333333333333333ull) // Groups bits by 2 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, 0x0F0F0F0F0F0F0F0Full) // Groups bits by 4 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, 0x00FF00FF00FF00FFull) // Groups bits by 8 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, 0x0000FFFF0000FFFFull) // Groups bits by 16 on, 16 off + +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249ull) // Groups bits by 1 on, 2 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, 0x30C30C30C30C30C3ull) // Groups bits by 2 on, 4 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, 0xF00F00F00F00F00Full) // Groups bits by 4 on, 8 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, 0x00FF0000FF0000FFull) // Groups bits by 8 on, 16 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, 0xFFFF00000000FFFFull) // Groups bits by 16 on, 32 off + +NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111ull) // Groups bits by 1 on, 3 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, 0x0303030303030303ull) // Groups bits by 2 on, 6 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, 0x000F000F000F000Full) // Groups bits by 4 on, 12 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, 0x000000FF000000FFull) // Groups bits by 8 on, 24 off +NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, 0x000000000000FFFFull) // Groups bits by 16 on, 48 off (unused but here for completion + likely keeps compiler from complaining) + +NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS + +#undef NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK +#undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK + +// ----------------------------------------------------------------- MORTON TRANSCODER --------------------------------------------------- +template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>) +struct Transcoder +{ + using decode_component_t = conditional_t<(Bits > 16), uint32_t, uint16_t>; + using decode_t = vector; + + template ) + /** + * @brief Interleaves each coordinate with `Dim - 1` zeros inbetween each bit, and left-shifts each by their coordinate index + * + * @param [in] decodedValue Cartesian coordinates to interleave and shift + */ + NBL_CONSTEXPR_STATIC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(T) decodedValue) + { + left_shift_operator > leftShift; + portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v; + + // Read this to understand how interleaving and spreading bits works https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/ + #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\ + interleaved = interleaved & coding_mask_v;\ + } + ENCODE_LOOP_ITERATION(4) + ENCODE_LOOP_ITERATION(3) + ENCODE_LOOP_ITERATION(2) + ENCODE_LOOP_ITERATION(1) + ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION + + // After interleaving, shift each coordinate left by their index + return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); + } + + template + /** + * @brief Encodes a vector of cartesian coordinates as a Morton code + * + * @param [in] decodedValue Cartesian coordinates to encode + */ + NBL_CONSTEXPR_STATIC encode_t encode(NBL_CONST_REF_ARG(T) decodedValue) + { + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); + + array_get, encode_t> getter; + encode_t encoded = getter(interleaveShifted, 0); + + NBL_UNROLL + for (uint16_t i = 1; i < Dim; i++) + encoded = encoded | getter(interleaveShifted, i); + + return encoded; + } + + /** + * @brief Decodes a Morton code back to a vector of cartesian coordinates + * + * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) + */ + NBL_CONSTEXPR_STATIC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator encodedRightShift; + portable_vector_t decoded; + array_set, encode_t> setter; + // Write initial values into decoded + NBL_UNROLL + for (uint16_t i = 0; i < Dim; i++) + setter(decoded, i, encodedRightShift(encodedValue, i)); + + arithmetic_right_shift_operator > rightShift; + + #define DECODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + decoded = decoded & coding_mask_v;\ + decoded = decoded | rightShift(decoded, (uint16_t(1) << I) * (Dim - 1));\ + } + + DECODE_LOOP_ITERATION(0) + DECODE_LOOP_ITERATION(1) + DECODE_LOOP_ITERATION(2) + DECODE_LOOP_ITERATION(3) + DECODE_LOOP_ITERATION(4) + + #undef DECODE_LOOP_ITERATION + + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated + NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) + return _static_cast(decoded); + else + return _static_cast(decoded & coding_mask_v); + } +}; + +// ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- +// Here because no partial specialization of methods +// `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted + +template +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); + +template +struct Equal; + +template +struct Equal +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t zeros = promote >(_static_cast(0)); + + const portable_vector_t rhsCasted = _static_cast >(rhs); + const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); + equal_to > _equal; + return _equal(xored, zeros); + } +}; + +template +struct Equal +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t >; + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); + return Equal::template __call(value, interleaved); + } +}; + +template +struct BaseComparison; + +// Aux variable that has only the sign bit for the first of D dimensions +template +NBL_CONSTEXPR uint64_t SignMask = uint64_t(1) << (D * (Bits - 1)); + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); + ComparisonOp comparison; + NBL_IF_CONSTEXPR(Signed) + { + // Obtain a vector of deinterleaved coordinates and flip their sign bits + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return comparison(thisCoord, rhsCoord); + } + else + { + // Obtain a vector of deinterleaved coordinates + portable_vector_t thisCoord = InterleaveMasks & value; + // rhs already deinterleaved, just have to cast type + const portable_vector_t rhsCoord = _static_cast >(rhs); + + return comparison(thisCoord, rhsCoord); + } + + } +}; + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t >; + using transcoder_t = Transcoder; + const portable_vector_t interleaved = _static_cast >(transcoder_t::interleaveShift(_static_cast(rhs))); + return BaseComparison::template __call(value, interleaved); + } +}; + +template +struct LessThan : BaseComparison > > {}; + +template +struct LessEqual : BaseComparison > > {}; + +template +struct GreaterThan : BaseComparison > > {}; + +template +struct GreaterEqual : BaseComparison > > {}; + +} //namespace impl + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + using this_signed_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; + + using transcoder_t = impl::Transcoder; + using decode_component_t = conditional_t, + typename transcoder_t::decode_component_t>; + + storage_t value; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + code() = default; + + #endif + + /** + * @brief Creates a Morton code from a set of integral cartesian coordinates + * + * @param [in] cartesian Coordinates to encode. Signedness MUST match the signedness of this Morton code class + */ + template + NBL_CONSTEXPR_STATIC enable_if_t , this_t> + create(NBL_CONST_REF_ARG(vector) cartesian) + { + this_t retVal; + assert((impl::verifyAnyBitIntegralVec(cartesian))); + using decode_t = typename transcoder_t::decode_t; + retVal.value = transcoder_t::encode(_static_cast(cartesian)); + return retVal; + } + + // CPP can also have an actual constructor + #ifndef __HLSL_VERSION + + /** + * @brief Creates a Morton code from a set of cartesian coordinates + * + * @param [in] cartesian Coordinates to encode + */ + template + inline explicit code(NBL_CONST_REF_ARG(vector) cartesian) + { + *this = create(cartesian); + } + + /** + * @brief Decodes this Morton code back to a set of cartesian coordinates + */ + template == Signed) + constexpr explicit operator vector() const noexcept; + + #endif + + // ------------------------------------------------------- BITWISE OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value & rhs.value; + return retVal; + } + + NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value | rhs.value; + return retVal; + } + + NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = value ^ rhs.value; + return retVal; + } + + NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC + { + this_t retVal; + retVal.value = ~value; + return retVal; + } + + // Only valid in CPP + #ifndef __HLSL_VERSION + + constexpr this_t operator<<(uint16_t bits) const; + + constexpr this_t operator>>(uint16_t bits) const; + + #endif + + // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC + { + this_t zero; + zero.value = _static_cast(0); + #ifndef __HLSL_VERSION + return zero - *this; + #else + return zero - this; + #endif + } + + // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS ------------------------------------------------- + + // put 1 bits everywhere in the bits the current axis is not using + // then extract just the axis bits for the right hand coordinate + // carry-1 will propagate the bits across the already set bits + // then clear out the bits not belonging to current axis + // Note: Its possible to clear on `this` and fill on `rhs` but that will + // disable optimizations, we expect the compiler to optimize a lot if the + // value of `rhs` is known at compile time, e.g. `static_cast>(glm::ivec3(1,0,0))` + NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + bit_not > bitnot; + // For each coordinate, leave its bits intact and turn every other bit ON + const portable_vector_t counterMaskedValue = bitnot(InterleaveMasks) | value; + // For each coordinate in rhs, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + NBL_UNROLL + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; + } + + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + // For each coordinate, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedValue = InterleaveMasks & value; + // Do the same for each coordinate in rhs + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + NBL_UNROLL + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::Equal::template __call(value, rhs); + } + + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value != rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector notEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return !equal(rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessEqual::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThanEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterEqual::template __call(value, rhs); + } + +}; + +} //namespace morton + +// Specialize the `static_cast_helper` +namespace impl +{ + +// I must be of same signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar) > +{ + NBL_CONSTEXPR_STATIC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + { + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + return morton::impl::Transcoder::decode(val.value); + } +}; + +} // namespace impl + +template +struct left_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator valueLeftShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueLeftShift(operand.value, bits * D); + // Previous shift might move bits to positions that storage has available but the morton code does not use + // Un-decoding the resulting morton is still fine and produces expected results, but some operations such as equality expect these unused bits to be 0 so we mask them off + const uint64_t UsedBitsMask = Bits * D < 64 ? (uint64_t(1) << (Bits * D)) - 1 : ~uint64_t(0); + retVal.value = retVal.value & _static_cast(UsedBitsMask); + return retVal; + } +}; + +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + arithmetic_right_shift_operator valueArithmeticRightShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueArithmeticRightShift(operand.value, bits * D); + return retVal; + } +}; + +// This one's uglier - have to unpack to get the expected behaviour +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + vector cartesian = _static_cast >(operand); + // To avoid branching, we left-shift each coordinate to put the MSB (of the encoded Morton) at the position of the MSB (of the `scalar_t` used for the decoded coordinate), + // then right-shift again to get correct sign on each coordinate + // The number of bits we shift by to put MSB of Morton at MSB of `scalar_t` is the difference between the bitwidth of `scalar_t` and Bits + const scalar_t ShiftFactor = scalar_t(8 * sizeof(scalar_t) - Bits); + cartesian <<= ShiftFactor; + cartesian >>= ShiftFactor + scalar_t(bits); + return type_t::create(cartesian); + } +}; + +#ifndef __HLSL_VERSION + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator<<(uint16_t bits) const +{ + left_shift_operator> leftShift; + return leftShift(*this, bits); +} + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator> rightShift; + return rightShift(*this, bits); +} + +template && D* Bits <= 64) +template == Signed) +constexpr morton::code::operator vector() const noexcept +{ + return _static_cast, morton::code>(*this); +} + +#endif + +#undef NBL_MORTON_INTERLEAVE_MASKS +#undef NBL_MORTON_SIGN_MASKS + +} //namespace hlsl +} //namespace nbl + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/mpl.hlsl b/include/nbl/builtin/hlsl/mpl.hlsl index 8fb13db872..7734dea15f 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -41,7 +41,12 @@ struct countl_zero : impl::countl_zero static_assert(is_integral::value, "countl_zero type parameter must be an integral type"); }; template -NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T countl_zero_v = countl_zero::value; + +template +struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_pot_v = is_pot::value; template struct log2 @@ -49,7 +54,12 @@ struct log2 NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint64_t log2_v = log2::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_v = log2::value; + +template +struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -59,7 +69,7 @@ struct rotl NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X << r) | (X >> (N - r))) : (X >> (-r)) | (X << (N - (-r))); }; template -NBL_CONSTEXPR T rotl_v = rotl::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotl_v = rotl::value; template struct rotr @@ -69,7 +79,7 @@ struct rotr NBL_CONSTEXPR_STATIC_INLINE T value = (S >= 0) ? ((X >> r) | (X << (N - r))) : (X << (-r)) | (X >> (N - (-r))); }; template -NBL_CONSTEXPR T rotr_v = rotr::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T rotr_v = rotr::value; template struct align_up @@ -77,12 +87,7 @@ struct align_up NBL_CONSTEXPR_STATIC_INLINE uint64_t value = X ? (((X-1)/M+1)*M):0; }; template -NBL_CONSTEXPR uint64_t align_up_v = align_up::value; - -template -struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; -template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t align_up_v = align_up::value; template struct max @@ -90,7 +95,7 @@ struct max NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T max_v = max::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T max_v = max::value; template struct min @@ -98,7 +103,18 @@ struct min NBL_CONSTEXPR_STATIC_INLINE T value = X -NBL_CONSTEXPR T min_v = min::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T min_v = min::value; + +template +struct round_up_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_up_to_pot_v = round_up_to_pot::value; + +// TODO: should rename log2 to log2_floor +template +struct round_down_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t round_down_to_pot_v = round_down_to_pot::value; template struct find_lsb @@ -106,7 +122,7 @@ struct find_lsb NBL_CONSTEXPR_STATIC_INLINE uint16_t value = log2::value; }; template -NBL_CONSTEXPR uint64_t find_lsb_v = find_lsb::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t find_lsb_v = find_lsb::value; } } } diff --git a/include/nbl/builtin/hlsl/numbers.hlsl b/include/nbl/builtin/hlsl/numbers.hlsl index 6671a44756..4594596590 100644 --- a/include/nbl/builtin/hlsl/numbers.hlsl +++ b/include/nbl/builtin/hlsl/numbers.hlsl @@ -11,33 +11,33 @@ namespace numbers { template -NBL_CONSTEXPR float_t e = float_t(2.718281828459045); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t e = float_t(2.718281828459045); template -NBL_CONSTEXPR float_t log2e = float_t(1.4426950408889634); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log2e = float_t(1.4426950408889634); template -NBL_CONSTEXPR float_t log10e = float_t(0.4342944819032518); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t log10e = float_t(0.4342944819032518); template -NBL_CONSTEXPR float_t pi = float_t(3.141592653589793); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t pi = float_t(3.141592653589793); template -NBL_CONSTEXPR float_t inv_pi = float_t(0.3183098861837907); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_pi = float_t(0.3183098861837907); template -NBL_CONSTEXPR float_t inv_sqrtpi = float_t(0.5641895835477563); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrtpi = float_t(0.5641895835477563); template -NBL_CONSTEXPR float_t ln2 = float_t(0.6931471805599453); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln2 = float_t(0.6931471805599453); template -NBL_CONSTEXPR float_t inv_ln2 = float_t(1.44269504088896); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_ln2 = float_t(1.44269504088896); template -NBL_CONSTEXPR float_t ln10 = float_t(2.302585092994046); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t ln10 = float_t(2.302585092994046); template -NBL_CONSTEXPR float_t sqrt2 = float_t(1.4142135623730951); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt2 = float_t(1.4142135623730951); template -NBL_CONSTEXPR float_t sqrt3 = float_t(1.7320508075688772); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t sqrt3 = float_t(1.7320508075688772); template -NBL_CONSTEXPR float_t inv_sqrt3 = float_t(0.5773502691896257); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t inv_sqrt3 = float_t(0.5773502691896257); template -NBL_CONSTEXPR float_t egamma = float_t(0.5772156649015329); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t egamma = float_t(0.5772156649015329); template -NBL_CONSTEXPR float_t phi = float_t(1.618033988749895); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float_t phi = float_t(1.618033988749895); } } diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl new file mode 100644 index 0000000000..2dffa40a2d --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -0,0 +1,36 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_INT_64 to force using emulated int64 types + +namespace nbl +{ +namespace hlsl +{ +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_INT_64 +template +using portable_uint64_t = emulated_uint64_t; +template +using portable_int64_t = emulated_int64_t; +#else +template +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +template +using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; +#endif + +#else +template +using portable_uint64_t = uint64_t; +template +using portable_int64_t = int64_t; +#endif + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index ace199e20b..16d5b40f81 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -3,6 +3,7 @@ #include #include +#include namespace nbl { @@ -36,19 +37,53 @@ template using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION +// Float template using portable_float64_t2 = portable_vector_t2 >; template using portable_float64_t3 = portable_vector_t3 >; template using portable_float64_t4 = portable_vector_t4 >; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2 >; +template +using portable_uint64_t3 = portable_vector_t3 >; +template +using portable_uint64_t4 = portable_vector_t4 >; + +//Int +template +using portable_int64_t2 = portable_vector_t2 >; +template +using portable_int64_t3 = portable_vector_t3 >; +template +using portable_int64_t4 = portable_vector_t4 >; #else +// Float template using portable_float64_t2 = portable_vector_t2; template using portable_float64_t3 = portable_vector_t3; template using portable_float64_t4 = portable_vector_t4; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2; +template +using portable_uint64_t3 = portable_vector_t3; +template +using portable_uint64_t4 = portable_vector_t4; + +// Int +template +using portable_int64_t2 = portable_vector_t2; +template +using portable_int64_t3 = portable_vector_t3; +template +using portable_int64_t4 = portable_vector_t4; #endif } diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index b71558c49d..02495e2f2e 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -4,6 +4,8 @@ #ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ +#include + #ifdef __HLSL_VERSION // TODO: AnastZIuk fix public search paths so we don't choke #include "spirv/unified1/spirv.hpp" @@ -11,7 +13,6 @@ #include #include #include -#include namespace nbl { @@ -115,7 +116,12 @@ NBL_CONSTEXPR_STATIC_INLINE bool is_bda_pointer_v = is_bda_pointer::value; //! General Operations - + +//! Miscellaneous Instructions +template +[[vk::ext_instruction(spv::OpUndef)]] +T undef(); + // template [[vk::ext_instruction(spv::OpAccessChain)]] @@ -341,6 +347,11 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, bool> any(BooleanVector vec); +// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) +template && (! concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) +[[vk::ext_instruction(spv::OpSelect)]] +ResultType select(Condition condition, ResultType object1, ResultType object2); + template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index a9701619dd..257a753129 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -636,28 +636,39 @@ template using conditional_t = typename conditional::type; -// Template variables +// Template Variables +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T integral_constant_v = integral_constant::value; template -NBL_CONSTEXPR bool is_same_v = is_same::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_same_v = is_same::value; template -NBL_CONSTEXPR bool is_unsigned_v = is_unsigned::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_unsigned_v = is_unsigned::value; template -NBL_CONSTEXPR bool is_integral_v = is_integral::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_integral_v = is_integral::value; template -NBL_CONSTEXPR bool is_floating_point_v = is_floating_point::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_floating_point_v = is_floating_point::value; template -NBL_CONSTEXPR bool is_signed_v = is_signed::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_signed_v = is_signed::value; template -NBL_CONSTEXPR bool is_scalar_v = is_scalar::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_scalar_v = is_scalar::value; template -NBL_CONSTEXPR uint64_t size_of_v = size_of::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t size_of_v = size_of::value; template -NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t alignment_of_v = alignment_of::value; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_fundamental_v = is_fundamental::value; + // Overlapping definitions template using make_void_t = typename make_void::type; +template +using make_signed_t = typename make_signed::type; + +template +using make_unsigned_t = typename make_unsigned::type; + template struct conditional_value { @@ -674,7 +685,7 @@ template struct is_vector > : bool_constant {}; template -NBL_CONSTEXPR bool is_vector_v = is_vector::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_vector_v = is_vector::value; #ifndef __HLSL_VERSION template @@ -685,7 +696,7 @@ template struct is_matrix > : bool_constant {}; template -NBL_CONSTEXPR bool is_matrix_v = is_matrix::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_matrix_v = is_matrix::value; template @@ -721,16 +732,16 @@ struct extent : integral_constant::value> {}; template struct extent : integral_constant::value> {}; -template -struct extent, 0> : integral_constant {}; +template +struct extent, I> : extent {}; template -struct extent, I> : integral_constant::value> {}; +struct extent, I> : extent {}; // Template Variables template -NBL_CONSTEXPR uint64_t extent_v = extent::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t extent_v = extent::value; template::value> @@ -844,15 +855,6 @@ struct float_of_size<8> template using float_of_size_t = typename float_of_size::type; -template -struct extent, 0> : integral_constant {}; - -template -struct extent, 0> : integral_constant {}; - -template -struct extent, 1> : integral_constant {}; - } } diff --git a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl index 03ccd64d4e..22c93ce193 100644 --- a/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl +++ b/include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl @@ -225,7 +225,7 @@ template struct is_configuration > : bool_constant {}; template -NBL_CONSTEXPR bool is_configuration_v = is_configuration::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_configuration_v = is_configuration::value; } } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e8798499f9..6562fbb69b 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -145,10 +145,12 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/indirect_commands.hlsl") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 @@ -177,6 +179,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/basic.h") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/intrinsics.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/truncate.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") #glsl compat @@ -351,5 +354,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") #blur LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") +#morton codes +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") \ No newline at end of file