diff --git a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl
index cc22595444..ab7a87c7dd 100644
--- a/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl
+++ b/include/nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl
@@ -69,7 +69,7 @@ NBL_CONCEPT_END(
 #include <nbl/builtin/hlsl/concepts/__end.hlsl>
 
 template<typename T, typename V, typename I=uint32_t>
-NBL_BOOL_CONCEPT GenericDataAccessor = GenericWriteAccessor<T,V,I> && GenericWriteAccessor<T,V,I>;
+NBL_BOOL_CONCEPT GenericDataAccessor = GenericReadAccessor<T,V,I> && GenericWriteAccessor<T,V,I>;
 
 }
 }
diff --git a/include/nbl/builtin/hlsl/concepts/warp.hlsl b/include/nbl/builtin/hlsl/concepts/warp.hlsl
new file mode 100644
index 0000000000..e9e981a243
--- /dev/null
+++ b/include/nbl/builtin/hlsl/concepts/warp.hlsl
@@ -0,0 +1,50 @@
+#ifndef _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_
+#define _NBL_BUILTIN_HLSL_CONCEPTS_WARP_INCLUDED_
+
+#include "nbl/builtin/hlsl/concepts/accessors/generic_shared_data.hlsl"
+#include "nbl/builtin/hlsl/fft/common.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace concepts
+{
+
+// WARP<U, C> is satisfied by a type U that exposes three members:
+//   warp(float32_t2)            returning C
+//   forwardDensity(float32_t2)  returning float32_t
+//   backwardDensity(C)          returning float32_t
+// NOTE(review): the explicit template arguments on the three probed calls were reconstructed
+// (angle-bracket contents are missing from the reviewed copy of this patch) -- confirm.
+
+// declare concept
+#define NBL_CONCEPT_NAME WARP
+#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)
+#define NBL_CONCEPT_TPLT_PRM_NAMES (U)(C)
+// not the greatest syntax but works
+// The parameter names are macros until the #undef lines below, so they must differ from the
+// probed member names (`warp`) and from HLSL keywords (`out`).
+#define NBL_CONCEPT_PARAM_0 (warper,U)
+#define NBL_CONCEPT_PARAM_1 (uv,float32_t2)
+#define NBL_CONCEPT_PARAM_2 (dir,C)
+// start concept
+NBL_CONCEPT_BEGIN(3)
+#define warper NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0
+#define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1
+#define dir NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2
+NBL_CONCEPT_END(
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template warp<float32_t2>(uv)) , ::nbl::hlsl::is_same_v, C))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template forwardDensity<float32_t2>(uv)) , ::nbl::hlsl::is_same_v, float32_t))
+    ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((warper.template backwardDensity<C>(dir)) , ::nbl::hlsl::is_same_v, float32_t))
+);
+#undef dir
+#undef uv
+#undef warper
+#include <nbl/builtin/hlsl/concepts/__end.hlsl>
+
+}
+}
+}
+
+#endif
\ No newline at
end of file
diff --git a/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl
new file mode 100644
index 0000000000..bfcd9ffec7
--- /dev/null
+++ b/include/nbl/builtin/hlsl/sampling/hierarchical_image.hlsl
@@ -0,0 +1,173 @@
+// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SAMPLING_HIERARCHICAL_IMAGE_INCLUDED_
+
+// NOTE(review): the include targets, template parameter lists and requires-clauses in this
+// file were reconstructed from usage (their angle-bracket contents are missing from the
+// reviewed copy of the patch) -- confirm them against the original.
+#include <nbl/builtin/hlsl/concepts/warp.hlsl>
+#include <nbl/builtin/hlsl/workgroup/envmap.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace sampling
+{
+
+// Draws samples distributed according to an image's luminance, either by descending the
+// luminance mip chain directly (sample) or by looking up a precomputed warp map
+// (sampleWarpmap). PostWarp turns the resulting 2D coordinate into the returned float32_t3.
+class HierarchicalImage
+{
+    private:
+        // Bilinearly interpolates four warp-map entries, feeds the result to PostWarp::warp
+        // and writes the density of the returned value to pdf.
+        //  dirsX, dirsY      x / y of the entries in gather order: [0]=(0,1) [1]=(1,1) [2]=(1,0) [3]=(0,0)
+        //  unnormCoord       warp-map position in texel units; only its fractional part is used
+        //  lastWarpmapPixel  warp-map size minus one, per axis
+        template<typename PostWarp>
+        static float32_t3 calculateSampleAndPdf(float32_t4 dirsX, float32_t4 dirsY, float32_t2 unnormCoord, uint32_t2 lastWarpmapPixel, NBL_REF_ARG(float32_t) pdf)
+        {
+            const float32_t2 interpolant = frac(unnormCoord);
+            const float32_t4x2 uvs = transpose(float32_t2x4(dirsX, dirsY));
+
+            const float32_t2 xDiffs[] = {
+                uvs[2] - uvs[3],
+                uvs[1] - uvs[0]
+            };
+            const float32_t2 yVals[] = {
+                xDiffs[0] * interpolant.x + uvs[3],
+                xDiffs[1] * interpolant.x + uvs[0]
+            };
+            const float32_t2 yDiff = yVals[1] - yVals[0];
+            const float32_t2 uv = yDiff * interpolant.y + yVals[0];
+
+            // Note(kevinyu): sinTheta is calculated twice inside PostWarp::warp and PostWarp::forwardDensity
+            const float32_t3 L = PostWarp::warp(uv);
+
+            const float detInterpolJacobian = determinant(float32_t2x2(
+                lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx
+                yDiff // second column dFdy
+            ));
+
+            // d(uv)/d(xi) is the interpolation Jacobian times the texel-space scale of xi
+            pdf = abs(PostWarp::forwardDensity(uv) / (detInterpolJacobian * float32_t(lastWarpmapPixel.x * lastWarpmapPixel.y)));
+
+            return L;
+        }
+
+    public:
+        // Descends the luminance mip chain from its coarsest level to mip 0. At every level
+        // one texel of the current 2x2 footprint is chosen through impl::choseSecond, first
+        // along Y and then along X. Returns (chosen mip-0 texel + leftover xi) / lumaMapSize.
+        //  luminanceAccessor  must provide fetch(texel, level) and gather(texel, level)
+        //  xi                 two random numbers, one per axis
+        //  aspect2x1          true when the coarsest level needing a decision is 2x1 rather than 2x2
+        template<typename LuminanceAccessor>
+        static float32_t2 binarySearch(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, const uint32_t2 lumaMapSize, const float32_t2 xi, const bool aspect2x1)
+        {
+            // impl::choseSecond takes its random number by reference, so work on a mutable copy
+            float32_t2 xiLeft = xi;
+            uint32_t2 p = uint32_t2(0, 0);
+
+            // TODO(kevinyu): Implement findMSB
+            // Index of the 1x1 mip. The loop below decrements before use, so a square map
+            // takes its first decision on the 2x2 level.
+            uint32_t topMip = findMSB(lumaMapSize.x);
+            if (aspect2x1)
+            {
+                // do one split in the X axis first cause penultimate full mip would have been 2x1
+                --topMip;
+                p.x = workgroup::envmap::impl::choseSecond(luminanceAccessor.fetch(uint32_t2(0, 0), topMip), luminanceAccessor.fetch(uint32_t2(1, 0), topMip), xiLeft.x) ? 1 : 0;
+            }
+
+            for (uint32_t i = topMip; i != 0;)
+            {
+                --i;
+                p <<= 1;
+                const float32_t4 values = luminanceAccessor.gather(p, i);
+                float32_t wx_0, wx_1;
+                {
+                    const float32_t wy_0 = values[3] + values[2];
+                    const float32_t wy_1 = values[1] + values[0];
+                    if (workgroup::envmap::impl::choseSecond(wy_0, wy_1, xiLeft.y))
+                    {
+                        p.y |= 1;
+                        wx_0 = values[0];
+                        wx_1 = values[1];
+                    }
+                    else
+                    {
+                        wx_0 = values[3];
+                        wx_1 = values[2];
+                    }
+                }
+
+                if (workgroup::envmap::impl::choseSecond(wx_0, wx_1, xiLeft.x))
+                    p.x |= 1;
+            }
+
+            // NOTE(review): adding xiLeft presumably places the sample inside the chosen texel,
+            // which requires math::partitionRandVariable to rescale xi back to [0,1) -- confirm.
+            const float32_t2 directionUV = (float32_t2(p.x, p.y) + xiLeft) / float32_t2(lumaMapSize);
+            return directionUV;
+        }
+
+        // Samples through a precomputed warp map: gathers the four entries around xi and
+        // interpolates them in calculateSampleAndPdf.
+        //  warpmap      must provide gatherU(uv) and gatherV(uv), each returning a float32_t4
+        //  warpmapSize  warp-map resolution in texels
+        //  pdf          receives the density of the returned value
+        template<typename WarpmapAccessor, typename PostWarp NBL_FUNC_REQUIRES(concepts::WARP<PostWarp, float32_t3>)
+        static float32_t3 sampleWarpmap(NBL_CONST_REF_ARG(WarpmapAccessor) warpmap, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf)
+        {
+            // WarpmapGeneration stores the entry for xi = texel / (size - 1), so xi * (size - 1)
+            // is the texel-space position to interpolate at
+            const uint32_t2 lastWarpmapPixel = warpmapSize - uint32_t2(1, 1);
+
+            const float32_t2 unnormCoord = xi * float32_t2(lastWarpmapPixel);
+            const float32_t2 warpSampleCoord = (unnormCoord + float32_t2(0.5f, 0.5f)) / float32_t2(warpmapSize.x, warpmapSize.y);
+            const float32_t4 dirsX = warpmap.gatherU(warpSampleCoord);
+            const float32_t4 dirsY = warpmap.gatherV(warpSampleCoord);
+
+            return calculateSampleAndPdf<PostWarp>(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf);
+        }
+
+        // Same result as sampleWarpmap without a stored warp map: the four entries that a
+        // gather would have returned are recomputed on the fly with binarySearch.
+        template<typename LuminanceReadAccessor, typename PostWarp NBL_FUNC_REQUIRES(concepts::WARP<PostWarp, float32_t3>)
+        static float32_t3 sample(NBL_CONST_REF_ARG(LuminanceReadAccessor) luminanceMap, const uint32_t2 lumaMapSize, const bool lumaAspect2x1, const uint32_t2 warpmapSize, const float32_t2 xi, NBL_REF_ARG(float32_t) pdf)
+        {
+            const uint32_t2 lastWarpmapPixel = warpmapSize - uint32_t2(1, 1);
+            const float32_t2 lastPixel = float32_t2(lastWarpmapPixel);
+            const float32_t2 unnormCoord = xi * lastPixel;
+
+            // The virtual warp-map texels around unnormCoord, converted back to the xi that
+            // WarpmapGeneration uses for them (texel / lastPixel).
+            const float32_t2 texel00 = floor(unnormCoord);
+            const float32_t2 texel11 = min(texel00 + float32_t2(1, 1), lastPixel);
+            const float32_t2 xi00 = texel00 / lastPixel;
+            const float32_t2 xi11 = texel11 / lastPixel;
+
+            // same order as a gather: (0,1), (1,1), (1,0), (0,0)
+            const float32_t2 dir0 = binarySearch(luminanceMap, lumaMapSize, float32_t2(xi00.x, xi11.y), lumaAspect2x1);
+            const float32_t2 dir1 = binarySearch(luminanceMap, lumaMapSize, xi11, lumaAspect2x1);
+            const float32_t2 dir2 = binarySearch(luminanceMap, lumaMapSize, float32_t2(xi11.x, xi00.y), lumaAspect2x1);
+            const float32_t2 dir3 = binarySearch(luminanceMap, lumaMapSize, xi00, lumaAspect2x1);
+
+            const float32_t4 dirsX = float32_t4(dir0.x, dir1.x, dir2.x, dir3.x);
+            const float32_t4 dirsY = float32_t4(dir0.y, dir1.y, dir2.y, dir3.y);
+
+            return calculateSampleAndPdf<PostWarp>(dirsX, dirsY, unnormCoord, lastWarpmapPixel, pdf);
+        }
+};
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/warp/spherical.hlsl b/include/nbl/builtin/hlsl/warp/spherical.hlsl
new file mode 100644
index 0000000000..10c341f06b
--- /dev/null
+++ b/include/nbl/builtin/hlsl/warp/spherical.hlsl
@@ -0,0 +1,64 @@
+#ifndef _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_WARP_SPHERICAL_INCLUDED_
+
+// NOTE(review): the include target and the template headers below were reconstructed from
+// usage (angle-bracket contents are missing from the reviewed copy of the patch) -- confirm.
+#include <nbl/builtin/hlsl/numbers.hlsl>
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace warp
+{
+
+// Warps the unit square onto the unit sphere:
+// phi = 2*pi*uv.x around Z, theta = pi*uv.y measured from +Z.
+class Spherical
+{
+    public:
+        using codomain_type = float32_t3;
+
+        // Returns (sin(theta)*cos(phi), sin(theta)*sin(phi), cos(theta)).
+        template<typename UV NBL_FUNC_REQUIRES(is_same_v<UV, float32_t2>)
+        static codomain_type warp(const UV uv)
+        {
+            const float32_t phi = 2.f * uv.x * numbers::pi<float32_t>;
+            const float32_t theta = uv.y * numbers::pi<float32_t>;
+            float32_t3 dir;
+            dir.x = cos(phi);
+            // |sin(phi)| from cos(phi); the sign is negative on the second half turn
+            dir.y = sqrt(1.f - dir.x * dir.x);
+            if (uv.x > 0.5f) dir.y = -dir.y;
+            const float32_t cosTheta = cos(theta);
+            // sin(theta) >= 0 for theta in [0,pi], so the positive root is the right one
+            const float32_t sinTheta = sqrt(1.f - cosTheta * cosTheta);
+            dir.xy *= sinTheta;
+            dir.z = cosTheta;
+            return dir;
+        }
+
+        // Density of warp(uv) per unit solid angle when uv is uniform on the unit square:
+        // 1 / (2 * pi^2 * sin(theta)). Unbounded at the poles, where sin(theta) is 0.
+        template<typename UV NBL_FUNC_REQUIRES(is_same_v<UV, float32_t2>)
+        static float32_t forwardDensity(const UV uv)
+        {
+            const float32_t theta = uv.y * numbers::pi<float32_t>;
+            return 1.0f / (sin(theta) * 2.f * numbers::pi<float32_t> * numbers::pi<float32_t>);
+        }
+
+        // The same density evaluated from a unit direction, using sin(theta) = sqrt(1 - dir.z^2).
+        // The parameter is not called `out` because that is an HLSL keyword.
+        template<typename C NBL_FUNC_REQUIRES(is_same_v<C, codomain_type>)
+        static float32_t backwardDensity(const C dir)
+        {
+            const float32_t sinTheta = sqrt(1.f - dir.z * dir.z);
+            return 1.0f / (sinTheta * 2.f * numbers::pi<float32_t> * numbers::pi<float32_t>);
+        }
+};
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/workgroup/envmap.hlsl b/include/nbl/builtin/hlsl/workgroup/envmap.hlsl
new file mode 100644
index 0000000000..df452fb0e8
--- /dev/null
+++ b/include/nbl/builtin/hlsl/workgroup/envmap.hlsl
@@ -0,0 +1,124 @@
+
+#ifndef _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_
+#define _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_
+
+// NOTE(review): this header has no includes of its own; math::partitionRandVariable, findMSB
+// and SubgroupContiguousIndex must be made visible by the including file.
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace workgroup
+{
+namespace envmap
+{
+namespace impl
+{
+    // Decides between two options weighted `first` and `second`; true selects the second.
+    // xi is handed by reference to math::partitionRandVariable, so the caller's value may change.
+    // `inline` because this header-defined function sits outside the __HLSL_VERSION guard.
+    inline bool choseSecond(float first, float second, NBL_REF_ARG(float) xi)
+    {
+        // numerical resilience against IEEE754
+        float firstProb = 1.0f / (1.0f + second / first);
+        float dummy = 0.0f;
+        return math::partitionRandVariable(firstProb, xi, dummy);
+    }
+
+}
+
+}
+}
+}
+}
+
+#ifdef __HLSL_VERSION
+namespace nbl
+{
+namespace hlsl
+{
+namespace workgroup
+{
+namespace envmap
+{
+
+// Generates a warp map: the texel for xi = texel / (warpMapSize - 1) stores the UV that the
+// luminance mip descent yields for that xi.
+struct WarpmapGeneration
+{
+    // NOTE(review): the template parameter list was reconstructed; only the
+    // envmap::WarpmapWriteAccessor requirement survives in the reviewed copy of the patch.
+    //  luminanceAccessor  must provide get(texel, level, offset)
+    //  outputAccessor     must provide set(texel, uv)
+    //  lumaMapSize        resolution of luminance mip 0; a 2x1 aspect is assumed
+    //  warpMapSize        resolution of the warp map being written
+    template<typename LuminanceAccessor, typename OutputAccessor NBL_FUNC_REQUIRES(envmap::WarpmapWriteAccessor<OutputAccessor>)
+    // TODO(kevinyu): Should lumapMapSize and warpMapSize provided by Accessor?
+    static void __call(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, NBL_REF_ARG(OutputAccessor) outputAccessor, uint32_t2 lumaMapSize, uint32_t2 warpMapSize)
+    {
+        // NOTE(review): assumes the invocation index enumerates warp-map texels in row-major
+        // order -- confirm against the dispatch layout.
+        const uint32_t invocation = uint32_t(SubgroupContiguousIndex());
+        const uint32_t2 texel = uint32_t2(invocation % warpMapSize.x, invocation / warpMapSize.x);
+        const uint32_t2 lastWarpMapPixel = warpMapSize - uint32_t2(1, 1);
+
+        if (all(texel < warpMapSize))
+        {
+            float32_t2 xi = float32_t2(texel) / float32_t2(lastWarpMapPixel);
+
+            uint32_t2 p;
+            p.y = 0;
+
+            // TODO(kevinyu): Implement findMSB
+            const uint32_t mip2x1 = findMSB(lumaMapSize.x) - 1;
+            // do one split in the X axis first cause penultimate full mip would have been 2x1
+            p.x = impl::choseSecond(luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(0, 0)), luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(1, 0)), xi.x) ? 1 : 0;
+            for (uint32_t i = mip2x1; i != 0;)
+            {
+                --i;
+                p <<= 1;
+                const float32_t4 values = float32_t4(
+                    luminanceAccessor.get(p, i, uint32_t2(0, 1)),
+                    luminanceAccessor.get(p, i, uint32_t2(1, 1)),
+                    luminanceAccessor.get(p, i, uint32_t2(1, 0)),
+                    luminanceAccessor.get(p, i, uint32_t2(0, 0))
+                );
+
+                float32_t wx_0, wx_1;
+                {
+                    const float32_t wy_0 = values[3] + values[2];
+                    const float32_t wy_1 = values[1] + values[0];
+                    if (impl::choseSecond(wy_0, wy_1, xi.y))
+                    {
+                        p.y |= 1;
+                        wx_0 = values[0];
+                        wx_1 = values[1];
+                    }
+                    else
+                    {
+                        wx_0 = values[3];
+                        wx_1 = values[2];
+                    }
+                }
+
+                if (impl::choseSecond(wx_0, wx_1, xi.x))
+                {
+                    p.x |= 1;
+                }
+            }
+
+            const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize);
+            outputAccessor.set(texel, directionUV);
+        }
+    }
+
+};
+
+}
+}
+}
+}
+#endif
+
+#endif