|
| 1 | +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. |
| 2 | +// This file is part of the "Nabla Engine". |
| 3 | +// For conditions of distribution and use, see copyright notice in nabla.h |
| 4 | + |
| 5 | +#include "nbl/examples/examples.hpp" |
| 6 | +#include "nbl/system/IApplicationFramework.h" |
| 7 | +#include "app_resources/common.h" |
| 8 | + |
| 9 | +#include <iostream> |
| 10 | +#include <cstdio> |
| 11 | +#include <assert.h> |
| 12 | + |
| 13 | + |
| 14 | +using namespace nbl; |
| 15 | +using namespace nbl::core; |
| 16 | +using namespace nbl::hlsl; |
| 17 | +using namespace nbl::system; |
| 18 | +using namespace nbl::asset; |
| 19 | +using namespace nbl::ui; |
| 20 | +using namespace nbl::video; |
| 21 | +using namespace nbl::examples; |
| 22 | + |
| 23 | +//using namespace glm; |
| 24 | + |
| 25 | +void cpu_tests(); |
| 26 | + |
| 27 | +class CooperativeBinarySearch final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication |
| 28 | +{ |
| 29 | + using device_base_t = application_templates::MonoDeviceApplication; |
| 30 | + using asset_base_t = BuiltinResourcesApplication; |
| 31 | +public: |
| 32 | + CooperativeBinarySearch(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : |
| 33 | + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} |
| 34 | + |
| 35 | + bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override |
| 36 | + { |
| 37 | + // Remember to call the base class initialization! |
| 38 | + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) |
| 39 | + return false; |
| 40 | + if (!asset_base_t::onAppInitialized(std::move(system))) |
| 41 | + return false; |
| 42 | + |
| 43 | + m_queue = m_device->getQueue(0, 0); |
| 44 | + m_commandPool = m_device->createCommandPool(m_queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); |
| 45 | + m_commandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &m_cmdbuf,1 }, smart_refctd_ptr(m_logger)); |
| 46 | + |
| 47 | + smart_refctd_ptr<IShader> shader; |
| 48 | + { |
| 49 | + IAssetLoader::SAssetLoadParams lp = {}; |
| 50 | + lp.logger = m_logger.get(); |
| 51 | + lp.workingDirectory = ""; // virtual root |
| 52 | + auto assetBundle = m_assetMgr->getAsset("app_resources/binarySearch.comp.hlsl", lp); |
| 53 | + const auto assets = assetBundle.getContents(); |
| 54 | + if (assets.empty()) |
| 55 | + return logFail("Could not load shader!"); |
| 56 | + |
| 57 | + auto source = IAsset::castDown<IShader>(assets[0]); |
| 58 | + // The down-cast should not fail! |
| 59 | + assert(source); |
| 60 | + |
| 61 | + // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple |
| 62 | + shader = m_device->compileShader({ source.get() }); |
| 63 | + if (!shader) |
| 64 | + return logFail("Creation of a GPU Shader to from CPU Shader source failed!"); |
| 65 | + } |
| 66 | + |
| 67 | + const uint32_t bindingCount = 2u; |
| 68 | + IGPUDescriptorSetLayout::SBinding bindings[bindingCount] = {}; |
| 69 | + bindings[0].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; // [[vk::binding(0)]] StructuredBuffer<uint> Histogram; |
| 70 | + bindings[1].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; // [[vk::binding(1)]] RWStructuredBuffer<uint> Output; |
| 71 | + |
| 72 | + for(int i = 0; i < bindingCount; ++i) |
| 73 | + { |
| 74 | + bindings[i].stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; |
| 75 | + bindings[i].count = 1; |
| 76 | + bindings[i].binding = i; |
| 77 | + } |
| 78 | + m_descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); |
| 79 | + { |
| 80 | + SPushConstantRange pcRange = {}; |
| 81 | + pcRange.stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; |
| 82 | + pcRange.offset = 0u; |
| 83 | + pcRange.size = 2 * sizeof(uint32_t); |
| 84 | + auto layout = m_device->createPipelineLayout({ &pcRange,1 }, smart_refctd_ptr(m_descriptorSetLayout)); |
| 85 | + IGPUComputePipeline::SCreationParams params = {}; |
| 86 | + params.layout = layout.get(); |
| 87 | + params.shader.shader = shader.get(); |
| 88 | + params.shader.entryPoint = "main"; |
| 89 | + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) |
| 90 | + return logFail("Failed to create compute pipeline!\n"); |
| 91 | + } |
| 92 | + |
| 93 | + for (uint32_t i = 0; i < bindingCount; i++) |
| 94 | + { |
| 95 | + m_buffers[i] = m_device->createBuffer(IGPUBuffer::SCreationParams { |
| 96 | + {.size = 500000, .usage = |
| 97 | + IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | |
| 98 | + IGPUBuffer::E_USAGE_FLAGS::EUF_STORAGE_BUFFER_BIT, |
| 99 | + } |
| 100 | + }); |
| 101 | + |
| 102 | + auto reqs = m_buffers[i]->getMemoryReqs(); |
| 103 | + reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits(); |
| 104 | + m_device->allocate(reqs, m_buffers[i].get()); |
| 105 | + } |
| 106 | + |
| 107 | + smart_refctd_ptr<IDescriptorPool> descriptorPool = nullptr; |
| 108 | + { |
| 109 | + IDescriptorPool::SCreateInfo createInfo = {}; |
| 110 | + createInfo.maxSets = 1; |
| 111 | + createInfo.maxDescriptorCount[static_cast<uint32_t>(IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = 1; |
| 112 | + descriptorPool = m_device->createDescriptorPool(std::move(createInfo)); |
| 113 | + } |
| 114 | + |
| 115 | + m_descriptorSet = descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout)); |
| 116 | + |
| 117 | + IGPUDescriptorSet::SDescriptorInfo descriptorInfos[bindingCount] = {}; |
| 118 | + IGPUDescriptorSet::SWriteDescriptorSet writeDescriptorSets[bindingCount] = {}; |
| 119 | + |
| 120 | + for(int i = 0; i < bindingCount; ++i) |
| 121 | + { |
| 122 | + writeDescriptorSets[i].info = &descriptorInfos[i]; |
| 123 | + writeDescriptorSets[i].dstSet = m_descriptorSet.get(); |
| 124 | + writeDescriptorSets[i].binding = i; |
| 125 | + writeDescriptorSets[i].count = bindings[i].count; |
| 126 | + |
| 127 | + descriptorInfos[i].desc = m_buffers[i]; |
| 128 | + descriptorInfos[i].info.buffer.size = ~0ull; |
| 129 | + } |
| 130 | + |
| 131 | + m_device->updateDescriptorSets(bindingCount, writeDescriptorSets, 0u, nullptr); |
| 132 | + |
| 133 | + // In contrast to fences, we just need one semaphore to rule all dispatches |
| 134 | + return true; |
| 135 | + } |
| 136 | + |
| 137 | + void onAppTerminated_impl() override |
| 138 | + { |
| 139 | + m_device->waitIdle(); |
| 140 | + } |
| 141 | + |
| 142 | + void workLoopBody() override |
| 143 | + { |
| 144 | + cpu_tests(); |
| 145 | + |
| 146 | + constexpr auto StartedValue = 0; |
| 147 | + |
| 148 | + smart_refctd_ptr<ISemaphore> progress = m_device->createSemaphore(StartedValue); |
| 149 | + |
| 150 | + m_cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); |
| 151 | + m_cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); |
| 152 | + |
| 153 | + |
| 154 | + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t layoutBufferBarrier[1] = { { |
| 155 | + .barrier = { |
| 156 | + .dep = { |
| 157 | + .srcStageMask = PIPELINE_STAGE_FLAGS::HOST_BIT, |
| 158 | + .srcAccessMask = ACCESS_FLAGS::HOST_WRITE_BIT, |
| 159 | + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, |
| 160 | + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS |
| 161 | + } |
| 162 | + }, |
| 163 | + // whole buffer because we transferred the contents into it |
| 164 | + .range = {.offset = 0,.size = m_buffers[1]->getCreationParams().size,.buffer = m_buffers[1]} |
| 165 | + } }; |
| 166 | + |
| 167 | + const IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo = { .bufBarriers = layoutBufferBarrier }; |
| 168 | + m_cmdbuf->pipelineBarrier(EDF_NONE, depInfo); |
| 169 | + |
| 170 | + |
| 171 | + const uint32_t pushConstants[2] = { 1920, 1080 }; |
| 172 | + const IGPUDescriptorSet* set = m_descriptorSet.get(); |
| 173 | + m_cmdbuf->bindComputePipeline(m_pipeline.get()); |
| 174 | + m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0u, 1u, &set); |
| 175 | + m_cmdbuf->dispatch(240, 135, 1u); |
| 176 | + |
| 177 | + layoutBufferBarrier[0].barrier.dep = layoutBufferBarrier[0].barrier.dep.nextBarrier(PIPELINE_STAGE_FLAGS::COPY_BIT,ACCESS_FLAGS::TRANSFER_READ_BIT); |
| 178 | + m_cmdbuf->pipelineBarrier(EDF_NONE,depInfo); |
| 179 | + |
| 180 | + m_cmdbuf->end(); |
| 181 | + |
| 182 | + { |
| 183 | + constexpr auto FinishedValue = 69; |
| 184 | + IQueue::SSubmitInfo submitInfos[1] = {}; |
| 185 | + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; |
| 186 | + submitInfos[0].commandBuffers = cmdbufs; |
| 187 | + const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = progress.get(),.value = FinishedValue,.stageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; |
| 188 | + submitInfos[0].signalSemaphores = signals; |
| 189 | + m_api->startCapture(); |
| 190 | + m_queue->submit(submitInfos); |
| 191 | + m_api->endCapture(); |
| 192 | + const ISemaphore::SWaitInfo waitInfos[] = { { |
| 193 | + .semaphore = progress.get(), |
| 194 | + .value = FinishedValue |
| 195 | + } }; |
| 196 | + m_device->blockForSemaphores(waitInfos); |
| 197 | + } |
| 198 | + |
| 199 | + auto mem = m_buffers[1]->getBoundMemory(); |
| 200 | + assert(mem.memory->isMappable()); |
| 201 | + auto* ptr = mem.memory->map({ .offset = 0, .length = mem.memory->getAllocationSize() }); |
| 202 | + printf("readback ptr %p\n", ptr); |
| 203 | + |
| 204 | + m_keepRunning = false; |
| 205 | + } |
| 206 | + |
| 207 | + bool keepRunning() override |
| 208 | + { |
| 209 | + return m_keepRunning; |
| 210 | + } |
| 211 | + |
| 212 | + |
| 213 | +private: |
| 214 | + smart_refctd_ptr<IGPUComputePipeline> m_pipeline = nullptr; |
| 215 | + smart_refctd_ptr<IGPUDescriptorSetLayout> m_descriptorSetLayout; |
| 216 | + smart_refctd_ptr<IGPUDescriptorSet> m_descriptorSet; |
| 217 | + |
| 218 | + smart_refctd_ptr<IGPUBuffer> m_buffers[2]; |
| 219 | + smart_refctd_ptr<IGPUCommandBuffer> m_cmdbuf = nullptr; |
| 220 | + IQueue* m_queue; |
| 221 | + smart_refctd_ptr<IGPUCommandPool> m_commandPool; |
| 222 | + uint64_t m_iteration = 0; |
| 223 | + constexpr static inline uint64_t MaxIterations = 200; |
| 224 | + |
| 225 | + bool m_keepRunning = true; |
| 226 | +}; |
| 227 | + |
| 228 | +NBL_MAIN_FUNC(CooperativeBinarySearch) |
| 229 | + |
/// CPU-side test hook called at the start of the work loop; currently an empty placeholder.
void cpu_tests() {}
0 commit comments