@@ -186,7 +186,7 @@ class Workgroup2ScanTestApp final : public application_templates::BasicMultiQueu
186186 for (auto subgroupSize = MinSubgroupSize; subgroupSize <= MaxSubgroupSize; subgroupSize *= 2u )
187187 {
188188 const uint8_t subgroupSizeLog2 = hlsl::findMSB (subgroupSize);
189- for (uint32_t workgroupSize = subgroupSize ; workgroupSize <= MaxWorkgroupSize; workgroupSize *= 2u )
189+ for (uint32_t workgroupSize = 64 ; workgroupSize <= MaxWorkgroupSize; workgroupSize *= 2u )
190190 {
191191 // make sure renderdoc captures everything for debugging
192192 m_api->startCapture ();
@@ -198,14 +198,15 @@ class Workgroup2ScanTestApp final : public application_templates::BasicMultiQueu
198198 uint32_t itemsPerWG = workgroupSize * itemsPerInvocation;
199199 m_logger->log (" Testing Items per Invocation %u" , ILogger::ELL_INFO, itemsPerInvocation);
200200 bool passed = true ;
201- passed = runTest<emulatedReduction, false >(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool (useNative), itemsPerWG, itemsPerInvocation) && passed;
202- logTestOutcome (passed, itemsPerWG);
203- passed = runTest<emulatedScanInclusive, false >(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool (useNative), itemsPerWG, itemsPerInvocation) && passed;
204- logTestOutcome (passed, itemsPerWG);
205- passed = runTest<emulatedScanExclusive, false >(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool (useNative), itemsPerWG, itemsPerInvocation) && passed;
206- logTestOutcome (passed, itemsPerWG);
207-
208- hlsl::workgroup2::SArithmeticConfiguration wgConfig = hlsl::workgroup2::SArithmeticConfiguration::create (hlsl::findMSB (workgroupSize), subgroupSizeLog2, itemsPerInvocation);
201+ // passed = runTest<emulatedReduction, false>(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool(useNative), itemsPerWG, itemsPerInvocation) && passed;
202+ // logTestOutcome(passed, itemsPerWG);
203+ // passed = runTest<emulatedScanInclusive, false>(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool(useNative), itemsPerWG, itemsPerInvocation) && passed;
204+ // logTestOutcome(passed, itemsPerWG);
205+ // passed = runTest<emulatedScanExclusive, false>(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool(useNative), itemsPerWG, itemsPerInvocation) && passed;
206+ // logTestOutcome(passed, itemsPerWG);
207+
208+ hlsl::workgroup2::SArithmeticConfiguration wgConfig;
209+ wgConfig.init (hlsl::findMSB (workgroupSize), subgroupSizeLog2, itemsPerInvocation);
209210 itemsPerWG = wgConfig.VirtualWorkgroupSize * wgConfig.ItemsPerInvocation_0 ;
210211 m_logger->log (" Testing Item Count %u" , ILogger::ELL_INFO, itemsPerWG);
211212 passed = runTest<emulatedReduction, true >(workgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool (useNative), itemsPerWG, itemsPerInvocation) && passed;
@@ -306,28 +307,25 @@ class Workgroup2ScanTestApp final : public application_templates::BasicMultiQueu
306307 smart_refctd_ptr<ICPUShader> overriddenUnspecialized;
307308 if constexpr (WorkgroupTest)
308309 {
309- const std::string definitions[6 ] = {
310+ hlsl::workgroup2::SArithmeticConfiguration wgConfig;
311+ wgConfig.init (hlsl::findMSB (workgroupSize), subgroupSizeLog2, itemsPerInvoc);
312+
313+ const std::string definitions[3 ] = {
310314 " workgroup2::" + arith_name,
311- std::to_string (workgroupSizeLog2),
312- std::to_string (itemsPerWG),
313- std::to_string (itemsPerInvoc),
314- std::to_string (subgroupSizeLog2),
315+ wgConfig.getConfigTemplateStructString (),
315316 std::to_string (arith_name==" reduction" )
316317 };
317318
318- const IShaderCompiler::SMacroDefinition defines[7 ] = {
319+ const IShaderCompiler::SMacroDefinition defines[4 ] = {
319320 { " OPERATION" , definitions[0 ] },
320- { " WORKGROUP_SIZE_LOG2" , definitions[1 ] },
321- { " ITEMS_PER_WG" , definitions[2 ] },
322- { " ITEMS_PER_INVOCATION" , definitions[3 ] },
323- { " SUBGROUP_SIZE_LOG2" , definitions[4 ] },
324- { " IS_REDUCTION" , definitions[5 ] },
321+ { " WORKGROUP_CONFIG_T" , definitions[1 ] },
322+ { " IS_REDUCTION" , definitions[2 ] },
325323 { " TEST_NATIVE" , " 1" }
326324 };
327325 if (useNative)
328- options.preprocessorOptions .extraDefines = { defines, defines + 7 };
326+ options.preprocessorOptions .extraDefines = { defines, defines + 4 };
329327 else
330- options.preprocessorOptions .extraDefines = { defines, defines + 6 };
328+ options.preprocessorOptions .extraDefines = { defines, defines + 3 };
331329
332330 overriddenUnspecialized = compiler->compileToSPIRV ((const char *)source->getContent ()->getPointer (), options);
333331 }
@@ -358,7 +356,7 @@ class Workgroup2ScanTestApp final : public application_templates::BasicMultiQueu
358356 auto pipeline = createPipeline (overriddenUnspecialized.get (),subgroupSizeLog2);
359357
360358 // TODO: overlap dispatches with memory readbacks (requires multiple copies of `buffers`)
361- uint32_t workgroupCount = min (elementCount / itemsPerWG, m_physicalDevice->getLimits ().maxComputeWorkGroupCount [0 ]);
359+ uint32_t workgroupCount = 1 ; // min(elementCount / itemsPerWG, m_physicalDevice->getLimits().maxComputeWorkGroupCount[0]);
362360
363361 cmdbuf->begin (IGPUCommandBuffer::USAGE::NONE);
364362 cmdbuf->bindComputePipeline (pipeline.get ());
0 commit comments