diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index 6933e77e5..e25fa58a2 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -4,518 +4,486 @@ #include #include "app_resources/testCommon.hlsl" #include "app_resources/testCommon2.hlsl" -#include "ITester.h" +#include "nbl/examples/Tester/ITester.h" using namespace nbl; -class CTester final : public ITester +class CTester final : public ITester { + using base_t = ITester; + public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + CTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; - std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); +private: + InputTestValues generateInputTestValues() override + { std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); - m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - InputTestValues testInput; - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected; - - uint32_t generatedShift = intDistribution(mt) & uint32_t(63); - testInput.shift = generatedShift; - { - uint64_t generatedA = longDistribution(mt); - uint64_t generatedB = longDistribution(mt); - - testInput.generatedA = generatedA; - testInput.generatedB = generatedB; - - expected.emulatedAnd = _static_cast(generatedA & generatedB); - expected.emulatedOr = _static_cast(generatedA | generatedB); - expected.emulatedXor = _static_cast(generatedA ^ generatedB); - expected.emulatedNot = _static_cast(~generatedA); - expected.emulatedPlus = _static_cast(generatedA + generatedB); - expected.emulatedMinus = _static_cast(generatedA - generatedB); - 
expected.emulatedUnaryMinus = _static_cast(-generatedA); - expected.emulatedLess = uint32_t(generatedA < generatedB); - expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); - expected.emulatedGreater = uint32_t(generatedA > generatedB); - expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); - - expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); - expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); - expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); - } - { - testInput.coordX = longDistribution(mt); - testInput.coordY = longDistribution(mt); - testInput.coordZ = longDistribution(mt); - testInput.coordW = longDistribution(mt); - - uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; - uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; - - uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); - uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); - uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - - uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; - uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; - - uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); - uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); - uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - - uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; - uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, 
testInput.coordW, testInput.coordX }; - - uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); - uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); - uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them - // so their highest bits are all 0s or 1s depending on the sign of the number they encode - - int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); - int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); - int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); - int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); - int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - - int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); - int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); - int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); - int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); - int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - - int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); - int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); - int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - // Plus - expected.mortonPlus_small_2 = 
createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); - expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); - expected.mortonPlus_full_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); - expected.mortonPlus_emulated_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); - - expected.mortonPlus_small_3 = createMortonFromU64Vec(Vec3ASmall + Vec3BSmall); - expected.mortonPlus_medium_3 = createMortonFromU64Vec(Vec3AMedium + Vec3BMedium); - expected.mortonPlus_full_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); - expected.mortonPlus_emulated_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); - - expected.mortonPlus_small_4 = createMortonFromU64Vec(Vec4ASmall + Vec4BSmall); - expected.mortonPlus_medium_4 = createMortonFromU64Vec(Vec4AMedium + Vec4BMedium); - expected.mortonPlus_full_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); - expected.mortonPlus_emulated_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); - - // Minus - expected.mortonMinus_small_2 = createMortonFromU64Vec(Vec2ASmall - Vec2BSmall); - expected.mortonMinus_medium_2 = createMortonFromU64Vec(Vec2AMedium - Vec2BMedium); - expected.mortonMinus_full_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); - expected.mortonMinus_emulated_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); - - expected.mortonMinus_small_3 = createMortonFromU64Vec(Vec3ASmall - Vec3BSmall); - expected.mortonMinus_medium_3 = createMortonFromU64Vec(Vec3AMedium - Vec3BMedium); - expected.mortonMinus_full_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); - expected.mortonMinus_emulated_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); - - expected.mortonMinus_small_4 = createMortonFromU64Vec(Vec4ASmall - Vec4BSmall); - expected.mortonMinus_medium_4 = createMortonFromU64Vec(Vec4AMedium - Vec4BMedium); - expected.mortonMinus_full_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); - expected.mortonMinus_emulated_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); - - // Coordinate-wise equality - 
expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); - expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); - expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); - expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); - - expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); - expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); - expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); - expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); - - expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); - expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); - expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); - expected.mortonEqual_emulated_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); - - // Coordinate-wise unsigned inequality (just testing with less) - expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); - expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); - expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); - expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); - - expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); - expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); - expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); - expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); - - expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); - expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); - expected.mortonUnsignedLess_full_4 = 
uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); - expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); - - // Coordinate-wise signed inequality - expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); - expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); - expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); - expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); - - expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); - expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); - expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); - expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); - - expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); - expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); - expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); - expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); - - uint16_t castedShift = uint16_t(generatedShift); - // Left-shift - expected.mortonLeftShift_small_2 = createMortonFromU64Vec(Vec2ASmall << uint16_t(castedShift % smallBits_2)); - expected.mortonLeftShift_medium_2 = createMortonFromU64Vec(Vec2AMedium << uint16_t(castedShift % mediumBits_2)); - expected.mortonLeftShift_full_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); - expected.mortonLeftShift_emulated_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); - - expected.mortonLeftShift_small_3 = createMortonFromU64Vec(Vec3ASmall << 
uint16_t(castedShift % smallBits_3)); - expected.mortonLeftShift_medium_3 = createMortonFromU64Vec(Vec3AMedium << uint16_t(castedShift % mediumBits_3)); - expected.mortonLeftShift_full_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); - expected.mortonLeftShift_emulated_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); - - expected.mortonLeftShift_small_4 = createMortonFromU64Vec(Vec4ASmall << uint16_t(castedShift % smallBits_4)); - expected.mortonLeftShift_medium_4 = createMortonFromU64Vec(Vec4AMedium << uint16_t(castedShift % mediumBits_4)); - expected.mortonLeftShift_full_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); - expected.mortonLeftShift_emulated_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); - - // Unsigned right-shift - expected.mortonUnsignedRightShift_small_2 = morton::code::create(Vec2ASmall >> uint16_t(castedShift % smallBits_2)); - expected.mortonUnsignedRightShift_medium_2 = morton::code::create(Vec2AMedium >> uint16_t(castedShift % mediumBits_2)); - expected.mortonUnsignedRightShift_full_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); - expected.mortonUnsignedRightShift_emulated_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); - - expected.mortonUnsignedRightShift_small_3 = morton::code::create(Vec3ASmall >> uint16_t(castedShift % smallBits_3)); - expected.mortonUnsignedRightShift_medium_3 = morton::code::create(Vec3AMedium >> uint16_t(castedShift % mediumBits_3)); - expected.mortonUnsignedRightShift_full_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); - expected.mortonUnsignedRightShift_emulated_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); - - expected.mortonUnsignedRightShift_small_4 = morton::code::create(Vec4ASmall >> uint16_t(castedShift % smallBits_4)); - expected.mortonUnsignedRightShift_medium_4 = 
morton::code::create(Vec4AMedium >> uint16_t(castedShift % mediumBits_4)); - expected.mortonUnsignedRightShift_full_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); - expected.mortonUnsignedRightShift_emulated_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); - - // Signed right-shift - expected.mortonSignedRightShift_small_2 = morton::code::create(Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)); - expected.mortonSignedRightShift_medium_2 = morton::code::create(Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)); - expected.mortonSignedRightShift_full_2 = morton::code::create(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); - expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); - - expected.mortonSignedRightShift_small_3 = morton::code::create(Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)); - expected.mortonSignedRightShift_medium_3 = morton::code::create(Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)); - expected.mortonSignedRightShift_full_3 = morton::code::create(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); - expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); - - expected.mortonSignedRightShift_small_4 = morton::code::create(Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)); - expected.mortonSignedRightShift_medium_4 = morton::code::create(Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)); - expected.mortonSignedRightShift_full_4 = morton::code::create(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); - expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); - } - - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("FIRST TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } + // Set input 
test values that will be used in both CPU and GPU tests + InputTestValues testInput; -private: - inline static constexpr int Iterations = 100u; + testInput.generatedA = longDistribution(getRandomEngine()); + testInput.generatedB = longDistribution(getRandomEngine()); - void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) - { - TestValues cpuTestValues; + uint32_t generatedShift = intDistribution(getRandomEngine()) & uint32_t(63); + testInput.shift = generatedShift; - fillTestValues(commonTestInputValues, cpuTestValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + testInput.coordX = longDistribution(getRandomEngine()); + testInput.coordY = longDistribution(getRandomEngine()); + testInput.coordZ = longDistribution(getRandomEngine()); + testInput.coordW = longDistribution(getRandomEngine()); + return testInput; } - void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + TestValues determineExpectedResults(const InputTestValues& testInput) override { - TestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + { + const uint64_t generatedA = testInput.generatedA; + const uint64_t generatedB = testInput.generatedB; + const uint32_t generatedShift = testInput.shift; + + expected.emulatedAnd = _static_cast(generatedA & generatedB); + expected.emulatedOr = _static_cast(generatedA | generatedB); + expected.emulatedXor = _static_cast(generatedA ^ generatedB); + expected.emulatedNot = _static_cast(~generatedA); + expected.emulatedPlus = _static_cast(generatedA + generatedB); + expected.emulatedMinus = _static_cast(generatedA - generatedB); + 
expected.emulatedUnaryMinus = _static_cast(-generatedA); + expected.emulatedLess = uint32_t(generatedA < generatedB); + expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); + expected.emulatedGreater = uint32_t(generatedA > generatedB); + expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); + + expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); + expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); + expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); + } + { + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + 
uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them + // so their highest bits are all 0s or 1s depending on the sign of the number they encode + + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + // Plus + expected.mortonPlus_small_2 = createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); + expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); + expected.mortonPlus_full_2 = 
createMortonFromU64Vec(Vec2AFull + Vec2BFull); + expected.mortonPlus_emulated_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + + expected.mortonPlus_small_3 = createMortonFromU64Vec(Vec3ASmall + Vec3BSmall); + expected.mortonPlus_medium_3 = createMortonFromU64Vec(Vec3AMedium + Vec3BMedium); + expected.mortonPlus_full_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + expected.mortonPlus_emulated_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + + expected.mortonPlus_small_4 = createMortonFromU64Vec(Vec4ASmall + Vec4BSmall); + expected.mortonPlus_medium_4 = createMortonFromU64Vec(Vec4AMedium + Vec4BMedium); + expected.mortonPlus_full_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + expected.mortonPlus_emulated_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + + // Minus + expected.mortonMinus_small_2 = createMortonFromU64Vec(Vec2ASmall - Vec2BSmall); + expected.mortonMinus_medium_2 = createMortonFromU64Vec(Vec2AMedium - Vec2BMedium); + expected.mortonMinus_full_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + expected.mortonMinus_emulated_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + + expected.mortonMinus_small_3 = createMortonFromU64Vec(Vec3ASmall - Vec3BSmall); + expected.mortonMinus_medium_3 = createMortonFromU64Vec(Vec3AMedium - Vec3BMedium); + expected.mortonMinus_full_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + expected.mortonMinus_emulated_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + + expected.mortonMinus_small_4 = createMortonFromU64Vec(Vec4ASmall - Vec4BSmall); + expected.mortonMinus_medium_4 = createMortonFromU64Vec(Vec4AMedium - Vec4BMedium); + expected.mortonMinus_full_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + expected.mortonMinus_emulated_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + + // Coordinate-wise equality + expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); + expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); + 
expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + + expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); + expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); + expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + + expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); + expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); + expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + expected.mortonEqual_emulated_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); + expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); + expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + + expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); + expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); + expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + + expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); + expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); + expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + // Coordinate-wise signed inequality + 
expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); + expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); + expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + + expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); + expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); + expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + + expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); + expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); + expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(testInput.shift); + // Left-shift + expected.mortonLeftShift_small_2 = createMortonFromU64Vec(Vec2ASmall << uint16_t(castedShift % smallBits_2)); + expected.mortonLeftShift_medium_2 = createMortonFromU64Vec(Vec2AMedium << uint16_t(castedShift % mediumBits_2)); + expected.mortonLeftShift_full_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + expected.mortonLeftShift_emulated_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + + expected.mortonLeftShift_small_3 = createMortonFromU64Vec(Vec3ASmall << uint16_t(castedShift % smallBits_3)); + expected.mortonLeftShift_medium_3 = createMortonFromU64Vec(Vec3AMedium << uint16_t(castedShift % mediumBits_3)); + expected.mortonLeftShift_full_3 = 
createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); + expected.mortonLeftShift_emulated_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); + + expected.mortonLeftShift_small_4 = createMortonFromU64Vec(Vec4ASmall << uint16_t(castedShift % smallBits_4)); + expected.mortonLeftShift_medium_4 = createMortonFromU64Vec(Vec4AMedium << uint16_t(castedShift % mediumBits_4)); + expected.mortonLeftShift_full_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + expected.mortonLeftShift_emulated_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + + // Unsigned right-shift + expected.mortonUnsignedRightShift_small_2 = morton::code::create(Vec2ASmall >> uint16_t(castedShift % smallBits_2)); + expected.mortonUnsignedRightShift_medium_2 = morton::code::create(Vec2AMedium >> uint16_t(castedShift % mediumBits_2)); + expected.mortonUnsignedRightShift_full_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + expected.mortonUnsignedRightShift_emulated_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + + expected.mortonUnsignedRightShift_small_3 = morton::code::create(Vec3ASmall >> uint16_t(castedShift % smallBits_3)); + expected.mortonUnsignedRightShift_medium_3 = morton::code::create(Vec3AMedium >> uint16_t(castedShift % mediumBits_3)); + expected.mortonUnsignedRightShift_full_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + expected.mortonUnsignedRightShift_emulated_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + + expected.mortonUnsignedRightShift_small_4 = morton::code::create(Vec4ASmall >> uint16_t(castedShift % smallBits_4)); + expected.mortonUnsignedRightShift_medium_4 = morton::code::create(Vec4AMedium >> uint16_t(castedShift % mediumBits_4)); + expected.mortonUnsignedRightShift_full_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + 
expected.mortonUnsignedRightShift_emulated_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + + // Signed right-shift + expected.mortonSignedRightShift_small_2 = morton::code::create(Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)); + expected.mortonSignedRightShift_medium_2 = morton::code::create(Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)); + expected.mortonSignedRightShift_full_2 = morton::code::create(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + + expected.mortonSignedRightShift_small_3 = morton::code::create(Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)); + expected.mortonSignedRightShift_medium_3 = morton::code::create(Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)); + expected.mortonSignedRightShift_full_3 = morton::code::create(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + + expected.mortonSignedRightShift_small_4 = morton::code::create(Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)); + expected.mortonSignedRightShift_medium_4 = morton::code::create(Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)); + expected.mortonSignedRightShift_full_4 = morton::code::create(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + } + + return expected; } - void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, const size_t testIteration, const uint32_t seed, ITester::TestType testType) override { // Some verification is commented out and moved to 
CTester2 due to bug in dxc. Uncomment them when the bug is fixed. - verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); - verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); - verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); - verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); - verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); - verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testType); - verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); - verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); - verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); - verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); - verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); - verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); - verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); - verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testType); + verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testIteration, seed, testType); + verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testIteration, seed, testType); + verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, 
testValues.emulatedXor, testIteration, seed, testType); + verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testIteration, seed, testType); + verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testIteration, seed, testType); + verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testIteration, seed, testType); + verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testIteration, seed, testType); + verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testIteration, seed, testType); + verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testIteration, seed, testType); + verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testIteration, seed, testType); + verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testIteration, seed, testType); + verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testIteration, seed, testType); + verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testIteration, seed, testType); + verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testIteration, seed, testType); // Morton Plus - verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); - verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testType); - verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testType); - 
verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testType); - - verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testType); - verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testType); - verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testType); - verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testType); - - verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testType); - verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testType); - verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testType); - verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testType); - + verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testIteration, seed, testType); + verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testIteration, seed, testType); + verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testIteration, seed, testType); + verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testIteration, seed, testType); + verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testIteration, seed, testType); + 
verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testIteration, seed, testType); + verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testIteration, seed, testType); + verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testIteration, seed, testType); + // Morton Minus - verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testType); - verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testType); - verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testType); - verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testType); - - verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testType); - verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testType); - verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testType); - verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testType); - - verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, 
testValues.mortonMinus_small_4, testType); - verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testType); - verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testType); - verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testType); - + verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testIteration, seed, testType); + verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testIteration, seed, testType); + verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testIteration, seed, testType); + verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testIteration, seed, testType); + verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testIteration, seed, testType); + verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testIteration, seed, testType); + verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testIteration, seed, testType); + + verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testIteration, seed, testType); + verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testIteration, seed, testType); + verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, 
testValues.mortonMinus_full_4, testIteration, seed, testType); + verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testIteration, seed, testType); + // Morton coordinate-wise equality - verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testType); - verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testType); - verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testType); - verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testType); - - verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testType); - verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testType); - verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testType); - verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testType); - - verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testType); - verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testType); - verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testType); - verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testType); - - // Morton coordinate-wise unsigned inequality - verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testType); - 
verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testType); - verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testType); - verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testType); - - verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testType); - verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testType); - verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testType); - verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testType); - - verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testType); - verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); - verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); - // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); - - // Morton coordinate-wise signed inequality - verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); - verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); - 
verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); - // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); - - verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); - verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); - verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testType); - // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); - - verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testType); - verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); - verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); - // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); - - // Morton left-shift - verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); - verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testType); - verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testType); - verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, 
testType); - - verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testType); - verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testType); - verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testType); - verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testType); - - verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testType); - verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testType); - verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testType); - verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testType); - - // Morton unsigned right-shift - verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testType); - verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testType); - verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testType); - - verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testType); - 
verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testType); - verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testType); - - verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testType); - verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testType); - verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testType); - - // Morton signed right-shift - verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); - verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); - verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); - // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); - - verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testType); - 
verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testType); - verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testType); - //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); - - verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testType); - verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testType); - verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testType); - // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); - } -}; + verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testIteration, seed, testType); + verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testIteration, seed, testType); + verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testIteration, seed, testType); + verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testIteration, seed, testType); -// Some hlsl code will result in compilation error if mixed together due to some bug in dxc. So we separate them into multiple shader compilation and test. 
-class CTester2 final : public ITester -{ -public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testIteration, seed, testType); + verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testIteration, seed, testType); + verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testIteration, seed, testType); + verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testIteration, seed, testType); - std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); - std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testIteration, seed, testType); + verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testIteration, seed, testType); + verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testIteration, seed, testType); + verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testIteration, seed, testType); - m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - InputTestValues testInput; - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected; + // Morton coordinate-wise unsigned inequality + verifyTestValue("mortonUnsignedLess_small_2", 
expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testIteration, seed, testType); - uint32_t generatedShift = intDistribution(mt) & uint32_t(63); - testInput.shift = generatedShift; - { - testInput.coordX = longDistribution(mt); - testInput.coordY = longDistribution(mt); - testInput.coordZ = longDistribution(mt); - testInput.coordW = longDistribution(mt); + verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testIteration, seed, testType); - uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; - uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, 
testIteration, seed, testType); + verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testIteration, seed, testType); + // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testIteration, seed, testType); - uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; - uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + // Morton coordinate-wise signed inequality + verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testIteration, seed, testType); + // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testIteration, seed, testType); - uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; - uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testIteration, seed, testType); + // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, 
testValues.mortonSignedLess_emulated_3, testIteration, seed, testType); - uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testIteration, seed, testType); + // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testIteration, seed, testType); - int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + // Morton left-shift + verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testIteration, seed, testType); - int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_medium_3", 
expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testIteration, seed, testType); - int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testIteration, seed, testType); + verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testIteration, seed, testType); - expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); - - expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); - expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); - expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + // Morton unsigned right-shift + verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testIteration, seed, 
testType); + verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testIteration, seed, testType); - uint16_t castedShift = uint16_t(generatedShift); - expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); - expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); - expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testIteration, seed, testType); - } + verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_full_4", 
expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testIteration, seed, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testIteration, seed, testType); - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("SECOND TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + // Morton signed right-shift + verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testIteration, seed, testType); + // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testIteration, seed, testType); + + verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testIteration, seed, testType); + //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testIteration, seed, testType); + + 
verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testIteration, seed, testType); + // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testIteration, seed, testType); } +}; -private: - inline static constexpr int Iterations = 100u; +// Some HLSL code fails to compile when combined in a single shader because of a DXC bug, so it is split across multiple shader compilations and tested separately. +class CTester2 final : public ITester +{ + using base_t = ITester; +public: + CTester2(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; - void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) +private: + InputTestValues generateInputTestValues() override { - TestValues cpuTestValues; + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); - fillTestValues2(commonTestInputValues, cpuTestValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + // Set input test values that will be used in both CPU and GPU tests + InputTestValues testInput; - } + testInput.generatedA = longDistribution(getRandomEngine()); + testInput.generatedB = longDistribution(getRandomEngine()); - void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) - { - TestValues gpuTestValues; - gpuTestValues = 
dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + uint32_t generatedShift = intDistribution(getRandomEngine()) & uint32_t(63); + testInput.shift = generatedShift; + + testInput.coordX = longDistribution(getRandomEngine()); + testInput.coordY = longDistribution(getRandomEngine()); + testInput.coordZ = longDistribution(getRandomEngine()); + testInput.coordW = longDistribution(getRandomEngine()); + + return testInput; } - void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + TestValues determineExpectedResults(const InputTestValues& testInput) override { + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + const uint32_t generatedShift = testInput.shift; + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; - verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; - verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); - verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); - verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; - 
verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); - verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); - verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(generatedShift); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + + return expected; + } + + void verifyTestResults(const TestValues& expectedTestValues, const TestValues& testValues, 
const size_t testIteration, const uint32_t seed, ITester::TestType testType) override + { + verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testIteration, seed, testType); + + verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testIteration, seed, testType); + verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testIteration, seed, testType); + + verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testIteration, seed, testType); + verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testIteration, seed, testType); } }; #endif \ No newline at end of file diff --git a/14_Mortons/app_resources/test.comp.hlsl b/14_Mortons/app_resources/test.comp.hlsl index 60cdf94b1..591915109 100644 --- a/14_Mortons/app_resources/test.comp.hlsl +++ b/14_Mortons/app_resources/test.comp.hlsl @@ -3,15 +3,16 @@ //// For conditions of distribution and use, see copyright notice in nabla.h #include "testCommon.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(1, 1, 1)] +[numthreads(WORKGROUP_SIZE, 1, 1)] [shader("compute")] -void main(uint3 
invocationID : SV_DispatchThreadID) +void main() { - uint32_t testID = glsl::gl_GlobalInvocationID().x; - fillTestValues(inputTestValues[testID], outputTestValues[testID]); + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + TestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); } diff --git a/14_Mortons/app_resources/test2.comp.hlsl b/14_Mortons/app_resources/test2.comp.hlsl index 30b998f49..045ba1bdc 100644 --- a/14_Mortons/app_resources/test2.comp.hlsl +++ b/14_Mortons/app_resources/test2.comp.hlsl @@ -3,7 +3,7 @@ //// For conditions of distribution and use, see copyright notice in nabla.h #include "testCommon2.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; @@ -12,6 +12,7 @@ [shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { - uint32_t testID = glsl::gl_GlobalInvocationID().x; - fillTestValues2(inputTestValues[testID], outputTestValues[testID]); + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + TestExecutor2 executor; + executor(inputTestValues[invID], outputTestValues[invID]); } diff --git a/14_Mortons/app_resources/testCommon.hlsl b/14_Mortons/app_resources/testCommon.hlsl index 6144b6ce9..b285bd8cd 100644 --- a/14_Mortons/app_resources/testCommon.hlsl +++ b/14_Mortons/app_resources/testCommon.hlsl @@ -1,295 +1,297 @@ #include "common.hlsl" - -void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +struct TestExecutor { - emulated_uint64_t emulatedA = _static_cast(input.generatedA); - emulated_uint64_t emulatedB = _static_cast(input.generatedB); - emulated_int64_t signedEmulatedA = _static_cast(input.generatedA); - - // Emulated int tests - output.emulatedAnd = emulatedA & emulatedB; - output.emulatedOr = emulatedA | emulatedB; - output.emulatedXor = emulatedA ^ emulatedB; - output.emulatedNot = 
emulatedA.operator~(); - output.emulatedPlus = emulatedA + emulatedB; - output.emulatedMinus = emulatedA - emulatedB; - output.emulatedLess = uint32_t(emulatedA < emulatedB); - output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); - output.emulatedGreater = uint32_t(emulatedA > emulatedB); - output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); - - left_shift_operator leftShift; - output.emulatedLeftShifted = leftShift(emulatedA, input.shift); - - arithmetic_right_shift_operator unsignedRightShift; - output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); - - arithmetic_right_shift_operator signedRightShift; - output.emulatedSignedRightShifted = signedRightShift(signedEmulatedA, input.shift); - - output.emulatedUnaryMinus = signedEmulatedA.operator-(); - - // Morton tests - uint64_t2 Vec2A = { input.coordX, input.coordY }; - uint64_t2 Vec2B = { input.coordZ, input.coordW }; - - uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; - uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; - - uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; - uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; - - uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); - uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); - uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - - uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); - uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); - uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - uint32_t3 Vec3BFull = 
createAnyBitIntegerVecFromU64Vec(Vec3B); - - uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); - uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); - uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); - int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); - int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); - int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); - int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); - int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - - int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); - int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); - int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); - int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); - int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); - int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - - int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); - int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); - int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); - int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - morton::code morton_small_2A = createMortonFromU64Vec(Vec2A); - morton::code morton_medium_2A = createMortonFromU64Vec(Vec2A); - morton::code morton_full_2A = createMortonFromU64Vec(Vec2A); - morton::code morton_emulated_2A = createMortonFromU64Vec(Vec2A); - morton::code 
morton_small_2B = createMortonFromU64Vec(Vec2B); - morton::code morton_medium_2B = createMortonFromU64Vec(Vec2B); - morton::code morton_full_2B = createMortonFromU64Vec(Vec2B); - morton::code morton_emulated_2B = createMortonFromU64Vec(Vec2B); - - morton::code morton_small_3A = createMortonFromU64Vec(Vec3A); - morton::code morton_medium_3A = createMortonFromU64Vec(Vec3A); - morton::code morton_full_3A = createMortonFromU64Vec(Vec3A); - morton::code morton_emulated_3A = createMortonFromU64Vec(Vec3A); - morton::code morton_small_3B = createMortonFromU64Vec(Vec3B); - morton::code morton_medium_3B = createMortonFromU64Vec(Vec3B); - morton::code morton_full_3B = createMortonFromU64Vec(Vec3B); - morton::code morton_emulated_3B = createMortonFromU64Vec(Vec3B); - - morton::code morton_small_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_medium_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_full_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_small_4B = createMortonFromU64Vec(Vec4B); - morton::code morton_medium_4B = createMortonFromU64Vec(Vec4B); - morton::code morton_full_4B = createMortonFromU64Vec(Vec4B); - morton::code morton_emulated_4B = createMortonFromU64Vec(Vec4B); - - morton::code morton_small_2_signed = createMortonFromU64Vec(Vec2A); - morton::code morton_medium_2_signed = createMortonFromU64Vec(Vec2A); - morton::code morton_full_2_signed = createMortonFromU64Vec(Vec2A); - morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); - - morton::code morton_small_3_signed = createMortonFromU64Vec(Vec3A); - morton::code morton_medium_3_signed = createMortonFromU64Vec(Vec3A); - morton::code morton_full_3_signed = createMortonFromU64Vec(Vec3A); - morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); - - morton::code morton_small_4_signed = createMortonFromU64Vec(Vec4A); - morton::code morton_medium_4_signed = createMortonFromU64Vec(Vec4A); - 
morton::code morton_full_4_signed = createMortonFromU64Vec(Vec4A); - morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); - - // Some test and operation is moved to testCommon2.hlsl due to dxc bug that cause compilation failure. Uncomment when the bug is fixed. - // Plus - output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; - output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; - output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; - output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; - - output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; - output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; - output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; - output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; - - output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; - output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; - output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; - output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; - - // Minus - output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; - output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; - output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; - output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; - - output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; - output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; - output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; - output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; - - output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; - output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; - output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; - output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; - - // Coordinate-wise equality - output.mortonEqual_small_2 = 
uint32_t2(morton_small_2A.equal(Vec2BSmall)); - output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(Vec2BMedium)); - output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(Vec2BFull)); - output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(Vec2BFull)); - - output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(Vec3BSmall)); - output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(Vec3BMedium)); - output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(Vec3BFull)); - output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(Vec3BFull)); - - output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(Vec4BSmall)); - output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(Vec4BMedium)); - output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(Vec4BFull)); - output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(Vec4BFull)); - - // Coordinate-wise unsigned inequality (just testing with less) - output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(Vec2BSmall)); - output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(Vec2BMedium)); - output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(Vec2BFull)); - output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(Vec2BFull)); - - output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(Vec3BSmall)); - output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(Vec3BMedium)); - output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(Vec3BFull)); - output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(Vec3BFull)); - - output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(Vec4BSmall)); - output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(Vec4BMedium)); - output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(Vec4BFull)); - // output.mortonUnsignedLess_emulated_4 = 
uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); - - // Coordinate-wise signed inequality - output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(Vec2BSignedSmall)); - output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(Vec2BSignedMedium)); - output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(Vec2BSignedFull)); - // output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); - - output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(Vec3BSignedSmall)); - output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(Vec3BSignedMedium)); - output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(Vec3BSignedFull)); - // output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); - - output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(Vec4BSignedSmall)); - output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(Vec4BSignedMedium)); - output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(Vec4BSignedFull)); - // output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); - - // Cast to uint16_t which is what left shift for Mortons expect - uint16_t castedShift = uint16_t(input.shift); - // Each left shift clamps to correct bits so the result kinda makes sense - // Left-shift - left_shift_operator > leftShiftSmall2; - output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); - left_shift_operator > leftShiftMedium2; - output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); - left_shift_operator > leftShiftFull2; - output.mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift % fullBits_2); - left_shift_operator > leftShiftEmulated2; - output.mortonLeftShift_emulated_2 = 
leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); - - left_shift_operator > leftShiftSmall3; - output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); - left_shift_operator > leftShiftMedium3; - output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); - left_shift_operator > leftShiftFull3; - output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); - left_shift_operator > leftShiftEmulated3; - output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - - left_shift_operator > leftShiftSmall4; - output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); - left_shift_operator > leftShiftMedium4; - output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); - left_shift_operator > leftShiftFull4; - output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); - left_shift_operator > leftShiftEmulated4; - output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - - // Unsigned right-shift - arithmetic_right_shift_operator > rightShiftSmall2; - output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); - arithmetic_right_shift_operator > rightShiftMedium2; - output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); - arithmetic_right_shift_operator > rightShiftFull2; - output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); - arithmetic_right_shift_operator > rightShiftEmulated2; - output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); - - arithmetic_right_shift_operator > rightShiftSmall3; - output.mortonUnsignedRightShift_small_3 = 
rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); - arithmetic_right_shift_operator > rightShiftMedium3; - output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); - arithmetic_right_shift_operator > rightShiftFull3; - output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); - arithmetic_right_shift_operator > rightShiftEmulated3; - output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - - arithmetic_right_shift_operator > rightShiftSmall4; - output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); - arithmetic_right_shift_operator > rightShiftMedium4; - output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); - arithmetic_right_shift_operator > rightShiftFull4; - output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); - arithmetic_right_shift_operator > rightShiftEmulated4; - output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - - // Signed right-shift - arithmetic_right_shift_operator > rightShiftSignedSmall2; - output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); - arithmetic_right_shift_operator > rightShiftSignedMedium2; - output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); - arithmetic_right_shift_operator > rightShiftSignedFull2; - output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); - // arithmetic_right_shift_operator > rightShiftSignedEmulated2; - // output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); - - arithmetic_right_shift_operator > 
rightShiftSignedSmall3; - output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); - arithmetic_right_shift_operator > rightShiftSignedMedium3; - output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); - arithmetic_right_shift_operator > rightShiftSignedFull3; - output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); - // arithmetic_right_shift_operator > rightShiftSignedEmulated3; - // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); - - arithmetic_right_shift_operator > rightShiftSignedSmall4; - output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); - arithmetic_right_shift_operator > rightShiftSignedMedium4; - output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); - arithmetic_right_shift_operator > rightShiftSignedFull4; - output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); - // arithmetic_right_shift_operator > rightShiftSignedEmulated4; - // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); - -} \ No newline at end of file + void operator()(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) + { + emulated_uint64_t emulatedA = _static_cast(input.generatedA); + emulated_uint64_t emulatedB = _static_cast(input.generatedB); + emulated_int64_t signedEmulatedA = _static_cast(input.generatedA); + + // Emulated int tests + output.emulatedAnd = emulatedA & emulatedB; + output.emulatedOr = emulatedA | emulatedB; + output.emulatedXor = emulatedA ^ emulatedB; + output.emulatedNot = emulatedA.operator~(); + output.emulatedPlus = emulatedA + emulatedB; + 
output.emulatedMinus = emulatedA - emulatedB; + output.emulatedLess = uint32_t(emulatedA < emulatedB); + output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); + output.emulatedGreater = uint32_t(emulatedA > emulatedB); + output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); + + left_shift_operator leftShift; + output.emulatedLeftShifted = leftShift(emulatedA, input.shift); + + arithmetic_right_shift_operator unsignedRightShift; + output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); + + arithmetic_right_shift_operator signedRightShift; + output.emulatedSignedRightShifted = signedRightShift(signedEmulatedA, input.shift); + + output.emulatedUnaryMinus = signedEmulatedA.operator-(); + + // Morton tests + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + uint16_t4 Vec4ASmall = 
createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + morton::code morton_small_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_small_2B = createMortonFromU64Vec(Vec2B); + morton::code 
morton_medium_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_full_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_emulated_2B = createMortonFromU64Vec(Vec2B); + + morton::code morton_small_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_medium_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_small_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_medium_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_full_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_emulated_3B = createMortonFromU64Vec(Vec3B); + + morton::code morton_small_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_small_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_medium_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_full_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_emulated_4B = createMortonFromU64Vec(Vec4B); + + morton::code morton_small_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + + morton::code morton_small_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_medium_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + + morton::code morton_small_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4_signed = 
createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + // Some test and operation is moved to testCommon2.hlsl due to dxc bug that cause compilation failure. Uncomment when the bug is fixed. + // Plus + output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; + output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; + output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; + output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; + + output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; + output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; + output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; + output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; + + output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; + output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; + output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; + output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; + + // Minus + output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; + output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; + output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; + output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; + + output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; + output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; + output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; + output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; + + output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; + output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; + output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; + output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; + + // Coordinate-wise equality + output.mortonEqual_small_2 = 
uint32_t2(morton_small_2A.equal(Vec2BSmall)); + output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(Vec2BMedium)); + output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(Vec2BFull)); + output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(Vec2BFull)); + + output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(Vec3BSmall)); + output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(Vec3BMedium)); + output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(Vec3BFull)); + output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(Vec3BFull)); + + output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(Vec4BSmall)); + output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(Vec4BMedium)); + output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(Vec4BFull)); + output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(Vec2BSmall)); + output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(Vec2BMedium)); + output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(Vec2BFull)); + output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(Vec2BFull)); + + output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(Vec3BSmall)); + output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(Vec3BMedium)); + output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(Vec3BFull)); + output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(Vec3BFull)); + + output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(Vec4BSmall)); + output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(Vec4BMedium)); + output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(Vec4BFull)); + // output.mortonUnsignedLess_emulated_4 = 
uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); + + // Coordinate-wise signed inequality + output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(Vec2BSignedSmall)); + output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(Vec2BSignedMedium)); + output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(Vec2BSignedFull)); + // output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + + output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(Vec3BSignedSmall)); + output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(Vec3BSignedMedium)); + output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(Vec3BSignedFull)); + // output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + + output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(Vec4BSignedSmall)); + output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(Vec4BSignedMedium)); + output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(Vec4BSignedFull)); + // output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); + + // Cast to uint16_t which is what left shift for Mortons expect + uint16_t castedShift = uint16_t(input.shift); + // Each left shift clamps to correct bits so the result kinda makes sense + // Left-shift + left_shift_operator > leftShiftSmall2; + output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); + left_shift_operator > leftShiftMedium2; + output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + left_shift_operator > leftShiftFull2; + output.mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift % fullBits_2); + left_shift_operator > leftShiftEmulated2; + output.mortonLeftShift_emulated_2 = 
leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + left_shift_operator > leftShiftSmall3; + output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); + left_shift_operator > leftShiftMedium3; + output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + left_shift_operator > leftShiftFull3; + output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); + left_shift_operator > leftShiftEmulated3; + output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + left_shift_operator > leftShiftSmall4; + output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); + left_shift_operator > leftShiftMedium4; + output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + left_shift_operator > leftShiftFull4; + output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); + left_shift_operator > leftShiftEmulated4; + output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Unsigned right-shift + arithmetic_right_shift_operator > rightShiftSmall2; + output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftMedium2; + output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftFull2; + output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftEmulated2; + output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSmall3; + output.mortonUnsignedRightShift_small_3 = 
rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftMedium3; + output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftFull3; + output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftEmulated3; + output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSmall4; + output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftMedium4; + output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftFull4; + output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); + arithmetic_right_shift_operator > rightShiftEmulated4; + output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Signed right-shift + arithmetic_right_shift_operator > rightShiftSignedSmall2; + output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftSignedMedium2; + output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftSignedFull2; + output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); + // arithmetic_right_shift_operator > rightShiftSignedEmulated2; + // output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + + arithmetic_right_shift_operator > 
rightShiftSignedSmall3; + output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftSignedMedium3; + output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftSignedFull3; + output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); + // arithmetic_right_shift_operator > rightShiftSignedEmulated3; + // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSignedSmall4; + output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftSignedMedium4; + output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftSignedFull4; + output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); + // arithmetic_right_shift_operator > rightShiftSignedEmulated4; + // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); + + } +}; diff --git a/14_Mortons/app_resources/testCommon2.hlsl b/14_Mortons/app_resources/testCommon2.hlsl index 365b82340..5c2a953ac 100644 --- a/14_Mortons/app_resources/testCommon2.hlsl +++ b/14_Mortons/app_resources/testCommon2.hlsl @@ -1,39 +1,42 @@ #include "common.hlsl" -void fillTestValues2(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +struct TestExecutor2 { - uint64_t2 Vec2A = { input.coordX, input.coordY }; - uint64_t2 Vec2B = { input.coordZ, input.coordW }; - - uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; - uint64_t3 Vec3B = 
{ input.coordY, input.coordZ, input.coordW }; - - uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; - uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; - - uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); - int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); - int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - - morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); - morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); - morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); - morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); - - - output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); - - output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); - output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); - output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); - - uint16_t castedShift = uint16_t(input.shift); - - arithmetic_right_shift_operator > rightShiftSignedEmulated2; - output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); - arithmetic_right_shift_operator > rightShiftSignedEmulated3; - output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); - arithmetic_right_shift_operator > rightShiftSignedEmulated4; - output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); -} + void operator()(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) + { + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + 
uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + + output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); + + output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(input.shift); + + arithmetic_right_shift_operator > rightShiftSignedEmulated2; + output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftSignedEmulated3; + output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftSignedEmulated4; + output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); + } +}; diff --git a/14_Mortons/main.cpp b/14_Mortons/main.cpp index 12f55805f..a36db8a19 100644 --- a/14_Mortons/main.cpp 
+++ b/14_Mortons/main.cpp @@ -34,26 +34,36 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - - CTester::PipelineSetupData pplnSetupData; - pplnSetupData.device = m_device; - pplnSetupData.api = m_api; - pplnSetupData.assetMgr = m_assetMgr; - pplnSetupData.logger = m_logger; - pplnSetupData.physicalDevice = m_physicalDevice; - pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 { - CTester mortonTester; + CTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; - mortonTester.setupPipeline(pplnSetupData); - mortonTester.performTests(); - CTester2 mortonTester2; + CTester mortonTester(4); // 4 * 128 = 512 tests + mortonTester.setupPipeline(pplnSetupData); + mortonTester.performTestsAndVerifyResults(); + + } + { + CTester2::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/test2.comp.hlsl"; - mortonTester2.setupPipeline(pplnSetupData); - mortonTester2.performTests(); + + CTester2 mortonTester2(4); + mortonTester2.setupPipeline(reinterpret_cast(pplnSetupData)); + mortonTester2.performTestsAndVerifyResults(); } 
return true; diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index f014bd1cb..c92df0079 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -5,19 +5,21 @@ #include "nbl/examples/examples.hpp" #include "app_resources/common.hlsl" -#include "ITester.h" using namespace nbl; -class CIntrinsicsTester final : public ITester +class CIntrinsicsTester final : public ITester { + using base_t = ITester; + public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + CIntrinsicsTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; +private: + IntrinsicsIntputTestValues generateInputTestValues() override + { std::uniform_real_distribution realDistributionNeg(-50.0f, -1.0f); std::uniform_real_distribution realDistributionPos(1.0f, 50.0f); std::uniform_real_distribution realDistributionZeroToOne(0.0f, 1.0f); @@ -26,262 +28,232 @@ class CIntrinsicsTester final : public ITester std::uniform_int_distribution intDistribution(-100, 100); std::uniform_int_distribution uintDistribution(0, 100); - m_logger->log("intrinsics.hlsl TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - IntrinsicsIntputTestValues testInput; - testInput.bitCount = intDistribution(mt); - testInput.crossLhs = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.crossRhs = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.clampVal = realDistribution(mt); - testInput.clampMin = realDistributionNeg(mt); - testInput.clampMax = realDistributionPos(mt); - testInput.length = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.normalize = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.dotLhs = float32_t3(realDistributionSmall(mt), 
realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.dotRhs = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.determinant = float32_t3x3( - realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt), - realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt), - realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt) - ); - testInput.findMSB = realDistribution(mt); - testInput.findLSB = realDistribution(mt); - testInput.inverse = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.transpose = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.mulLhs = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.mulRhs = float32_t3x3( - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt), - realDistribution(mt), realDistribution(mt), realDistribution(mt) - ); - testInput.minA = realDistribution(mt); - testInput.minB = realDistribution(mt); - testInput.maxA = realDistribution(mt); - testInput.maxB = realDistribution(mt); - testInput.rsqrt = realDistributionPos(mt); - testInput.bitReverse = realDistribution(mt); - testInput.frac = realDistribution(mt); - testInput.mixX = realDistributionNeg(mt); - testInput.mixY = realDistributionPos(mt); - testInput.mixA = realDistributionZeroToOne(mt); - testInput.sign = 
realDistribution(mt); - testInput.radians = realDistribution(mt); - testInput.degrees = realDistribution(mt); - testInput.stepEdge = realDistribution(mt); - testInput.stepX = realDistribution(mt); - testInput.smoothStepEdge0 = realDistributionNeg(mt); - testInput.smoothStepEdge1 = realDistributionPos(mt); - testInput.smoothStepX = realDistribution(mt); - testInput.addCarryA = std::numeric_limits::max() - uintDistribution(mt); - testInput.addCarryB = uintDistribution(mt); - testInput.subBorrowA = uintDistribution(mt); - testInput.subBorrowB = uintDistribution(mt); - - testInput.bitCountVec = int32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); - testInput.clampValVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.clampMinVec = float32_t3(realDistributionNeg(mt), realDistributionNeg(mt), realDistributionNeg(mt)); - testInput.clampMaxVec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.findMSBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.findLSBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.minAVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.minBVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.maxAVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.maxBVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.rsqrtVec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.bitReverseVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.fracVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.mixXVec = float32_t3(realDistributionNeg(mt), 
realDistributionNeg(mt), realDistributionNeg(mt)); - testInput.mixYVec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.mixAVec = float32_t3(realDistributionZeroToOne(mt), realDistributionZeroToOne(mt), realDistributionZeroToOne(mt)); - - testInput.signVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.radiansVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.degreesVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.stepEdgeVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.stepXVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.smoothStepEdge0Vec = float32_t3(realDistributionNeg(mt), realDistributionNeg(mt), realDistributionNeg(mt)); - testInput.smoothStepEdge1Vec = float32_t3(realDistributionPos(mt), realDistributionPos(mt), realDistributionPos(mt)); - testInput.smoothStepXVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.faceForwardN = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.faceForwardI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.faceForwardNref = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.reflectI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.reflectN = glm::normalize(float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt))); - testInput.refractI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.refractN = glm::normalize(float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt))); - testInput.refractEta = realDistribution(mt); - testInput.addCarryAVec = 
uint32_t3(std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt)); - testInput.addCarryBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.subBorrowAVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - testInput.subBorrowBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); - - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - IntrinsicsTestValues expected; - expected.bitCount = glm::bitCount(testInput.bitCount); - expected.clamp = glm::clamp(testInput.clampVal, testInput.clampMin, testInput.clampMax); - expected.length = glm::length(testInput.length); - expected.dot = glm::dot(testInput.dotLhs, testInput.dotRhs); - expected.determinant = glm::determinant(reinterpret_cast(testInput.determinant)); - expected.findMSB = glm::findMSB(testInput.findMSB); - expected.findLSB = glm::findLSB(testInput.findLSB); - expected.min = glm::min(testInput.minA, testInput.minB); - expected.max = glm::max(testInput.maxA, testInput.maxB); - expected.rsqrt = (1.0f / std::sqrt(testInput.rsqrt)); - expected.mix = std::lerp(testInput.mixX, testInput.mixY, testInput.mixA); - expected.sign = glm::sign(testInput.sign); - expected.radians = glm::radians(testInput.radians); - expected.degrees = glm::degrees(testInput.degrees); - expected.step = glm::step(testInput.stepEdge, testInput.stepX); - expected.smoothStep = glm::smoothstep(testInput.smoothStepEdge0, testInput.smoothStepEdge1, testInput.smoothStepX); - - expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); - expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); + IntrinsicsIntputTestValues testInput; + 
testInput.bitCount = intDistribution(getRandomEngine()); + testInput.crossLhs = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.crossRhs = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.clampVal = realDistribution(getRandomEngine()); + testInput.clampMin = realDistributionNeg(getRandomEngine()); + testInput.clampMax = realDistributionPos(getRandomEngine()); + testInput.length = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.normalize = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.dotLhs = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.dotRhs = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.determinant = float32_t3x3( + realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), + realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), + realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()) + ); + testInput.findMSB = realDistribution(getRandomEngine()); + testInput.findLSB = realDistribution(getRandomEngine()); + testInput.inverse = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), 
realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.transpose = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.mulLhs = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.mulRhs = float32_t3x3( + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), + realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()) + ); + testInput.minA = realDistribution(getRandomEngine()); + testInput.minB = realDistribution(getRandomEngine()); + testInput.maxA = realDistribution(getRandomEngine()); + testInput.maxB = realDistribution(getRandomEngine()); + testInput.rsqrt = realDistributionPos(getRandomEngine()); + testInput.bitReverse = realDistribution(getRandomEngine()); + testInput.frac = realDistribution(getRandomEngine()); + testInput.mixX = realDistributionNeg(getRandomEngine()); + testInput.mixY = realDistributionPos(getRandomEngine()); + testInput.mixA = realDistributionZeroToOne(getRandomEngine()); + testInput.sign = realDistribution(getRandomEngine()); + testInput.radians = realDistribution(getRandomEngine()); + testInput.degrees = realDistribution(getRandomEngine()); + testInput.stepEdge = 
realDistribution(getRandomEngine()); + testInput.stepX = realDistribution(getRandomEngine()); + testInput.smoothStepEdge0 = realDistributionNeg(getRandomEngine()); + testInput.smoothStepEdge1 = realDistributionPos(getRandomEngine()); + testInput.smoothStepX = realDistribution(getRandomEngine()); - expected.frac = testInput.frac - std::floor(testInput.frac); - expected.bitReverse = glm::bitfieldReverse(testInput.bitReverse); + testInput.bitCountVec = int32_t3(intDistribution(getRandomEngine()), intDistribution(getRandomEngine()), intDistribution(getRandomEngine())); + testInput.clampValVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.clampMinVec = float32_t3(realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine())); + testInput.clampMaxVec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.findMSBVec = uint32_t3(uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine())); + testInput.findLSBVec = uint32_t3(uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine())); + testInput.minAVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.minBVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.maxAVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.maxBVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.rsqrtVec = float32_t3(realDistributionPos(getRandomEngine()), 
realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.bitReverseVec = uint32_t3(uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine()), uintDistribution(getRandomEngine())); + testInput.fracVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.mixXVec = float32_t3(realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine())); + testInput.mixYVec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.mixAVec = float32_t3(realDistributionZeroToOne(getRandomEngine()), realDistributionZeroToOne(getRandomEngine()), realDistributionZeroToOne(getRandomEngine())); - expected.normalize = glm::normalize(testInput.normalize); - expected.cross = glm::cross(testInput.crossLhs, testInput.crossRhs); - expected.bitCountVec = int32_t3(glm::bitCount(testInput.bitCountVec.x), glm::bitCount(testInput.bitCountVec.y), glm::bitCount(testInput.bitCountVec.z)); - expected.clampVec = float32_t3( - glm::clamp(testInput.clampValVec.x, testInput.clampMinVec.x, testInput.clampMaxVec.x), - glm::clamp(testInput.clampValVec.y, testInput.clampMinVec.y, testInput.clampMaxVec.y), - glm::clamp(testInput.clampValVec.z, testInput.clampMinVec.z, testInput.clampMaxVec.z) - ); - expected.findMSBVec = glm::findMSB(testInput.findMSBVec); - expected.findLSBVec = glm::findLSB(testInput.findLSBVec); - expected.minVec = float32_t3( - glm::min(testInput.minAVec.x, testInput.minBVec.x), - glm::min(testInput.minAVec.y, testInput.minBVec.y), - glm::min(testInput.minAVec.z, testInput.minBVec.z) - ); - expected.maxVec = float32_t3( - glm::max(testInput.maxAVec.x, testInput.maxBVec.x), - glm::max(testInput.maxAVec.y, testInput.maxBVec.y), - glm::max(testInput.maxAVec.z, testInput.maxBVec.z) - ); - expected.rsqrtVec = 
float32_t3(1.0f / std::sqrt(testInput.rsqrtVec.x), 1.0f / std::sqrt(testInput.rsqrtVec.y), 1.0f / std::sqrt(testInput.rsqrtVec.z)); - expected.bitReverseVec = glm::bitfieldReverse(testInput.bitReverseVec); - expected.fracVec = float32_t3( - testInput.fracVec.x - std::floor(testInput.fracVec.x), - testInput.fracVec.y - std::floor(testInput.fracVec.y), - testInput.fracVec.z - std::floor(testInput.fracVec.z)); - expected.mixVec.x = std::lerp(testInput.mixXVec.x, testInput.mixYVec.x, testInput.mixAVec.x); - expected.mixVec.y = std::lerp(testInput.mixXVec.y, testInput.mixYVec.y, testInput.mixAVec.y); - expected.mixVec.z = std::lerp(testInput.mixXVec.z, testInput.mixYVec.z, testInput.mixAVec.z); + testInput.signVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.radiansVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.degreesVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.stepEdgeVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.stepXVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.smoothStepEdge0Vec = float32_t3(realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine()), realDistributionNeg(getRandomEngine())); + testInput.smoothStepEdge1Vec = float32_t3(realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine()), realDistributionPos(getRandomEngine())); + testInput.smoothStepXVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.faceForwardN = float32_t3(realDistribution(getRandomEngine()), 
realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.faceForwardI = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.faceForwardNref = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.reflectI = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.reflectN = glm::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))); + testInput.refractI = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.refractN = glm::normalize(float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine()))); + testInput.refractEta = realDistribution(getRandomEngine()); - expected.signVec = glm::sign(testInput.signVec); - expected.radiansVec = glm::radians(testInput.radiansVec); - expected.degreesVec = glm::degrees(testInput.degreesVec); - expected.stepVec = glm::step(testInput.stepEdgeVec, testInput.stepXVec); - expected.smoothStepVec = glm::smoothstep(testInput.smoothStepEdge0Vec, testInput.smoothStepEdge1Vec, testInput.smoothStepXVec); - expected.faceForward = glm::faceforward(testInput.faceForwardN, testInput.faceForwardI, testInput.faceForwardNref); - expected.reflect = glm::reflect(testInput.reflectI, testInput.reflectN); - expected.refract = glm::refract(testInput.refractI, testInput.refractN, testInput.refractEta); + return testInput; + } - expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); - expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); 
+ IntrinsicsTestValues determineExpectedResults(const IntrinsicsIntputTestValues& testInput) override + { + IntrinsicsTestValues expected; + expected.bitCount = glm::bitCount(testInput.bitCount); + expected.clamp = glm::clamp(testInput.clampVal, testInput.clampMin, testInput.clampMax); + expected.length = glm::length(testInput.length); + expected.dot = glm::dot(testInput.dotLhs, testInput.dotRhs); + expected.determinant = glm::determinant(reinterpret_cast(testInput.determinant)); + expected.findMSB = glm::findMSB(testInput.findMSB); + expected.findLSB = glm::findLSB(testInput.findLSB); + expected.min = glm::min(testInput.minA, testInput.minB); + expected.max = glm::max(testInput.maxA, testInput.maxB); + expected.rsqrt = (1.0f / std::sqrt(testInput.rsqrt)); + expected.mix = std::lerp(testInput.mixX, testInput.mixY, testInput.mixA); + expected.sign = glm::sign(testInput.sign); + expected.radians = glm::radians(testInput.radians); + expected.degrees = glm::degrees(testInput.degrees); + expected.step = glm::step(testInput.stepEdge, testInput.stepX); + expected.smoothStep = glm::smoothstep(testInput.smoothStepEdge0, testInput.smoothStepEdge1, testInput.smoothStepX); - auto mulGlm = nbl::hlsl::mul(testInput.mulLhs, testInput.mulRhs); - expected.mul = reinterpret_cast(mulGlm); - auto transposeGlm = glm::transpose(reinterpret_cast(testInput.transpose)); - expected.transpose = reinterpret_cast(transposeGlm); - auto inverseGlm = glm::inverse(reinterpret_cast(testInput.inverse)); - expected.inverse = reinterpret_cast(inverseGlm); + expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); + expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("intrinsics.hlsl TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } + expected.frac = testInput.frac - 
std::floor(testInput.frac); + expected.bitReverse = glm::bitfieldReverse(testInput.bitReverse); -private: - inline static constexpr int Iterations = 100u; + expected.normalize = glm::normalize(testInput.normalize); + expected.cross = glm::cross(testInput.crossLhs, testInput.crossRhs); + expected.bitCountVec = int32_t3(glm::bitCount(testInput.bitCountVec.x), glm::bitCount(testInput.bitCountVec.y), glm::bitCount(testInput.bitCountVec.z)); + expected.clampVec = float32_t3( + glm::clamp(testInput.clampValVec.x, testInput.clampMinVec.x, testInput.clampMaxVec.x), + glm::clamp(testInput.clampValVec.y, testInput.clampMinVec.y, testInput.clampMaxVec.y), + glm::clamp(testInput.clampValVec.z, testInput.clampMinVec.z, testInput.clampMaxVec.z) + ); + expected.findMSBVec = glm::findMSB(testInput.findMSBVec); + expected.findLSBVec = glm::findLSB(testInput.findLSBVec); + expected.minVec = float32_t3( + glm::min(testInput.minAVec.x, testInput.minBVec.x), + glm::min(testInput.minAVec.y, testInput.minBVec.y), + glm::min(testInput.minAVec.z, testInput.minBVec.z) + ); + expected.maxVec = float32_t3( + glm::max(testInput.maxAVec.x, testInput.maxBVec.x), + glm::max(testInput.maxAVec.y, testInput.maxBVec.y), + glm::max(testInput.maxAVec.z, testInput.maxBVec.z) + ); + expected.rsqrtVec = float32_t3(1.0f / std::sqrt(testInput.rsqrtVec.x), 1.0f / std::sqrt(testInput.rsqrtVec.y), 1.0f / std::sqrt(testInput.rsqrtVec.z)); + expected.bitReverseVec = glm::bitfieldReverse(testInput.bitReverseVec); + expected.fracVec = float32_t3( + testInput.fracVec.x - std::floor(testInput.fracVec.x), + testInput.fracVec.y - std::floor(testInput.fracVec.y), + testInput.fracVec.z - std::floor(testInput.fracVec.z)); + expected.mixVec.x = std::lerp(testInput.mixXVec.x, testInput.mixYVec.x, testInput.mixAVec.x); + expected.mixVec.y = std::lerp(testInput.mixXVec.y, testInput.mixYVec.y, testInput.mixAVec.y); + expected.mixVec.z = std::lerp(testInput.mixXVec.z, testInput.mixYVec.z, testInput.mixAVec.z); - void 
performCpuTests(const IntrinsicsIntputTestValues& commonTestInputValues, const IntrinsicsTestValues& expectedTestValues) - { - IntrinsicsTestValues cpuTestValues; + expected.signVec = glm::sign(testInput.signVec); + expected.radiansVec = glm::radians(testInput.radiansVec); + expected.degreesVec = glm::degrees(testInput.degreesVec); + expected.stepVec = glm::step(testInput.stepEdgeVec, testInput.stepXVec); + expected.smoothStepVec = glm::smoothstep(testInput.smoothStepEdge0Vec, testInput.smoothStepEdge1Vec, testInput.smoothStepXVec); + expected.faceForward = glm::faceforward(testInput.faceForwardN, testInput.faceForwardI, testInput.faceForwardNref); + expected.reflect = glm::reflect(testInput.reflectI, testInput.reflectN); + expected.refract = glm::refract(testInput.refractI, testInput.refractN, testInput.refractEta); - cpuTestValues.fillTestValues(commonTestInputValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); + expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); - } + auto mulGlm = nbl::hlsl::mul(testInput.mulLhs, testInput.mulRhs); + expected.mul = reinterpret_cast(mulGlm); + auto transposeGlm = glm::transpose(reinterpret_cast(testInput.transpose)); + expected.transpose = reinterpret_cast(transposeGlm); + auto inverseGlm = glm::inverse(reinterpret_cast(testInput.inverse)); + expected.inverse = reinterpret_cast(inverseGlm); - void performGpuTests(const IntrinsicsIntputTestValues& commonTestInputValues, const IntrinsicsTestValues& expectedTestValues) - { - IntrinsicsTestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + return expected; } - void verifyTestValues(const IntrinsicsTestValues& expectedTestValues, const 
IntrinsicsTestValues& testValues, ITester::TestType testType) + void verifyTestResults(const IntrinsicsTestValues& expectedTestValues, const IntrinsicsTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override { - verifyTestValue("bitCount", expectedTestValues.bitCount, testValues.bitCount, testType); - verifyTestValue("clamp", expectedTestValues.clamp, testValues.clamp, testType); - verifyTestValue("length", expectedTestValues.length, testValues.length, testType); - verifyTestValue("dot", expectedTestValues.dot, testValues.dot, testType); - verifyTestValue("determinant", expectedTestValues.determinant, testValues.determinant, testType); - verifyTestValue("findMSB", expectedTestValues.findMSB, testValues.findMSB, testType); - verifyTestValue("findLSB", expectedTestValues.findLSB, testValues.findLSB, testType); - verifyTestValue("min", expectedTestValues.min, testValues.min, testType); - verifyTestValue("max", expectedTestValues.max, testValues.max, testType); - verifyTestValue("rsqrt", expectedTestValues.rsqrt, testValues.rsqrt, testType); - verifyTestValue("frac", expectedTestValues.frac, testValues.frac, testType); - verifyTestValue("bitReverse", expectedTestValues.bitReverse, testValues.bitReverse, testType); - verifyTestValue("mix", expectedTestValues.mix, testValues.mix, testType); - verifyTestValue("sign", expectedTestValues.sign, testValues.sign, testType); - verifyTestValue("radians", expectedTestValues.radians, testValues.radians, testType); - verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testType); - verifyTestValue("step", expectedTestValues.step, testValues.step, testType); - verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testType); - verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testType); - verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testType); 
- verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testType); - verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testType); + verifyTestValue("bitCount", expectedTestValues.bitCount, testValues.bitCount, testIteration, seed, testType); + verifyTestValue("clamp", expectedTestValues.clamp, testValues.clamp, testIteration, seed, testType); + verifyTestValue("length", expectedTestValues.length, testValues.length, testIteration, seed, testType); + verifyTestValue("dot", expectedTestValues.dot, testValues.dot, testIteration, seed, testType); + verifyTestValue("determinant", expectedTestValues.determinant, testValues.determinant, testIteration, seed, testType); + verifyTestValue("findMSB", expectedTestValues.findMSB, testValues.findMSB, testIteration, seed, testType); + verifyTestValue("findLSB", expectedTestValues.findLSB, testValues.findLSB, testIteration, seed, testType); + verifyTestValue("min", expectedTestValues.min, testValues.min, testIteration, seed, testType); + verifyTestValue("max", expectedTestValues.max, testValues.max, testIteration, seed, testType); + verifyTestValue("rsqrt", expectedTestValues.rsqrt, testValues.rsqrt, testIteration, seed, testType); + verifyTestValue("frac", expectedTestValues.frac, testValues.frac, testIteration, seed, testType); + verifyTestValue("bitReverse", expectedTestValues.bitReverse, testValues.bitReverse, testIteration, seed, testType); + verifyTestValue("mix", expectedTestValues.mix, testValues.mix, testIteration, seed, testType); + verifyTestValue("sign", expectedTestValues.sign, testValues.sign, testIteration, seed, testType); + verifyTestValue("radians", expectedTestValues.radians, testValues.radians, testIteration, seed, testType); + verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testIteration, seed, testType); + verifyTestValue("step", expectedTestValues.step, testValues.step, testIteration, 
seed, testType); + verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testIteration, seed, testType); + verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testIteration, seed, testType); + verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testIteration, seed, testType); + verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testIteration, seed, testType); + verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testIteration, seed, testType); - verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); - verifyTestVector3dValue("cross", expectedTestValues.cross, testValues.cross, testType); - verifyTestVector3dValue("bitCountVec", expectedTestValues.bitCountVec, testValues.bitCountVec, testType); - verifyTestVector3dValue("clampVec", expectedTestValues.clampVec, testValues.clampVec, testType); - verifyTestVector3dValue("findMSBVec", expectedTestValues.findMSBVec, testValues.findMSBVec, testType); - verifyTestVector3dValue("findLSBVec", expectedTestValues.findLSBVec, testValues.findLSBVec, testType); - verifyTestVector3dValue("minVec", expectedTestValues.minVec, testValues.minVec, testType); - verifyTestVector3dValue("maxVec", expectedTestValues.maxVec, testValues.maxVec, testType); - verifyTestVector3dValue("rsqrtVec", expectedTestValues.rsqrtVec, testValues.rsqrtVec, testType); - verifyTestVector3dValue("bitReverseVec", expectedTestValues.bitReverseVec, testValues.bitReverseVec, testType); - verifyTestVector3dValue("fracVec", expectedTestValues.fracVec, testValues.fracVec, testType); - verifyTestVector3dValue("mixVec", expectedTestValues.mixVec, testValues.mixVec, testType); + verifyTestValue("normalize", expectedTestValues.normalize, testValues.normalize, testIteration, seed, testType); + 
verifyTestValue("cross", expectedTestValues.cross, testValues.cross, testIteration, seed, testType); + verifyTestValue("bitCountVec", expectedTestValues.bitCountVec, testValues.bitCountVec, testIteration, seed, testType); + verifyTestValue("clampVec", expectedTestValues.clampVec, testValues.clampVec, testIteration, seed, testType); + verifyTestValue("findMSBVec", expectedTestValues.findMSBVec, testValues.findMSBVec, testIteration, seed, testType); + verifyTestValue("findLSBVec", expectedTestValues.findLSBVec, testValues.findLSBVec, testIteration, seed, testType); + verifyTestValue("minVec", expectedTestValues.minVec, testValues.minVec, testIteration, seed, testType); + verifyTestValue("maxVec", expectedTestValues.maxVec, testValues.maxVec, testIteration, seed, testType); + verifyTestValue("rsqrtVec", expectedTestValues.rsqrtVec, testValues.rsqrtVec, testIteration, seed, testType); + verifyTestValue("bitReverseVec", expectedTestValues.bitReverseVec, testValues.bitReverseVec, testIteration, seed, testType); + verifyTestValue("fracVec", expectedTestValues.fracVec, testValues.fracVec, testIteration, seed, testType); + verifyTestValue("mixVec", expectedTestValues.mixVec, testValues.mixVec, testIteration, seed, testType); - verifyTestVector3dValue("signVec", expectedTestValues.signVec, testValues.signVec, testType); - verifyTestVector3dValue("radiansVec", expectedTestValues.radiansVec, testValues.radiansVec, testType); - verifyTestVector3dValue("degreesVec", expectedTestValues.degreesVec, testValues.degreesVec, testType); - verifyTestVector3dValue("stepVec", expectedTestValues.stepVec, testValues.stepVec, testType); - verifyTestVector3dValue("smoothStepVec", expectedTestValues.smoothStepVec, testValues.smoothStepVec, testType); - verifyTestVector3dValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testType); - verifyTestVector3dValue("reflect", expectedTestValues.reflect, testValues.reflect, testType); - verifyTestVector3dValue("refract", 
expectedTestValues.refract, testValues.refract, testType); - verifyTestVector3dValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testType); - verifyTestVector3dValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testType); - verifyTestVector3dValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, testValues.subBorrowVec.result, testType); - verifyTestVector3dValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, testValues.subBorrowVec.borrow, testType); + verifyTestValue("signVec", expectedTestValues.signVec, testValues.signVec, testIteration, seed, testType); + verifyTestValue("radiansVec", expectedTestValues.radiansVec, testValues.radiansVec, testIteration, seed, testType); + verifyTestValue("degreesVec", expectedTestValues.degreesVec, testValues.degreesVec, testIteration, seed, testType); + verifyTestValue("stepVec", expectedTestValues.stepVec, testValues.stepVec, testIteration, seed, testType); + verifyTestValue("smoothStepVec", expectedTestValues.smoothStepVec, testValues.smoothStepVec, testIteration, seed, testType); + verifyTestValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testIteration, seed, testType); + verifyTestValue("reflect", expectedTestValues.reflect, testValues.reflect, testIteration, seed, testType); + verifyTestValue("refract", expectedTestValues.refract, testValues.refract, testIteration, seed, testType); + verifyTestValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testIteration, seed, testType); + verifyTestValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testIteration, seed, testType); + verifyTestValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, testValues.subBorrowVec.result, testIteration, seed, testType); + verifyTestValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, 
testValues.subBorrowVec.borrow, testIteration, seed, testType); - verifyTestMatrix3x3Value("mul", expectedTestValues.mul, testValues.mul, testType); - verifyTestMatrix3x3Value("transpose", expectedTestValues.transpose, testValues.transpose, testType); - verifyTestMatrix3x3Value("inverse", expectedTestValues.inverse, testValues.inverse, testType); + verifyTestValue("mul", expectedTestValues.mul, testValues.mul, testIteration, seed, testType); + verifyTestValue("transpose", expectedTestValues.transpose, testValues.transpose, testIteration, seed, testType); + verifyTestValue("inverse", expectedTestValues.inverse, testValues.inverse, testIteration, seed, testType); } }; diff --git a/22_CppCompat/CTgmathTester.h b/22_CppCompat/CTgmathTester.h index 63b0e483e..aa6c81d1c 100644 --- a/22_CppCompat/CTgmathTester.h +++ b/22_CppCompat/CTgmathTester.h @@ -3,358 +3,337 @@ #include "nbl/examples/examples.hpp" - #include "app_resources/common.hlsl" -#include "ITester.h" - +#include "nbl/examples/Tester/ITester.h" using namespace nbl; -class CTgmathTester final : public ITester +class CTgmathTester final : public ITester { + using base_t = ITester; + public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); + CTgmathTester(const uint32_t testBatchCount) + : base_t(testBatchCount) {}; - std::uniform_real_distribution realDistributionNeg(-50.0f, -1.0f); - std::uniform_real_distribution realDistributionPos(1.0f, 50.0f); +private: + TgmathIntputTestValues generateInputTestValues() override + { std::uniform_real_distribution realDistribution(-100.0f, 100.0f); std::uniform_real_distribution realDistributionSmall(1.0f, 4.0f); std::uniform_int_distribution intDistribution(-100, 100); std::uniform_int_distribution coinFlipDistribution(0, 1); - m_logger->log("tgmath.hlsl TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - TgmathIntputTestValues testInput; - 
testInput.floor = realDistribution(mt); - testInput.isnan = coinFlipDistribution(mt) ? realDistribution(mt) : std::numeric_limits::quiet_NaN(); - testInput.isinf = coinFlipDistribution(mt) ? realDistribution(mt) : std::numeric_limits::infinity(); - testInput.powX = realDistributionSmall(mt); - testInput.powY = realDistributionSmall(mt); - testInput.exp = realDistributionSmall(mt); - testInput.exp2 = realDistributionSmall(mt); - testInput.log = realDistribution(mt); - testInput.log2 = realDistribution(mt); - testInput.absF = realDistribution(mt); - testInput.absI = intDistribution(mt); - testInput.sqrt = realDistribution(mt); - testInput.sin = realDistribution(mt); - testInput.cos = realDistribution(mt); - testInput.tan = realDistribution(mt); - testInput.asin = realDistribution(mt); - testInput.atan = realDistribution(mt); - testInput.sinh = realDistribution(mt); - testInput.cosh = realDistribution(mt); - testInput.tanh = realDistribution(mt); - testInput.asinh = realDistribution(mt); - testInput.acosh = realDistribution(mt); - testInput.atanh = realDistribution(mt); - testInput.atan2X = realDistribution(mt); - testInput.atan2Y = realDistribution(mt); - testInput.acos = realDistribution(mt); - testInput.modf = realDistribution(mt); - testInput.round = realDistribution(mt); - testInput.roundEven = coinFlipDistribution(mt) ? 
realDistributionSmall(mt) : (static_cast(intDistribution(mt) / 2) + 0.5f); - testInput.trunc = realDistribution(mt); - testInput.ceil = realDistribution(mt); - testInput.fmaX = realDistribution(mt); - testInput.fmaY = realDistribution(mt); - testInput.fmaZ = realDistribution(mt); - testInput.ldexpArg = realDistributionSmall(mt); - testInput.ldexpExp = intDistribution(mt); - testInput.erf = realDistribution(mt); - testInput.erfInv = realDistribution(mt); - - testInput.floorVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.isnanVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.isinfVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.powXVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.powYVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.expVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.exp2Vec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.logVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.log2Vec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.absFVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.absIVec = int32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); - testInput.sqrtVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.sinVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.cosVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.tanVec = float32_t3(realDistribution(mt), realDistribution(mt), 
realDistribution(mt)); - testInput.asinVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atanVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.sinhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.coshVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.tanhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.asinhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.acoshVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atanhVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atan2XVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.atan2YVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.acosVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.modfVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.ldexpArgVec = float32_t3(realDistributionSmall(mt), realDistributionSmall(mt), realDistributionSmall(mt)); - testInput.ldexpExpVec = float32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); - testInput.erfVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.erfInvVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - - testInput.modfStruct = realDistribution(mt); - testInput.modfStructVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); - testInput.frexpStruct = realDistribution(mt); - testInput.frexpStructVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); + TgmathIntputTestValues testInput; + 
testInput.floor = realDistribution(getRandomEngine()); + testInput.isnan = coinFlipDistribution(getRandomEngine()) ? realDistribution(getRandomEngine()) : std::numeric_limits::quiet_NaN(); + testInput.isinf = coinFlipDistribution(getRandomEngine()) ? realDistribution(getRandomEngine()) : std::numeric_limits::infinity(); + testInput.powX = realDistributionSmall(getRandomEngine()); + testInput.powY = realDistributionSmall(getRandomEngine()); + testInput.exp = realDistributionSmall(getRandomEngine()); + testInput.exp2 = realDistributionSmall(getRandomEngine()); + testInput.log = realDistribution(getRandomEngine()); + testInput.log2 = realDistribution(getRandomEngine()); + testInput.absF = realDistribution(getRandomEngine()); + testInput.absI = intDistribution(getRandomEngine()); + testInput.sqrt = realDistribution(getRandomEngine()); + testInput.sin = realDistribution(getRandomEngine()); + testInput.cos = realDistribution(getRandomEngine()); + testInput.tan = realDistribution(getRandomEngine()); + testInput.asin = realDistribution(getRandomEngine()); + testInput.atan = realDistribution(getRandomEngine()); + testInput.sinh = realDistribution(getRandomEngine()); + testInput.cosh = realDistribution(getRandomEngine()); + testInput.tanh = realDistribution(getRandomEngine()); + testInput.asinh = realDistribution(getRandomEngine()); + testInput.acosh = realDistribution(getRandomEngine()); + testInput.atanh = realDistribution(getRandomEngine()); + testInput.atan2X = realDistribution(getRandomEngine()); + testInput.atan2Y = realDistribution(getRandomEngine()); + testInput.acos = realDistribution(getRandomEngine()); + testInput.modf = realDistribution(getRandomEngine()); + testInput.round = realDistribution(getRandomEngine()); + testInput.roundEven = coinFlipDistribution(getRandomEngine()) ? 
realDistributionSmall(getRandomEngine()) : (static_cast(intDistribution(getRandomEngine()) / 2) + 0.5f); + testInput.trunc = realDistribution(getRandomEngine()); + testInput.ceil = realDistribution(getRandomEngine()); + testInput.fmaX = realDistribution(getRandomEngine()); + testInput.fmaY = realDistribution(getRandomEngine()); + testInput.fmaZ = realDistribution(getRandomEngine()); + testInput.ldexpArg = realDistributionSmall(getRandomEngine()); + testInput.ldexpExp = intDistribution(getRandomEngine()); + testInput.erf = realDistribution(getRandomEngine()); + testInput.erfInv = realDistribution(getRandomEngine()); + + testInput.floorVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.isnanVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.isinfVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.powXVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.powYVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.expVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.exp2Vec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.logVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.log2Vec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + 
testInput.absFVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.absIVec = int32_t3(intDistribution(getRandomEngine()), intDistribution(getRandomEngine()), intDistribution(getRandomEngine())); + testInput.sqrtVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.sinVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.cosVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.tanVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.asinVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atanVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.sinhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.coshVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.tanhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.asinhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.acoshVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atanhVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), 
realDistribution(getRandomEngine())); + testInput.atan2XVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.atan2YVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.acosVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.modfVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.ldexpArgVec = float32_t3(realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine()), realDistributionSmall(getRandomEngine())); + testInput.ldexpExpVec = float32_t3(intDistribution(getRandomEngine()), intDistribution(getRandomEngine()), intDistribution(getRandomEngine())); + testInput.erfVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.erfInvVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + + testInput.modfStruct = realDistribution(getRandomEngine()); + testInput.modfStructVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + testInput.frexpStruct = realDistribution(getRandomEngine()); + testInput.frexpStructVec = float32_t3(realDistribution(getRandomEngine()), realDistribution(getRandomEngine()), realDistribution(getRandomEngine())); + + return testInput; + } - // use std library functions to determine expected test values, the output of functions from tgmath.hlsl will be verified against these values - TgmathTestValues expected; - expected.floor = std::floor(testInput.floor); - expected.isnan = std::isnan(testInput.isnan); - expected.isinf = std::isinf(testInput.isinf); - 
expected.pow = std::pow(testInput.powX, testInput.powY); - expected.exp = std::exp(testInput.exp); - expected.exp2 = std::exp2(testInput.exp2); - expected.log = std::log(testInput.log); - expected.log2 = std::log2(testInput.log2); - expected.absF = std::abs(testInput.absF); - expected.absI = std::abs(testInput.absI); - expected.sqrt = std::sqrt(testInput.sqrt); - expected.sin = std::sin(testInput.sin); - expected.cos = std::cos(testInput.cos); - expected.acos = std::acos(testInput.acos); - expected.tan = std::tan(testInput.tan); - expected.asin = std::asin(testInput.asin); - expected.atan = std::atan(testInput.atan); - expected.sinh = std::sinh(testInput.sinh); - expected.cosh = std::cosh(testInput.cosh); - expected.tanh = std::tanh(testInput.tanh); - expected.asinh = std::asinh(testInput.asinh); - expected.acosh = std::acosh(testInput.acosh); - expected.atanh = std::atanh(testInput.atanh); - expected.atan2 = std::atan2(testInput.atan2Y, testInput.atan2X); - expected.erf = std::erf(testInput.erf); + TgmathTestValues determineExpectedResults(const TgmathIntputTestValues& testInput) override + { + // use std library functions to determine expected test values, the output of functions from tgmath.hlsl will be verified against these values + TgmathTestValues expected; + expected.floor = std::floor(testInput.floor); + expected.isnan = std::isnan(testInput.isnan); + expected.isinf = std::isinf(testInput.isinf); + expected.pow = std::pow(testInput.powX, testInput.powY); + expected.exp = std::exp(testInput.exp); + expected.exp2 = std::exp2(testInput.exp2); + expected.log = std::log(testInput.log); + expected.log2 = std::log2(testInput.log2); + expected.absF = std::abs(testInput.absF); + expected.absI = std::abs(testInput.absI); + expected.sqrt = std::sqrt(testInput.sqrt); + expected.sin = std::sin(testInput.sin); + expected.cos = std::cos(testInput.cos); + expected.acos = std::acos(testInput.acos); + expected.tan = std::tan(testInput.tan); + expected.asin = 
std::asin(testInput.asin); + expected.atan = std::atan(testInput.atan); + expected.sinh = std::sinh(testInput.sinh); + expected.cosh = std::cosh(testInput.cosh); + expected.tanh = std::tanh(testInput.tanh); + expected.asinh = std::asinh(testInput.asinh); + expected.acosh = std::acosh(testInput.acosh); + expected.atanh = std::atanh(testInput.atanh); + expected.atan2 = std::atan2(testInput.atan2Y, testInput.atan2X); + expected.erf = std::erf(testInput.erf); + { + float tmp; + expected.modf = std::modf(testInput.modf, &tmp); + } + expected.round = std::round(testInput.round); + // TODO: uncomment when C++23 + //expected.roundEven = std::roundeven(testInput.roundEven); + // TODO: remove when C++23 + auto roundeven = [](const float& val) -> float { float tmp; - expected.modf = std::modf(testInput.modf, &tmp); - } - expected.round = std::round(testInput.round); - // TODO: uncomment when C++23 - //expected.roundEven = std::roundeven(testInput.roundEven); - // TODO: remove when C++23 - auto roundeven = [](const float& val) -> float + if (std::abs(std::modf(val, &tmp)) == 0.5f) { - float tmp; - if (std::abs(std::modf(val, &tmp)) == 0.5f) - { - int32_t result = static_cast(val); - if (result % 2 != 0) - result >= 0 ? 
++result : --result; - return result; - } - - return std::round(val); - }; - expected.roundEven = roundeven(testInput.roundEven); - - expected.trunc = std::trunc(testInput.trunc); - expected.ceil = std::ceil(testInput.ceil); - expected.fma = std::fma(testInput.fmaX, testInput.fmaY, testInput.fmaZ); - expected.ldexp = std::ldexp(testInput.ldexpArg, testInput.ldexpExp); - - expected.floorVec = float32_t3(std::floor(testInput.floorVec.x), std::floor(testInput.floorVec.y), std::floor(testInput.floorVec.z)); - - expected.isnanVec = float32_t3(std::isnan(testInput.isnanVec.x), std::isnan(testInput.isnanVec.y), std::isnan(testInput.isnanVec.z)); - expected.isinfVec = float32_t3(std::isinf(testInput.isinfVec.x), std::isinf(testInput.isinfVec.y), std::isinf(testInput.isinfVec.z)); - - expected.powVec.x = std::pow(testInput.powXVec.x, testInput.powYVec.x); - expected.powVec.y = std::pow(testInput.powXVec.y, testInput.powYVec.y); - expected.powVec.z = std::pow(testInput.powXVec.z, testInput.powYVec.z); - - expected.expVec = float32_t3(std::exp(testInput.expVec.x), std::exp(testInput.expVec.y), std::exp(testInput.expVec.z)); - expected.exp2Vec = float32_t3(std::exp2(testInput.exp2Vec.x), std::exp2(testInput.exp2Vec.y), std::exp2(testInput.exp2Vec.z)); - expected.logVec = float32_t3(std::log(testInput.logVec.x), std::log(testInput.logVec.y), std::log(testInput.logVec.z)); - expected.log2Vec = float32_t3(std::log2(testInput.log2Vec.x), std::log2(testInput.log2Vec.y), std::log2(testInput.log2Vec.z)); - expected.absFVec = float32_t3(std::abs(testInput.absFVec.x), std::abs(testInput.absFVec.y), std::abs(testInput.absFVec.z)); - expected.absIVec = float32_t3(std::abs(testInput.absIVec.x), std::abs(testInput.absIVec.y), std::abs(testInput.absIVec.z)); - expected.sqrtVec = float32_t3(std::sqrt(testInput.sqrtVec.x), std::sqrt(testInput.sqrtVec.y), std::sqrt(testInput.sqrtVec.z)); - expected.cosVec = float32_t3(std::cos(testInput.cosVec.x), std::cos(testInput.cosVec.y), 
std::cos(testInput.cosVec.z)); - expected.sinVec = float32_t3(std::sin(testInput.sinVec.x), std::sin(testInput.sinVec.y), std::sin(testInput.sinVec.z)); - expected.tanVec = float32_t3(std::tan(testInput.tanVec.x), std::tan(testInput.tanVec.y), std::tan(testInput.tanVec.z)); - expected.asinVec = float32_t3(std::asin(testInput.asinVec.x), std::asin(testInput.asinVec.y), std::asin(testInput.asinVec.z)); - expected.atanVec = float32_t3(std::atan(testInput.atanVec.x), std::atan(testInput.atanVec.y), std::atan(testInput.atanVec.z)); - expected.sinhVec = float32_t3(std::sinh(testInput.sinhVec.x), std::sinh(testInput.sinhVec.y), std::sinh(testInput.sinhVec.z)); - expected.coshVec = float32_t3(std::cosh(testInput.coshVec.x), std::cosh(testInput.coshVec.y), std::cosh(testInput.coshVec.z)); - expected.tanhVec = float32_t3(std::tanh(testInput.tanhVec.x), std::tanh(testInput.tanhVec.y), std::tanh(testInput.tanhVec.z)); - expected.asinhVec = float32_t3(std::asinh(testInput.asinhVec.x), std::asinh(testInput.asinhVec.y), std::asinh(testInput.asinhVec.z)); - expected.acoshVec = float32_t3(std::acosh(testInput.acoshVec.x), std::acosh(testInput.acoshVec.y), std::acosh(testInput.acoshVec.z)); - expected.atanhVec = float32_t3(std::atanh(testInput.atanhVec.x), std::atanh(testInput.atanhVec.y), std::atanh(testInput.atanhVec.z)); - expected.atan2Vec = float32_t3(std::atan2(testInput.atan2YVec.x, testInput.atan2XVec.x), std::atan2(testInput.atan2YVec.y, testInput.atan2XVec.y), std::atan2(testInput.atan2YVec.z, testInput.atan2XVec.z)); - expected.acosVec = float32_t3(std::acos(testInput.acosVec.x), std::acos(testInput.acosVec.y), std::acos(testInput.acosVec.z)); - expected.erfVec = float32_t3(std::erf(testInput.erfVec.x), std::erf(testInput.erfVec.y), std::erf(testInput.erfVec.z)); - { - float tmp; - expected.modfVec = float32_t3(std::modf(testInput.modfVec.x, &tmp), std::modf(testInput.modfVec.y, &tmp), std::modf(testInput.modfVec.z, &tmp)); - } - expected.roundVec = float32_t3( - 
std::round(testInput.roundVec.x), - std::round(testInput.roundVec.y), - std::round(testInput.roundVec.z) - ); - // TODO: uncomment when C++23 - //expected.roundEven = float32_t( - // std::roundeven(testInput.roundEvenVec.x), - // std::roundeven(testInput.roundEvenVec.y), - // std::roundeven(testInput.roundEvenVec.z) - // ); - // TODO: remove when C++23 - expected.roundEvenVec = float32_t3( - roundeven(testInput.roundEvenVec.x), - roundeven(testInput.roundEvenVec.y), - roundeven(testInput.roundEvenVec.z) - ); - - expected.truncVec = float32_t3(std::trunc(testInput.truncVec.x), std::trunc(testInput.truncVec.y), std::trunc(testInput.truncVec.z)); - expected.ceilVec = float32_t3(std::ceil(testInput.ceilVec.x), std::ceil(testInput.ceilVec.y), std::ceil(testInput.ceilVec.z)); - expected.fmaVec = float32_t3( - std::fma(testInput.fmaXVec.x, testInput.fmaYVec.x, testInput.fmaZVec.x), - std::fma(testInput.fmaXVec.y, testInput.fmaYVec.y, testInput.fmaZVec.y), - std::fma(testInput.fmaXVec.z, testInput.fmaYVec.z, testInput.fmaZVec.z) - ); - expected.ldexpVec = float32_t3( - std::ldexp(testInput.ldexpArgVec.x, testInput.ldexpExpVec.x), - std::ldexp(testInput.ldexpArgVec.y, testInput.ldexpExpVec.y), - std::ldexp(testInput.ldexpArgVec.z, testInput.ldexpExpVec.z) - ); - - { - ModfOutput expectedModfStructOutput; - expectedModfStructOutput.fractionalPart = std::modf(testInput.modfStruct, &expectedModfStructOutput.wholeNumberPart); - expected.modfStruct = expectedModfStructOutput; - - ModfOutput expectedModfStructOutputVec; - for (int i = 0; i < 3; ++i) - expectedModfStructOutputVec.fractionalPart[i] = std::modf(testInput.modfStructVec[i], &expectedModfStructOutputVec.wholeNumberPart[i]); - expected.modfStructVec = expectedModfStructOutputVec; - } - - { - FrexpOutput expectedFrexpStructOutput; - expectedFrexpStructOutput.significand = std::frexp(testInput.frexpStruct, &expectedFrexpStructOutput.exponent); - expected.frexpStruct = expectedFrexpStructOutput; - - FrexpOutput 
expectedFrexpStructOutputVec; - for (int i = 0; i < 3; ++i) - expectedFrexpStructOutputVec.significand[i] = std::frexp(testInput.frexpStructVec[i], &expectedFrexpStructOutputVec.exponent[i]); - expected.frexpStructVec = expectedFrexpStructOutputVec; - } - - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); + int32_t result = static_cast(val); + if (result % 2 != 0) + result >= 0 ? ++result : --result; + return result; + } + + return std::round(val); + }; + expected.roundEven = roundeven(testInput.roundEven); + + expected.trunc = std::trunc(testInput.trunc); + expected.ceil = std::ceil(testInput.ceil); + expected.fma = std::fma(testInput.fmaX, testInput.fmaY, testInput.fmaZ); + expected.ldexp = std::ldexp(testInput.ldexpArg, testInput.ldexpExp); + + expected.floorVec = float32_t3(std::floor(testInput.floorVec.x), std::floor(testInput.floorVec.y), std::floor(testInput.floorVec.z)); + + expected.isnanVec = float32_t3(std::isnan(testInput.isnanVec.x), std::isnan(testInput.isnanVec.y), std::isnan(testInput.isnanVec.z)); + expected.isinfVec = float32_t3(std::isinf(testInput.isinfVec.x), std::isinf(testInput.isinfVec.y), std::isinf(testInput.isinfVec.z)); + + expected.powVec.x = std::pow(testInput.powXVec.x, testInput.powYVec.x); + expected.powVec.y = std::pow(testInput.powXVec.y, testInput.powYVec.y); + expected.powVec.z = std::pow(testInput.powXVec.z, testInput.powYVec.z); + + expected.expVec = float32_t3(std::exp(testInput.expVec.x), std::exp(testInput.expVec.y), std::exp(testInput.expVec.z)); + expected.exp2Vec = float32_t3(std::exp2(testInput.exp2Vec.x), std::exp2(testInput.exp2Vec.y), std::exp2(testInput.exp2Vec.z)); + expected.logVec = float32_t3(std::log(testInput.logVec.x), std::log(testInput.logVec.y), std::log(testInput.logVec.z)); + expected.log2Vec = float32_t3(std::log2(testInput.log2Vec.x), std::log2(testInput.log2Vec.y), std::log2(testInput.log2Vec.z)); + expected.absFVec = float32_t3(std::abs(testInput.absFVec.x), 
std::abs(testInput.absFVec.y), std::abs(testInput.absFVec.z)); + expected.absIVec = float32_t3(std::abs(testInput.absIVec.x), std::abs(testInput.absIVec.y), std::abs(testInput.absIVec.z)); + expected.sqrtVec = float32_t3(std::sqrt(testInput.sqrtVec.x), std::sqrt(testInput.sqrtVec.y), std::sqrt(testInput.sqrtVec.z)); + expected.cosVec = float32_t3(std::cos(testInput.cosVec.x), std::cos(testInput.cosVec.y), std::cos(testInput.cosVec.z)); + expected.sinVec = float32_t3(std::sin(testInput.sinVec.x), std::sin(testInput.sinVec.y), std::sin(testInput.sinVec.z)); + expected.tanVec = float32_t3(std::tan(testInput.tanVec.x), std::tan(testInput.tanVec.y), std::tan(testInput.tanVec.z)); + expected.asinVec = float32_t3(std::asin(testInput.asinVec.x), std::asin(testInput.asinVec.y), std::asin(testInput.asinVec.z)); + expected.atanVec = float32_t3(std::atan(testInput.atanVec.x), std::atan(testInput.atanVec.y), std::atan(testInput.atanVec.z)); + expected.sinhVec = float32_t3(std::sinh(testInput.sinhVec.x), std::sinh(testInput.sinhVec.y), std::sinh(testInput.sinhVec.z)); + expected.coshVec = float32_t3(std::cosh(testInput.coshVec.x), std::cosh(testInput.coshVec.y), std::cosh(testInput.coshVec.z)); + expected.tanhVec = float32_t3(std::tanh(testInput.tanhVec.x), std::tanh(testInput.tanhVec.y), std::tanh(testInput.tanhVec.z)); + expected.asinhVec = float32_t3(std::asinh(testInput.asinhVec.x), std::asinh(testInput.asinhVec.y), std::asinh(testInput.asinhVec.z)); + expected.acoshVec = float32_t3(std::acosh(testInput.acoshVec.x), std::acosh(testInput.acoshVec.y), std::acosh(testInput.acoshVec.z)); + expected.atanhVec = float32_t3(std::atanh(testInput.atanhVec.x), std::atanh(testInput.atanhVec.y), std::atanh(testInput.atanhVec.z)); + expected.atan2Vec = float32_t3(std::atan2(testInput.atan2YVec.x, testInput.atan2XVec.x), std::atan2(testInput.atan2YVec.y, testInput.atan2XVec.y), std::atan2(testInput.atan2YVec.z, testInput.atan2XVec.z)); + expected.acosVec = 
float32_t3(std::acos(testInput.acosVec.x), std::acos(testInput.acosVec.y), std::acos(testInput.acosVec.z)); + expected.erfVec = float32_t3(std::erf(testInput.erfVec.x), std::erf(testInput.erfVec.y), std::erf(testInput.erfVec.z)); + { + float tmp; + expected.modfVec = float32_t3(std::modf(testInput.modfVec.x, &tmp), std::modf(testInput.modfVec.y, &tmp), std::modf(testInput.modfVec.z, &tmp)); } - m_logger->log("tgmath.hlsl TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } + expected.roundVec = float32_t3( + std::round(testInput.roundVec.x), + std::round(testInput.roundVec.y), + std::round(testInput.roundVec.z) + ); + // TODO: uncomment when C++23 + //expected.roundEven = float32_t( + // std::roundeven(testInput.roundEvenVec.x), + // std::roundeven(testInput.roundEvenVec.y), + // std::roundeven(testInput.roundEvenVec.z) + // ); + // TODO: remove when C++23 + expected.roundEvenVec = float32_t3( + roundeven(testInput.roundEvenVec.x), + roundeven(testInput.roundEvenVec.y), + roundeven(testInput.roundEvenVec.z) + ); + + expected.truncVec = float32_t3(std::trunc(testInput.truncVec.x), std::trunc(testInput.truncVec.y), std::trunc(testInput.truncVec.z)); + expected.ceilVec = float32_t3(std::ceil(testInput.ceilVec.x), std::ceil(testInput.ceilVec.y), std::ceil(testInput.ceilVec.z)); + expected.fmaVec = float32_t3( + std::fma(testInput.fmaXVec.x, testInput.fmaYVec.x, testInput.fmaZVec.x), + std::fma(testInput.fmaXVec.y, testInput.fmaYVec.y, testInput.fmaZVec.y), + std::fma(testInput.fmaXVec.z, testInput.fmaYVec.z, testInput.fmaZVec.z) + ); + expected.ldexpVec = float32_t3( + std::ldexp(testInput.ldexpArgVec.x, testInput.ldexpExpVec.x), + std::ldexp(testInput.ldexpArgVec.y, testInput.ldexpExpVec.y), + std::ldexp(testInput.ldexpArgVec.z, testInput.ldexpExpVec.z) + ); -private: - inline static constexpr int Iterations = 100u; + { + ModfOutput expectedModfStructOutput; + expectedModfStructOutput.fractionalPart = std::modf(testInput.modfStruct, 
&expectedModfStructOutput.wholeNumberPart); + expected.modfStruct = expectedModfStructOutput; + + ModfOutput expectedModfStructOutputVec; + for (int i = 0; i < 3; ++i) + expectedModfStructOutputVec.fractionalPart[i] = std::modf(testInput.modfStructVec[i], &expectedModfStructOutputVec.wholeNumberPart[i]); + expected.modfStructVec = expectedModfStructOutputVec; + } - void performCpuTests(const TgmathIntputTestValues& commonTestInputValues, const TgmathTestValues& expectedTestValues) - { - TgmathTestValues cpuTestValues; - cpuTestValues.fillTestValues(commonTestInputValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); - - } + { + FrexpOutput expectedFrexpStructOutput; + expectedFrexpStructOutput.significand = std::frexp(testInput.frexpStruct, &expectedFrexpStructOutput.exponent); + expected.frexpStruct = expectedFrexpStructOutput; + + FrexpOutput expectedFrexpStructOutputVec; + for (int i = 0; i < 3; ++i) + expectedFrexpStructOutputVec.significand[i] = std::frexp(testInput.frexpStructVec[i], &expectedFrexpStructOutputVec.exponent[i]); + expected.frexpStructVec = expectedFrexpStructOutputVec; + } - void performGpuTests(const TgmathIntputTestValues& commonTestInputValues, const TgmathTestValues& expectedTestValues) - { - TgmathTestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + return expected; } - void verifyTestValues(const TgmathTestValues& expectedTestValues, const TgmathTestValues& testValues, ITester::TestType testType) + void verifyTestResults(const TgmathTestValues& expectedTestValues, const TgmathTestValues& testValues, const size_t testIteration, const uint32_t seed, TestType testType) override { // TODO: figure out input for functions: sinh, cosh so output isn't a crazy low number // very low numbers generate comparison errors - verifyTestValue("floor", expectedTestValues.floor, testValues.floor, testType); - 
verifyTestValue("isnan", expectedTestValues.isnan, testValues.isnan, testType); - verifyTestValue("isinf", expectedTestValues.isinf, testValues.isinf, testType); - verifyTestValue("pow", expectedTestValues.pow, testValues.pow, testType); - verifyTestValue("exp", expectedTestValues.exp, testValues.exp, testType); - verifyTestValue("exp2", expectedTestValues.exp2, testValues.exp2, testType); - verifyTestValue("log", expectedTestValues.log, testValues.log, testType); - verifyTestValue("log2", expectedTestValues.log2, testValues.log2, testType); - verifyTestValue("absF", expectedTestValues.absF, testValues.absF, testType); - verifyTestValue("absI", expectedTestValues.absI, testValues.absI, testType); - verifyTestValue("sqrt", expectedTestValues.sqrt, testValues.sqrt, testType); - verifyTestValue("sin", expectedTestValues.sin, testValues.sin, testType); - verifyTestValue("cos", expectedTestValues.cos, testValues.cos, testType); - verifyTestValue("acos", expectedTestValues.acos, testValues.acos, testType); - verifyTestValue("tan", expectedTestValues.tan, testValues.tan, testType); - verifyTestValue("asin", expectedTestValues.asin, testValues.asin, testType); - verifyTestValue("atan", expectedTestValues.atan, testValues.atan, testType); - //verifyTestValue("sinh", expectedTestValues.sinh, testValues.sinh, testType); - //verifyTestValue("cosh", expectedTestValues.cosh, testValues.cosh, testType); - verifyTestValue("tanh", expectedTestValues.tanh, testValues.tanh, testType); - verifyTestValue("asinh", expectedTestValues.asinh, testValues.asinh, testType); - verifyTestValue("acosh", expectedTestValues.acosh, testValues.acosh, testType); - verifyTestValue("atanh", expectedTestValues.atanh, testValues.atanh, testType); - verifyTestValue("atan2", expectedTestValues.atan2, testValues.atan2, testType); - verifyTestValue("modf", expectedTestValues.modf, testValues.modf, testType); - verifyTestValue("round", expectedTestValues.round, testValues.round, testType); - 
verifyTestValue("roundEven", expectedTestValues.roundEven, testValues.roundEven, testType); - verifyTestValue("trunc", expectedTestValues.trunc, testValues.trunc, testType); - verifyTestValue("ceil", expectedTestValues.ceil, testValues.ceil, testType); - verifyTestValue("fma", expectedTestValues.fma, testValues.fma, testType); - verifyTestValue("ldexp", expectedTestValues.ldexp, testValues.ldexp, testType); - verifyTestValue("erf", expectedTestValues.erf, testValues.erf, testType); - //verifyTestValue("erfInv", expectedTestValues.erfInv, testValues.erfInv, testType); - - verifyTestVector3dValue("floorVec", expectedTestValues.floorVec, testValues.floorVec, testType); - verifyTestVector3dValue("isnanVec", expectedTestValues.isnanVec, testValues.isnanVec, testType); - verifyTestVector3dValue("isinfVec", expectedTestValues.isinfVec, testValues.isinfVec, testType); - verifyTestVector3dValue("powVec", expectedTestValues.powVec, testValues.powVec, testType); - verifyTestVector3dValue("expVec", expectedTestValues.expVec, testValues.expVec, testType); - verifyTestVector3dValue("exp2Vec", expectedTestValues.exp2Vec, testValues.exp2Vec, testType); - verifyTestVector3dValue("logVec", expectedTestValues.logVec, testValues.logVec, testType); - verifyTestVector3dValue("log2Vec", expectedTestValues.log2Vec, testValues.log2Vec, testType); - verifyTestVector3dValue("absFVec", expectedTestValues.absFVec, testValues.absFVec, testType); - verifyTestVector3dValue("absIVec", expectedTestValues.absIVec, testValues.absIVec, testType); - verifyTestVector3dValue("sqrtVec", expectedTestValues.sqrtVec, testValues.sqrtVec, testType); - verifyTestVector3dValue("sinVec", expectedTestValues.sinVec, testValues.sinVec, testType); - verifyTestVector3dValue("cosVec", expectedTestValues.cosVec, testValues.cosVec, testType); - verifyTestVector3dValue("acosVec", expectedTestValues.acosVec, testValues.acosVec, testType); - verifyTestVector3dValue("modfVec", expectedTestValues.modfVec, testValues.modfVec, 
testType); - verifyTestVector3dValue("roundVec", expectedTestValues.roundVec, testValues.roundVec, testType); - verifyTestVector3dValue("roundEvenVec", expectedTestValues.roundEvenVec, testValues.roundEvenVec, testType); - verifyTestVector3dValue("truncVec", expectedTestValues.truncVec, testValues.truncVec, testType); - verifyTestVector3dValue("ceilVec", expectedTestValues.ceilVec, testValues.ceilVec, testType); - verifyTestVector3dValue("fmaVec", expectedTestValues.fmaVec, testValues.fmaVec, testType); - verifyTestVector3dValue("ldexp", expectedTestValues.ldexpVec, testValues.ldexpVec, testType); - verifyTestVector3dValue("tanVec", expectedTestValues.tanVec, testValues.tanVec, testType); - verifyTestVector3dValue("asinVec", expectedTestValues.asinVec, testValues.asinVec, testType); - verifyTestVector3dValue("atanVec", expectedTestValues.atanVec, testValues.atanVec, testType); - //verifyTestVector3dValue("sinhVec", expectedTestValues.sinhVec, testValues.sinhVec, testType); - //verifyTestVector3dValue("coshVec", expectedTestValues.coshVec, testValues.coshVec, testType); - verifyTestVector3dValue("tanhVec", expectedTestValues.tanhVec, testValues.tanhVec, testType); - verifyTestVector3dValue("asinhVec", expectedTestValues.asinhVec, testValues.asinhVec, testType); - verifyTestVector3dValue("acoshVec", expectedTestValues.acoshVec, testValues.acoshVec, testType); - verifyTestVector3dValue("atanhVec", expectedTestValues.atanhVec, testValues.atanhVec, testType); - verifyTestVector3dValue("atan2Vec", expectedTestValues.atan2Vec, testValues.atan2Vec, testType); - verifyTestVector3dValue("erfVec", expectedTestValues.erfVec, testValues.erfVec, testType); - //verifyTestVector3dValue("erfInvVec", expectedTestValues.erfInvVec, testValues.erfInvVec, testType); + verifyTestValue("floor", expectedTestValues.floor, testValues.floor, testIteration, seed, testType); + verifyTestValue("isnan", expectedTestValues.isnan, testValues.isnan, testIteration, seed, testType); + 
verifyTestValue("isinf", expectedTestValues.isinf, testValues.isinf, testIteration, seed, testType); + verifyTestValue("pow", expectedTestValues.pow, testValues.pow, testIteration, seed, testType); + verifyTestValue("exp", expectedTestValues.exp, testValues.exp, testIteration, seed, testType); + verifyTestValue("exp2", expectedTestValues.exp2, testValues.exp2, testIteration, seed, testType); + verifyTestValue("log", expectedTestValues.log, testValues.log, testIteration, seed, testType); + verifyTestValue("log2", expectedTestValues.log2, testValues.log2, testIteration, seed, testType); + verifyTestValue("absF", expectedTestValues.absF, testValues.absF, testIteration, seed, testType); + verifyTestValue("absI", expectedTestValues.absI, testValues.absI, testIteration, seed, testType); + verifyTestValue("sqrt", expectedTestValues.sqrt, testValues.sqrt, testIteration, seed, testType); + verifyTestValue("sin", expectedTestValues.sin, testValues.sin, testIteration, seed, testType); + verifyTestValue("cos", expectedTestValues.cos, testValues.cos, testIteration, seed, testType); + verifyTestValue("acos", expectedTestValues.acos, testValues.acos, testIteration, seed, testType); + verifyTestValue("tan", expectedTestValues.tan, testValues.tan, testIteration, seed, testType); + verifyTestValue("asin", expectedTestValues.asin, testValues.asin, testIteration, seed, testType); + verifyTestValue("atan", expectedTestValues.atan, testValues.atan, testIteration, seed, testType); + //verifyTestValue("sinh", expectedTestValues.sinh, testValues.sinh, testIteration, seed, testType); + //verifyTestValue("cosh", expectedTestValues.cosh, testValues.cosh, testIteration, seed, testType); + verifyTestValue("tanh", expectedTestValues.tanh, testValues.tanh, testIteration, seed, testType); + verifyTestValue("asinh", expectedTestValues.asinh, testValues.asinh, testIteration, seed, testType); + verifyTestValue("acosh", expectedTestValues.acosh, testValues.acosh, testIteration, seed, testType); + 
verifyTestValue("atanh", expectedTestValues.atanh, testValues.atanh, testIteration, seed, testType); + verifyTestValue("atan2", expectedTestValues.atan2, testValues.atan2, testIteration, seed, testType); + verifyTestValue("modf", expectedTestValues.modf, testValues.modf, testIteration, seed, testType); + verifyTestValue("round", expectedTestValues.round, testValues.round, testIteration, seed, testType); + verifyTestValue("roundEven", expectedTestValues.roundEven, testValues.roundEven, testIteration, seed, testType); + verifyTestValue("trunc", expectedTestValues.trunc, testValues.trunc, testIteration, seed, testType); + verifyTestValue("ceil", expectedTestValues.ceil, testValues.ceil, testIteration, seed, testType); + verifyTestValue("fma", expectedTestValues.fma, testValues.fma, testIteration, seed, testType); + verifyTestValue("ldexp", expectedTestValues.ldexp, testValues.ldexp, testIteration, seed, testType); + verifyTestValue("erf", expectedTestValues.erf, testValues.erf, testIteration, seed, testType); + //verifyTestValue("erfInv", expectedTestValues.erfInv, testValues.erfInv, testIteration, seed, testType); + + verifyTestValue("floorVec", expectedTestValues.floorVec, testValues.floorVec, testIteration, seed, testType); + verifyTestValue("isnanVec", expectedTestValues.isnanVec, testValues.isnanVec, testIteration, seed, testType); + verifyTestValue("isinfVec", expectedTestValues.isinfVec, testValues.isinfVec, testIteration, seed, testType); + verifyTestValue("powVec", expectedTestValues.powVec, testValues.powVec, testIteration, seed, testType); + verifyTestValue("expVec", expectedTestValues.expVec, testValues.expVec, testIteration, seed, testType); + verifyTestValue("exp2Vec", expectedTestValues.exp2Vec, testValues.exp2Vec, testIteration, seed, testType); + verifyTestValue("logVec", expectedTestValues.logVec, testValues.logVec, testIteration, seed, testType); + verifyTestValue("log2Vec", expectedTestValues.log2Vec, testValues.log2Vec, testIteration, seed, 
testType); + verifyTestValue("absFVec", expectedTestValues.absFVec, testValues.absFVec, testIteration, seed, testType); + verifyTestValue("absIVec", expectedTestValues.absIVec, testValues.absIVec, testIteration, seed, testType); + verifyTestValue("sqrtVec", expectedTestValues.sqrtVec, testValues.sqrtVec, testIteration, seed, testType); + verifyTestValue("sinVec", expectedTestValues.sinVec, testValues.sinVec, testIteration, seed, testType); + verifyTestValue("cosVec", expectedTestValues.cosVec, testValues.cosVec, testIteration, seed, testType); + verifyTestValue("acosVec", expectedTestValues.acosVec, testValues.acosVec, testIteration, seed, testType); + verifyTestValue("modfVec", expectedTestValues.modfVec, testValues.modfVec, testIteration, seed, testType); + verifyTestValue("roundVec", expectedTestValues.roundVec, testValues.roundVec, testIteration, seed, testType); + verifyTestValue("roundEvenVec", expectedTestValues.roundEvenVec, testValues.roundEvenVec, testIteration, seed, testType); + verifyTestValue("truncVec", expectedTestValues.truncVec, testValues.truncVec, testIteration, seed, testType); + verifyTestValue("ceilVec", expectedTestValues.ceilVec, testValues.ceilVec, testIteration, seed, testType); + verifyTestValue("fmaVec", expectedTestValues.fmaVec, testValues.fmaVec, testIteration, seed, testType); + verifyTestValue("ldexp", expectedTestValues.ldexpVec, testValues.ldexpVec, testIteration, seed, testType); + verifyTestValue("tanVec", expectedTestValues.tanVec, testValues.tanVec, testIteration, seed, testType); + verifyTestValue("asinVec", expectedTestValues.asinVec, testValues.asinVec, testIteration, seed, testType); + verifyTestValue("atanVec", expectedTestValues.atanVec, testValues.atanVec, testIteration, seed, testType); + //verifyTestValue("sinhVec", expectedTestValues.sinhVec, testValues.sinhVec, testIteration, seed, testType); + //verifyTestValue("coshVec", expectedTestValues.coshVec, testValues.coshVec, testIteration, seed, testType); + 
verifyTestValue("tanhVec", expectedTestValues.tanhVec, testValues.tanhVec, testIteration, seed, testType); + verifyTestValue("asinhVec", expectedTestValues.asinhVec, testValues.asinhVec, testIteration, seed, testType); + verifyTestValue("acoshVec", expectedTestValues.acoshVec, testValues.acoshVec, testIteration, seed, testType); + verifyTestValue("atanhVec", expectedTestValues.atanhVec, testValues.atanhVec, testIteration, seed, testType); + verifyTestValue("atan2Vec", expectedTestValues.atan2Vec, testValues.atan2Vec, testIteration, seed, testType); + verifyTestValue("erfVec", expectedTestValues.erfVec, testValues.erfVec, testIteration, seed, testType); + //verifyTestValue("erfInvVec", expectedTestValues.erfInvVec, testValues.erfInvVec, testIteration, seed, testType); // verify output of struct producing functions - verifyTestValue("modfStruct", expectedTestValues.modfStruct.fractionalPart, testValues.modfStruct.fractionalPart, testType); - verifyTestValue("modfStruct", expectedTestValues.modfStruct.wholeNumberPart, testValues.modfStruct.wholeNumberPart, testType); - verifyTestVector3dValue("modfStructVec", expectedTestValues.modfStructVec.fractionalPart, testValues.modfStructVec.fractionalPart, testType); - verifyTestVector3dValue("modfStructVec", expectedTestValues.modfStructVec.wholeNumberPart, testValues.modfStructVec.wholeNumberPart, testType); - - verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.significand, testValues.frexpStruct.significand, testType); - verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.exponent, testValues.frexpStruct.exponent, testType); - verifyTestVector3dValue("frexpStructVec", expectedTestValues.frexpStructVec.significand, testValues.frexpStructVec.significand, testType); - verifyTestVector3dValue("frexpStructVec", expectedTestValues.frexpStructVec.exponent, testValues.frexpStructVec.exponent, testType); + verifyTestValue("modfStruct", expectedTestValues.modfStruct.fractionalPart, 
testValues.modfStruct.fractionalPart, testIteration, seed, testType); + verifyTestValue("modfStruct", expectedTestValues.modfStruct.wholeNumberPart, testValues.modfStruct.wholeNumberPart, testIteration, seed, testType); + verifyTestValue("modfStructVec", expectedTestValues.modfStructVec.fractionalPart, testValues.modfStructVec.fractionalPart, testIteration, seed, testType); + verifyTestValue("modfStructVec", expectedTestValues.modfStructVec.wholeNumberPart, testValues.modfStructVec.wholeNumberPart, testIteration, seed, testType); + + verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.significand, testValues.frexpStruct.significand, testIteration, seed, testType); + verifyTestValue("frexpStruct", expectedTestValues.frexpStruct.exponent, testValues.frexpStruct.exponent, testIteration, seed, testType); + verifyTestValue("frexpStructVec", expectedTestValues.frexpStructVec.significand, testValues.frexpStructVec.significand, testIteration, seed, testType); + verifyTestValue("frexpStructVec", expectedTestValues.frexpStructVec.exponent, testValues.frexpStructVec.exponent, testIteration, seed, testType); } }; diff --git a/22_CppCompat/ITester.h b/22_CppCompat/ITester.h deleted file mode 100644 index 39ceb8141..000000000 --- a/22_CppCompat/ITester.h +++ /dev/null @@ -1,337 +0,0 @@ -#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ - - -#include "nbl/examples/examples.hpp" - -#include "app_resources/common.hlsl" -#include "nbl/asset/metadata/CHLSLMetadata.h" - - -using namespace nbl; - -class ITester -{ -public: - virtual ~ITester() - { - m_outputBufferAllocation.memory->unmap(); - }; - - struct PipelineSetupData - { - std::string testShaderPath; - - core::smart_refctd_ptr device; - core::smart_refctd_ptr api; - core::smart_refctd_ptr assetMgr; - core::smart_refctd_ptr logger; - video::IPhysicalDevice* physicalDevice; - uint32_t computeFamilyIndex; - }; - - template - void setupPipeline(const 
PipelineSetupData& pipleineSetupData) - { - // setting up pipeline in the constructor - m_device = core::smart_refctd_ptr(pipleineSetupData.device); - m_physicalDevice = pipleineSetupData.physicalDevice; - m_api = core::smart_refctd_ptr(pipleineSetupData.api); - m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); - m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); - m_queueFamily = pipleineSetupData.computeFamilyIndex; - m_semaphoreCounter = 0; - m_semaphore = m_device->createSemaphore(0); - m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) - logFail("Failed to create Command Buffers!\n"); - - // Load shaders, set up pipeline - core::smart_refctd_ptr shader; - auto shaderStage = ESS_UNKNOWN; - { - asset::IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty() || assetBundle.getAssetType() != asset::IAsset::ET_SHADER) - { - logFail("Could not load shader!"); - assert(0); - } - - // It would be super weird if loading a shader from a file produced more than 1 asset - assert(assets.size() == 1); - core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - const auto hlslMetadata = static_cast(assetBundle.getMetadata()); - shaderStage = hlslMetadata->shaderStages->front(); - - auto* compilerSet = m_assetMgr->getCompilerSet(); - - asset::IShaderCompiler::SCompilerOptions options = {}; - options.stage = shaderStage; - options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - options.debugInfoFlags |= asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - 
options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - shader = compilerSet->compileToSPIRV(source.get(), options); - } - - if (!shader) - logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); - - video::IGPUDescriptorSetLayout::SBinding bindings[2] = { - { - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - }, - { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ShaderStage::ESS_COMPUTE, - .count = 1 - } - }; - - core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); - if (!dsLayout) - logFail("Failed to create a Descriptor Layout!\n"); - - m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); - if (!m_pplnLayout) - logFail("Failed to create a Pipeline Layout!\n"); - - { - video::IGPUComputePipeline::SCreationParams params = {}; - params.layout = m_pplnLayout.get(); - params.shader.entryPoint = "main"; - params.shader.shader = shader.get(); - if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) - logFail("Failed to create pipelines (compile & link shaders)!\n"); - } - - // Allocate memory of the input buffer - { - constexpr size_t BufferSize = sizeof(InputStruct); - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); - if (!inputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", 
params.size); - - inputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_inputBufferAllocation.isValid()) - logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); - - assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(inputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - // Allocate memory of the output buffer - { - constexpr size_t BufferSize = sizeof(OutputStruct); - - video::IGPUBuffer::SCreationParams params = {}; - params.size = BufferSize; - params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); - if (!outputBuff) - logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - - outputBuff->setObjectDebugName("emulated_float64_t output buffer"); - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); - reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); - - m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); - if (!m_outputBufferAllocation.isValid()) - logFail("Failed to allocate 
Device Memory compatible with our GPU Buffer!\n"); - - assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); - core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); - - { - video::IGPUDescriptorSet::SDescriptorInfo info[1]; - info[0].desc = core::smart_refctd_ptr(outputBuff); - info[0].info.buffer = { .offset = 0,.size = BufferSize }; - video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { - {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} - }; - m_device->updateDescriptorSets(writes, {}); - } - } - - if (!m_outputBufferAllocation.memory->map({ 0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); - if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct)); - - m_queue = m_device->getQueue(m_queueFamily, 0); - } - - enum class TestType - { - CPU, - GPU - }; - - template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) - { - static constexpr float MaxAllowedError = 0.1f; - if (std::abs(double(expectedVal) - double(testVal)) <= MaxAllowedError) - return; - - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - break; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output! 
test value: " << testVal << " expected value: " << expectedVal << '\n'; - - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); - } - - template - void verifyTestVector3dValue(const std::string& memberName, const nbl::hlsl::vector& expectedVal, const nbl::hlsl::vector& testVal, const TestType testType) - { - static constexpr float MaxAllowedError = 0.1f; - if (std::abs(double(expectedVal.x) - double(testVal.x)) <= MaxAllowedError && - std::abs(double(expectedVal.y) - double(testVal.y)) <= MaxAllowedError && - std::abs(double(expectedVal.z) - double(testVal.z)) <= MaxAllowedError) - return; - - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output! test value: " << - testVal.x << ' ' << testVal.y << ' ' << testVal.z << - " expected value: " << expectedVal.x << ' ' << expectedVal.y << ' ' << expectedVal.z << '\n'; - - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); - } - - template - void verifyTestMatrix3x3Value(const std::string& memberName, const nbl::hlsl::matrix& expectedVal, const nbl::hlsl::matrix& testVal, const TestType testType) - { - for (int i = 0; i < 3; ++i) - { - auto expectedValRow = expectedVal[i]; - auto testValRow = testVal[i]; - verifyTestVector3dValue(memberName, expectedValRow, testValRow, testType); - } - } - -protected: - uint32_t m_queueFamily; - core::smart_refctd_ptr m_device; - core::smart_refctd_ptr m_api; - video::IPhysicalDevice* m_physicalDevice; - core::smart_refctd_ptr m_assetMgr; - core::smart_refctd_ptr m_logger; - video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; - video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; - core::smart_refctd_ptr m_cmdbuf = nullptr; - core::smart_refctd_ptr m_cmdpool = nullptr; - core::smart_refctd_ptr m_ds = nullptr; - core::smart_refctd_ptr m_pplnLayout = nullptr; - 
core::smart_refctd_ptr m_pipeline; - core::smart_refctd_ptr m_semaphore; - video::IQueue* m_queue; - uint64_t m_semaphoreCounter; - - template - OutputStruct dispatch(const InputStruct& input) - { - // Update input buffer - if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) - logFail("Failed to map the Device Memory!\n"); - - const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); - if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memoryRange); - - std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), &input, sizeof(InputStruct)); - - m_inputBufferAllocation.memory->unmap(); - - // record command buffer - m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); - m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); - m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); - m_cmdbuf->bindComputePipeline(m_pipeline.get()); - m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); - m_cmdbuf->dispatch(1, 1, 1); - m_cmdbuf->endDebugMarker(); - m_cmdbuf->end(); - - video::IQueue::SSubmitInfo submitInfos[1] = {}; - const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; - submitInfos[0].commandBuffers = cmdbufs; - const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; - submitInfos[0].signalSemaphores = signals; - - m_api->startCapture(); - m_queue->submit(submitInfos); - m_api->endCapture(); - - m_device->waitIdle(); - OutputStruct output; - std::memcpy(&output, 
static_cast(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct)); - m_device->waitIdle(); - - return output; - } - -private: - template - inline void logFail(const char* msg, Args&&... args) - { - m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); - exit(-1); - } -}; - -#endif \ No newline at end of file diff --git a/22_CppCompat/app_resources/common.hlsl b/22_CppCompat/app_resources/common.hlsl index dc3ff5fcd..7fed20bbe 100644 --- a/22_CppCompat/app_resources/common.hlsl +++ b/22_CppCompat/app_resources/common.hlsl @@ -208,82 +208,6 @@ struct TgmathTestValues ModfOutput modfStructVec; FrexpOutput frexpStruct; FrexpOutput frexpStructVec; - - void fillTestValues(NBL_CONST_REF_ARG(TgmathIntputTestValues) input) - { - floor = nbl::hlsl::floor(input.floor); - isnan = nbl::hlsl::isnan(input.isnan); - isinf = nbl::hlsl::isinf(input.isinf); - pow = nbl::hlsl::pow(input.powX, input.powY); - exp = nbl::hlsl::exp(input.exp); - exp2 = nbl::hlsl::exp2(input.exp2); - log = nbl::hlsl::log(input.log); - log2 = nbl::hlsl::log2(input.log2); - absF = nbl::hlsl::abs(input.absF); - absI = nbl::hlsl::abs(input.absI); - sqrt = nbl::hlsl::sqrt(input.sqrt); - sin = nbl::hlsl::sin(input.sin); - cos = nbl::hlsl::cos(input.cos); - tan = nbl::hlsl::tan(input.tan); - asin = nbl::hlsl::asin(input.asin); - atan = nbl::hlsl::atan(input.atan); - sinh = nbl::hlsl::sinh(input.sinh); - cosh = nbl::hlsl::cosh(input.cosh); - tanh = nbl::hlsl::tanh(input.tanh); - asinh = nbl::hlsl::asinh(input.asinh); - acosh = nbl::hlsl::acosh(input.acosh); - atanh = nbl::hlsl::atanh(input.atanh); - atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); - erf = nbl::hlsl::erf(input.erf); - erfInv = nbl::hlsl::erfInv(input.erfInv); - acos = nbl::hlsl::acos(input.acos); - modf = nbl::hlsl::modf(input.modf); - round = nbl::hlsl::round(input.round); - roundEven = nbl::hlsl::roundEven(input.roundEven); - trunc = nbl::hlsl::trunc(input.trunc); - ceil = nbl::hlsl::ceil(input.ceil); - 
fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); - ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); - - floorVec = nbl::hlsl::floor(input.floorVec); - isnanVec = nbl::hlsl::isnan(input.isnanVec); - isinfVec = nbl::hlsl::isinf(input.isinfVec); - powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); - expVec = nbl::hlsl::exp(input.expVec); - exp2Vec = nbl::hlsl::exp2(input.exp2Vec); - logVec = nbl::hlsl::log(input.logVec); - log2Vec = nbl::hlsl::log2(input.log2Vec); - absFVec = nbl::hlsl::abs(input.absFVec); - absIVec = nbl::hlsl::abs(input.absIVec); - sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); - sinVec = nbl::hlsl::sin(input.sinVec); - cosVec = nbl::hlsl::cos(input.cosVec); - tanVec = nbl::hlsl::tan(input.tanVec); - asinVec = nbl::hlsl::asin(input.asinVec); - atanVec = nbl::hlsl::atan(input.atanVec); - sinhVec = nbl::hlsl::sinh(input.sinhVec); - coshVec = nbl::hlsl::cosh(input.coshVec); - tanhVec = nbl::hlsl::tanh(input.tanhVec); - asinhVec = nbl::hlsl::asinh(input.asinhVec); - acoshVec = nbl::hlsl::acosh(input.acoshVec); - atanhVec = nbl::hlsl::atanh(input.atanhVec); - atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); - acosVec = nbl::hlsl::acos(input.acosVec); - modfVec = nbl::hlsl::modf(input.modfVec); - roundVec = nbl::hlsl::round(input.roundVec); - roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); - truncVec = nbl::hlsl::trunc(input.truncVec); - ceilVec = nbl::hlsl::ceil(input.ceilVec); - fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); - ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); - erfVec = nbl::hlsl::erf(input.erfVec); - erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); - - modfStruct = nbl::hlsl::modfStruct(input.modfStruct); - modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); - frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); - frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); - } }; struct IntrinsicsIntputTestValues @@ -416,58 +340,140 @@ struct 
IntrinsicsTestValues spirv::SubBorrowOutput subBorrow; spirv::AddCarryOutput addCarryVec; spirv::SubBorrowOutput subBorrowVec; +}; - void fillTestValues(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input) +struct IntrinsicsTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input, NBL_REF_ARG(IntrinsicsTestValues) output) { - bitCount = nbl::hlsl::bitCount(input.bitCount); - cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); - clamp = nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); - length = nbl::hlsl::length(input.length); - normalize = nbl::hlsl::normalize(input.normalize); - dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); - determinant = nbl::hlsl::determinant(input.determinant); - findMSB = nbl::hlsl::findMSB(input.findMSB); - findLSB = nbl::hlsl::findLSB(input.findLSB); - inverse = nbl::hlsl::inverse(input.inverse); - transpose = nbl::hlsl::transpose(input.transpose); - mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); + output.bitCount = nbl::hlsl::bitCount(input.bitCount); + output.cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); + output.clamp = nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); + output.length = nbl::hlsl::length(input.length); + output.normalize = nbl::hlsl::normalize(input.normalize); + output.dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); + output.determinant = nbl::hlsl::determinant(input.determinant); + output.findMSB = nbl::hlsl::findMSB(input.findMSB); + output.findLSB = nbl::hlsl::findLSB(input.findLSB); + output.inverse = nbl::hlsl::inverse(input.inverse); + output.transpose = nbl::hlsl::transpose(input.transpose); + output.mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); // TODO: fix min and max - min = nbl::hlsl::min(input.minA, input.minB); - max = nbl::hlsl::max(input.maxA, input.maxB); - rsqrt = nbl::hlsl::rsqrt(input.rsqrt); - bitReverse = nbl::hlsl::bitReverse(input.bitReverse); - frac = nbl::hlsl::fract(input.frac); - mix = 
nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); - sign = nbl::hlsl::sign(input.sign); - radians = nbl::hlsl::radians(input.radians); - degrees = nbl::hlsl::degrees(input.degrees); - step = nbl::hlsl::step(input.stepEdge, input.stepX); - smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); - - bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); - clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); - findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); - findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); + output.min = nbl::hlsl::min(input.minA, input.minB); + output.max = nbl::hlsl::max(input.maxA, input.maxB); + output.rsqrt = nbl::hlsl::rsqrt(input.rsqrt); + output.bitReverse = nbl::hlsl::bitReverse(input.bitReverse); + output.frac = nbl::hlsl::fract(input.frac); + output.mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); + output.sign = nbl::hlsl::sign(input.sign); + output.radians = nbl::hlsl::radians(input.radians); + output.degrees = nbl::hlsl::degrees(input.degrees); + output.step = nbl::hlsl::step(input.stepEdge, input.stepX); + output.smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); + + output.bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); + output.clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); + output.findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); + output.findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); // TODO: fix min and max - minVec = nbl::hlsl::min(input.minAVec, input.minBVec); - maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); - rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); - bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); - fracVec = nbl::hlsl::fract(input.fracVec); - mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); - - signVec = nbl::hlsl::sign(input.signVec); - radiansVec = nbl::hlsl::radians(input.radiansVec); - 
degreesVec = nbl::hlsl::degrees(input.degreesVec); - stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); - smoothStepVec = nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); - faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); - reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); - refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); - addCarry = nbl::hlsl::addCarry(input.addCarryA, input.addCarryB); - subBorrow = nbl::hlsl::subBorrow(input.subBorrowA, input.subBorrowB); - addCarryVec = nbl::hlsl::addCarry(input.addCarryAVec, input.addCarryBVec); - subBorrowVec = nbl::hlsl::subBorrow(input.subBorrowAVec, input.subBorrowBVec); + output.minVec = nbl::hlsl::min(input.minAVec, input.minBVec); + output.maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); + output.rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); + output.bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); + output.fracVec = nbl::hlsl::fract(input.fracVec); + output.mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); + + output.signVec = nbl::hlsl::sign(input.signVec); + output.radiansVec = nbl::hlsl::radians(input.radiansVec); + output.degreesVec = nbl::hlsl::degrees(input.degreesVec); + output.stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); + output.smoothStepVec = nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); + output.faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); + output.reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); + output.refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); + output.addCarry = nbl::hlsl::addCarry(input.addCarryA, input.addCarryB); + output.subBorrow = nbl::hlsl::subBorrow(input.subBorrowA, input.subBorrowB); + output.addCarryVec = 
nbl::hlsl::addCarry(input.addCarryAVec, input.addCarryBVec); + output.subBorrowVec = nbl::hlsl::subBorrow(input.subBorrowAVec, input.subBorrowBVec); + } +}; + +struct TgmathTestExecutor +{ + void operator()(NBL_CONST_REF_ARG(TgmathIntputTestValues) input, NBL_REF_ARG(TgmathTestValues) output) + { + output.floor = nbl::hlsl::floor(input.floor); + output.isnan = nbl::hlsl::isnan(input.isnan); + output.isinf = nbl::hlsl::isinf(input.isinf); + output.pow = nbl::hlsl::pow(input.powX, input.powY); + output.exp = nbl::hlsl::exp(input.exp); + output.exp2 = nbl::hlsl::exp2(input.exp2); + output.log = nbl::hlsl::log(input.log); + output.log2 = nbl::hlsl::log2(input.log2); + output.absF = nbl::hlsl::abs(input.absF); + output.absI = nbl::hlsl::abs(input.absI); + output.sqrt = nbl::hlsl::sqrt(input.sqrt); + output.sin = nbl::hlsl::sin(input.sin); + output.cos = nbl::hlsl::cos(input.cos); + output.tan = nbl::hlsl::tan(input.tan); + output.asin = nbl::hlsl::asin(input.asin); + output.atan = nbl::hlsl::atan(input.atan); + output.sinh = nbl::hlsl::sinh(input.sinh); + output.cosh = nbl::hlsl::cosh(input.cosh); + output.tanh = nbl::hlsl::tanh(input.tanh); + output.asinh = nbl::hlsl::asinh(input.asinh); + output.acosh = nbl::hlsl::acosh(input.acosh); + output.atanh = nbl::hlsl::atanh(input.atanh); + output.atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); + output.erf = nbl::hlsl::erf(input.erf); + output.erfInv = nbl::hlsl::erfInv(input.erfInv); + output.acos = nbl::hlsl::acos(input.acos); + output.modf = nbl::hlsl::modf(input.modf); + output.round = nbl::hlsl::round(input.round); + output.roundEven = nbl::hlsl::roundEven(input.roundEven); + output.trunc = nbl::hlsl::trunc(input.trunc); + output.ceil = nbl::hlsl::ceil(input.ceil); + output.fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); + output.ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); + + output.floorVec = nbl::hlsl::floor(input.floorVec); + output.isnanVec = nbl::hlsl::isnan(input.isnanVec); + 
output.isinfVec = nbl::hlsl::isinf(input.isinfVec); + output.powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); + output.expVec = nbl::hlsl::exp(input.expVec); + output.exp2Vec = nbl::hlsl::exp2(input.exp2Vec); + output.logVec = nbl::hlsl::log(input.logVec); + output.log2Vec = nbl::hlsl::log2(input.log2Vec); + output.absFVec = nbl::hlsl::abs(input.absFVec); + output.absIVec = nbl::hlsl::abs(input.absIVec); + output.sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); + output.sinVec = nbl::hlsl::sin(input.sinVec); + output.cosVec = nbl::hlsl::cos(input.cosVec); + output.tanVec = nbl::hlsl::tan(input.tanVec); + output.asinVec = nbl::hlsl::asin(input.asinVec); + output.atanVec = nbl::hlsl::atan(input.atanVec); + output.sinhVec = nbl::hlsl::sinh(input.sinhVec); + output.coshVec = nbl::hlsl::cosh(input.coshVec); + output.tanhVec = nbl::hlsl::tanh(input.tanhVec); + output.asinhVec = nbl::hlsl::asinh(input.asinhVec); + output.acoshVec = nbl::hlsl::acosh(input.acoshVec); + output.atanhVec = nbl::hlsl::atanh(input.atanhVec); + output.atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); + output.acosVec = nbl::hlsl::acos(input.acosVec); + output.modfVec = nbl::hlsl::modf(input.modfVec); + output.roundVec = nbl::hlsl::round(input.roundVec); + output.roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); + output.truncVec = nbl::hlsl::trunc(input.truncVec); + output.ceilVec = nbl::hlsl::ceil(input.ceilVec); + output.fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); + output.ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); + output.erfVec = nbl::hlsl::erf(input.erfVec); + output.erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); + + output.modfStruct = nbl::hlsl::modfStruct(input.modfStruct); + output.modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); + output.frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); + output.frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); } }; diff --git 
a/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl b/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl index df7cef1cf..5fe3b4c20 100644 --- a/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl +++ b/22_CppCompat/app_resources/intrinsicsTest.comp.hlsl @@ -4,13 +4,16 @@ #pragma shader_stage(compute) #include "common.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(256, 1, 1)] -void main(uint3 invocationID : SV_DispatchThreadID) +[numthreads(WORKGROUP_SIZE, 1, 1)] +[shader("compute")] +void main() { - if(invocationID.x == 0) - outputTestValues[0].fillTestValues(inputTestValues[0]); -} + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + IntrinsicsTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/22_CppCompat/app_resources/tgmathTest.comp.hlsl b/22_CppCompat/app_resources/tgmathTest.comp.hlsl index 5d93ffb64..6115eebc6 100644 --- a/22_CppCompat/app_resources/tgmathTest.comp.hlsl +++ b/22_CppCompat/app_resources/tgmathTest.comp.hlsl @@ -4,13 +4,16 @@ #pragma shader_stage(compute) #include "common.hlsl" +#include [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(256, 1, 1)] -void main(uint3 invocationID : SV_DispatchThreadID) +[numthreads(WORKGROUP_SIZE, 1, 1)] +[shader("compute")] +void main() { - if(invocationID.x == 0) - outputTestValues[0].fillTestValues(inputTestValues[0]); -} + const uint invID = nbl::hlsl::glsl::gl_GlobalInvocationID().x; + TgmathTestExecutor executor; + executor(inputTestValues[invID], outputTestValues[invID]); +} \ No newline at end of file diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index 70c8d7b3a..9bfcbb894 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -59,25 +59,33 @@ class CompatibilityTest final : public 
application_templates::MonoDeviceApplicat if (!asset_base_t::onAppInitialized(std::move(system))) return false; - ITester::PipelineSetupData pplnSetupData; - pplnSetupData.device = m_device; - pplnSetupData.api = m_api; - pplnSetupData.assetMgr = m_assetMgr; - pplnSetupData.logger = m_logger; - pplnSetupData.physicalDevice = m_physicalDevice; - pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); - { - CTgmathTester tgmathTester; + CTgmathTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/tgmathTest.comp.hlsl"; - tgmathTester.setupPipeline(pplnSetupData); - tgmathTester.performTests(); + + CTgmathTester tgmathTester(4); + tgmathTester.setupPipeline(pplnSetupData); + tgmathTester.performTestsAndVerifyResults(); } { - CIntrinsicsTester intrinsicsTester; + CIntrinsicsTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); pplnSetupData.testShaderPath = "app_resources/intrinsicsTest.comp.hlsl"; - intrinsicsTester.setupPipeline(pplnSetupData); - intrinsicsTester.performTests(); + + CIntrinsicsTester intrinsicsTester(4); + intrinsicsTester.setupPipeline(pplnSetupData); + intrinsicsTester.performTestsAndVerifyResults(); } m_queue = m_device->getQueue(0, 0); diff --git a/14_Mortons/ITester.h b/common/include/nbl/examples/Tester/ITester.h similarity index 68% rename from 14_Mortons/ITester.h rename to common/include/nbl/examples/Tester/ITester.h index 3be6d1d6b..0027b8b70 100644 --- a/14_Mortons/ITester.h +++ 
b/common/include/nbl/examples/Tester/ITester.h @@ -1,20 +1,16 @@ -#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ +#ifndef _NBL_COMMON_I_TESTER_INCLUDED_ +#define _NBL_COMMON_I_TESTER_INCLUDED_ #include -#include "app_resources/common.hlsl" -#include "nbl/application_templates/MonoDeviceApplication.hpp" +#include +#include using namespace nbl; -class ITester +template +class ITester { public: - virtual ~ITester() - { - m_outputBufferAllocation.memory->unmap(); - }; - struct PipelineSetupData { std::string testShaderPath; @@ -26,7 +22,6 @@ class ITester uint32_t computeFamilyIndex; }; - template void setupPipeline(const PipelineSetupData& pipleineSetupData) { // setting up pipeline in the constructor @@ -57,7 +52,12 @@ class ITester assert(assets.size() == 1); core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - shader = m_device->compileShader({source.get()}); + auto overridenSource = asset::CHLSLCompiler::createOverridenCopy( + source.get(), "#define WORKGROUP_SIZE %d\n#define TEST_COUNT %d\n", + m_WorkgroupSize, m_testIterationCount + ); + + shader = m_device->compileShader({overridenSource.get()}); } if (!shader) @@ -99,7 +99,7 @@ class ITester // Allocate memory of the input buffer { - constexpr size_t BufferSize = sizeof(InputStruct); + const size_t BufferSize = sizeof(InputTestValues) * m_testIterationCount; video::IGPUBuffer::SCreationParams params = {}; params.size = BufferSize; @@ -134,7 +134,7 @@ class ITester // Allocate memory of the output buffer { - constexpr size_t BufferSize = sizeof(OutputStruct); + const size_t BufferSize = sizeof(TestResults) * m_testIterationCount; video::IGPUBuffer::SCreationParams params = {}; params.size = BufferSize; @@ -174,36 +174,70 @@ class ITester if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) m_device->invalidateMappedMemoryRanges(1, &memoryRange); 
- assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct)); + assert(memoryRange.valid() && memoryRange.length >= sizeof(TestResults) * m_testIterationCount); m_queue = m_device->getQueue(m_queueFamily, 0); } + void performTestsAndVerifyResults() + { + core::vector inputTestValues; + core::vector expectedTestResults; + + inputTestValues.reserve(m_testIterationCount); + expectedTestResults.reserve(m_testIterationCount); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (size_t i = 0; i < m_testIterationCount; ++i) + { + // Set input test values that will be used in both CPU and GPU tests + InputTestValues testInput = generateInputTestValues(); + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestResults expected = determineExpectedResults(testInput); + + inputTestValues.push_back(testInput); + expectedTestResults.push_back(expected); + } + + core::vector cpuTestResults = performCpuTests(inputTestValues); + core::vector gpuTestResults = performGpuTests(inputTestValues); + + verifyAllTestResults(cpuTestResults, gpuTestResults, expectedTestResults); + + m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + reloadSeed(); + } + + virtual ~ITester() + { + m_outputBufferAllocation.memory->unmap(); + }; + +protected: enum class TestType { CPU, GPU }; - template - void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) + /** + * @param testBatchCount one test batch is equal to m_WorkgroupSize, so number of tests performed will be m_WorkgroupSize * testbatchCount + */ + ITester(const uint32_t testBatchCount) + : m_testIterationCount(testBatchCount* m_WorkgroupSize) { - if (expectedVal == testVal) - return; + reloadSeed(); + }; - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - break; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } 
+ virtual void verifyTestResults(const TestResults& expectedTestValues, const TestResults& testValues, const size_t testIteration, const uint32_t seed, TestType testType) = 0; - ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; + virtual InputTestValues generateInputTestValues() = 0; - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); + virtual TestResults determineExpectedResults(const InputTestValues& testInput) = 0; + + std::mt19937& getRandomEngine() + { + return m_mersenneTwister; } protected: @@ -223,9 +257,8 @@ class ITester core::smart_refctd_ptr m_semaphore; video::IQueue* m_queue; uint64_t m_semaphoreCounter; - - template - OutputStruct dispatch(const InputStruct& input) + + void dispatchGpuTests(const core::vector& input, core::vector& output) { // Update input buffer if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) @@ -235,17 +268,20 @@ class ITester if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) m_device->invalidateMappedMemoryRanges(1, &memoryRange); - std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), &input, sizeof(InputStruct)); + assert(m_testIterationCount == input.size()); + const size_t inputDataSize = sizeof(InputTestValues) * m_testIterationCount; + std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), input.data(), inputDataSize); m_inputBufferAllocation.memory->unmap(); // record command buffer + const uint32_t dispatchSizeX = (m_testIterationCount + (m_WorkgroupSize - 1)) / m_WorkgroupSize; m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); m_cmdbuf->bindComputePipeline(m_pipeline.get()); m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, 
m_pplnLayout.get(), 0, 1, &m_ds.get()); - m_cmdbuf->dispatch(1, 1, 1); + m_cmdbuf->dispatch(dispatchSizeX, 1, 1); m_cmdbuf->endDebugMarker(); m_cmdbuf->end(); @@ -260,11 +296,36 @@ class ITester m_api->endCapture(); m_device->waitIdle(); - OutputStruct output; - std::memcpy(&output, static_cast(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct)); + + // save test results + assert(m_testIterationCount == output.size()); + const size_t outputDataSize = sizeof(TestResults) * m_testIterationCount; + std::memcpy(output.data(), static_cast(m_outputBufferAllocation.memory->getMappedPointer()), outputDataSize); + m_device->waitIdle(); + } - return output; + template + void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const size_t testIteration, const uint32_t seed, const TestType testType) + { + if (expectedVal == testVal) + return; + + std::stringstream ss; + switch (testType) + { + case TestType::CPU: + ss << "CPU TEST ERROR:\n"; + break; + case TestType::GPU: + ss << "GPU TEST ERROR:\n"; + } + + ss << "nbl::hlsl::" << memberName << " produced incorrect output!" 
<< '\n'; + ss << "TEST ITERATION INDEX: " << testIteration << " SEED: " << seed << '\n'; + ss << "EXPECTED VALUE: " << system::to_string(expectedVal) << " TEST VALUE: " << system::to_string(testVal) << '\n'; + + m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); } private: @@ -274,6 +335,52 @@ class ITester m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); exit(-1); } + + core::vector performCpuTests(const core::vector& inputTestValues) + { + core::vector output(m_testIterationCount); + TestExecutor testExecutor; + + auto iterations = std::views::iota(0ull, m_testIterationCount); + std::for_each(std::execution::par_unseq, iterations.begin(), iterations.end(), + [&](size_t i) + { + testExecutor(inputTestValues[i], output[i]); + } + ); + + return output; + } + + core::vector performGpuTests(const core::vector& inputTestValues) + { + core::vector output(m_testIterationCount); + dispatchGpuTests(inputTestValues, output); + + return output; + } + + void verifyAllTestResults(const core::vector& cpuTestResults, const core::vector& gpuTestResults, const core::vector& expectedTestResults) + { + for (size_t i = 0; i < m_testIterationCount; ++i) + { + verifyTestResults(expectedTestResults[i], cpuTestResults[i], i, m_seed, ITester::TestType::CPU); + verifyTestResults(expectedTestResults[i], gpuTestResults[i], i, m_seed, ITester::TestType::GPU); + } + } + + void reloadSeed() + { + std::random_device rd; + m_seed = rd(); + m_mersenneTwister = std::mt19937(m_seed); + } + + const size_t m_testIterationCount; + static constexpr size_t m_WorkgroupSize = 128u; + // seed will change after every call to performTestsAndVerifyResults() + std::mt19937 m_mersenneTwister; + uint32_t m_seed; }; #endif \ No newline at end of file