diff --git a/03_DeviceSelectionAndSharedSources/main.cpp b/03_DeviceSelectionAndSharedSources/main.cpp index b8fd3d18b..bcc849a4d 100644 --- a/03_DeviceSelectionAndSharedSources/main.cpp +++ b/03_DeviceSelectionAndSharedSources/main.cpp @@ -257,7 +257,7 @@ class DeviceSelectionAndSharedSourcesApp final : public application_templates::M } const auto* metadata = assetBundle.getMetadata(); - const auto hlslMetadata = static_cast(metadata); + const auto hlslMetadata = static_cast(metadata); const auto shaderStage = hlslMetadata->shaderStages->front(); // It would be super weird if loading a shader from a file produced more than 1 asset diff --git a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt index a434ff32a..55ebaf41d 100644 --- a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt +++ b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt @@ -21,4 +21,49 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS 
${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl b/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl index af38ffada..31c60aefd 100644 --- a/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl +++ b/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl @@ -1,12 +1,9 @@ #include "common.hlsl" -// just a small test -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" - [[vk::push_constant]] PushConstantData pushConstants; // does absolutely nothing, a later example will show how it gets used -template +template void dummyTraitTest() {} [numthreads(WorkgroupSize,1,1)] diff --git a/05_StreamingAndBufferDeviceAddressApp/main.cpp b/05_StreamingAndBufferDeviceAddressApp/main.cpp index b82dc18ca..ab0984a07 100644 --- a/05_StreamingAndBufferDeviceAddressApp/main.cpp +++ b/05_StreamingAndBufferDeviceAddressApp/main.cpp @@ -6,6 +6,7 @@ // I've moved out a tiny part of this example into a shared header for reuse, please open and read it. 
#include "nbl/application_templates/MonoDeviceApplication.hpp" #include "nbl/examples/common/BuiltinResourcesApplication.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; @@ -95,15 +96,15 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset("app_resources/shader.comp.hlsl",lp); + lp.workingDirectory = "app_resources"; // virtual root + + auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - const auto shaderSource = IAsset::castDown(assets[0]); - shader = m_device->compileShader({shaderSource.get()}); + shader = IAsset::castDown(assets[0]); // The down-cast should not fail! 
assert(shader); } diff --git a/07_StagingAndMultipleQueues/CMakeLists.txt b/07_StagingAndMultipleQueues/CMakeLists.txt index a434ff32a..fe063be7c 100644 --- a/07_StagingAndMultipleQueues/CMakeLists.txt +++ b/07_StagingAndMultipleQueues/CMakeLists.txt @@ -21,4 +21,49 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/comp_shader.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/comp_shader.hlsl", + "KEY": "comp_shader", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/07_StagingAndMultipleQueues/app_resources/common.hlsl b/07_StagingAndMultipleQueues/app_resources/common.hlsl index 259d5069d..de15810c9 100644 --- a/07_StagingAndMultipleQueues/app_resources/common.hlsl +++ b/07_StagingAndMultipleQueues/app_resources/common.hlsl @@ 
-1,8 +1,8 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t WorkgroupSizeX = 16; -NBL_CONSTEXPR uint32_t WorkgroupSizeY = 16; -NBL_CONSTEXPR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeX = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeY = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; static const uint32_t FRAMES_IN_FLIGHT = 3u; diff --git a/07_StagingAndMultipleQueues/main.cpp b/07_StagingAndMultipleQueues/main.cpp index fc6bf4551..a850c1c47 100644 --- a/07_StagingAndMultipleQueues/main.cpp +++ b/07_StagingAndMultipleQueues/main.cpp @@ -4,6 +4,7 @@ // I've moved out a tiny part of this example into a shared header for reuse, please open and read it. #include "nbl/examples/examples.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; using namespace nbl::core; @@ -189,7 +190,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul for (uint32_t imageIdx = 0; imageIdx < IMAGE_CNT; ++imageIdx) { const auto imagePathToLoad = imagesToLoad[imageIdx]; - auto cpuImage = loadFistAssetInBundle(imagePathToLoad); + auto cpuImage = loadImageAsset(imagePathToLoad); if (!cpuImage) logFailAndTerminate("Failed to load image from path %s",ILogger::ELL_ERROR,imagePathToLoad); @@ -279,17 +280,10 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul } // LOAD SHADER FROM FILE - smart_refctd_ptr source; - { - source = loadFistAssetInBundle("../app_resources/comp_shader.hlsl"); - } + smart_refctd_ptr shader = loadPreCompiledShader<"comp_shader">(); // "../app_resources/comp_shader.hlsl" - if (!source) - logFailAndTerminate("Could not create a CPU shader!"); - - core::smart_refctd_ptr shader = m_device->compileShader({ source.get() }); - if(!shader) - logFailAndTerminate("Could not compile shader to spirv!"); + if (!shader) + 
logFailAndTerminate("Could not load the precompiled shader!"); // CREATE COMPUTE PIPELINE SPushConstantRange pc[1]; @@ -534,21 +528,39 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul return false; } - - template - core::smart_refctd_ptr loadFistAssetInBundle(const std::string& path) + + core::smart_refctd_ptr loadImageAsset(const std::string& path) { IAssetLoader::SAssetLoadParams lp; SAssetBundle bundle = m_assetMgr->getAsset(path, lp); if (bundle.getContents().empty()) - logFailAndTerminate("Couldn't load an asset.",ILogger::ELL_ERROR); + logFailAndTerminate("Couldn't load an image.",ILogger::ELL_ERROR); - auto asset = IAsset::castDown(bundle.getContents()[0]); + auto asset = IAsset::castDown(bundle.getContents()[0]); if (!asset) logFailAndTerminate("Incorrect asset loaded.",ILogger::ELL_ERROR); return asset; } + + template + core::smart_refctd_ptr loadPreCompiledShader() + { + IAssetLoader::SAssetLoadParams lp; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + SAssetBundle bundle = m_assetMgr->getAsset(key.data(), lp); + if (bundle.getContents().empty()) + logFailAndTerminate("Couldn't load a shader.", ILogger::ELL_ERROR); + + auto asset = IAsset::castDown(bundle.getContents()[0]); + if (!asset) + logFailAndTerminate("Incorrect asset loaded.", ILogger::ELL_ERROR); + + return asset; + } }; NBL_MAIN_FUNC(StagingAndMultipleQueuesApp) diff --git a/10_CountingSort/CMakeLists.txt b/10_CountingSort/CMakeLists.txt index b7cad41da..14bde428d 100644 --- a/10_CountingSort/CMakeLists.txt +++ b/10_CountingSort/CMakeLists.txt @@ -22,3 +22,70 @@ if(NBL_EMBED_BUILTIN_RESOURCES) LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/prefix_sum_shader.comp.hlsl + app_resources/scatter_shader.comp.hlsl 
+) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(REQUIRED_CAPS [=[ + { + "kind": "limits", + "name": "maxComputeWorkGroupInvocations", + "type": "uint32_t", + "values": [256,512,1024] + }, + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + } +]=]) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/prefix_sum_shader.comp.hlsl", + "KEY": "prefix_sum_shader", + "CAPS": [${REQUIRED_CAPS}] + }, + { + "INPUT": "app_resources/scatter_shader.comp.hlsl", + "KEY": "scatter_shader", + "CAPS": [${REQUIRED_CAPS}] + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/10_CountingSort/app_resources/common.hlsl b/10_CountingSort/app_resources/common.hlsl index bcbf01727..1074432b0 100644 --- a/10_CountingSort/app_resources/common.hlsl +++ b/10_CountingSort/app_resources/common.hlsl @@ -22,6 +22,10 @@ using namespace nbl::hlsl; #ifdef __HLSL_VERSION #include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +static const uint32_t WorkgroupSize = DeviceConfigCaps::maxComputeWorkGroupInvocations; +static const uint32_t MaxBucketCount = (DeviceConfigCaps::maxComputeSharedMemorySize / sizeof(uint32_t)) / 2; +static const uint32_t BucketCount = (MaxBucketCount > 3000) ? 
3000 : MaxBucketCount; + using Ptr = bda::__ptr; using PtrAccessor = BdaAccessor; @@ -54,6 +58,8 @@ uint32_t3 glsl::gl_WorkGroupSize() { return uint32_t3(WorkgroupSize, 1, 1); } + + #endif #endif \ No newline at end of file diff --git a/10_CountingSort/main.cpp b/10_CountingSort/main.cpp index d51650919..a22647750 100644 --- a/10_CountingSort/main.cpp +++ b/10_CountingSort/main.cpp @@ -1,4 +1,5 @@ #include "nbl/examples/examples.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; using namespace nbl::core; @@ -32,19 +33,34 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio return false; auto limits = m_physicalDevice->getLimits(); + constexpr std::array AllowedMaxComputeSharedMemorySizes = { + 16384, 32768, 65536 + }; + + auto upperBoundSharedMemSize = std::upper_bound(AllowedMaxComputeSharedMemorySizes.begin(), AllowedMaxComputeSharedMemorySizes.end(), limits.maxComputeSharedMemorySize); + // devices which support less than 16KB of max compute shared memory size are not supported + if (upperBoundSharedMemSize == AllowedMaxComputeSharedMemorySizes.begin()) + { + m_logger->log("maxComputeSharedMemorySize is too low (%u)", ILogger::E_LOG_LEVEL::ELL_ERROR, limits.maxComputeSharedMemorySize); + exit(0); + } + + limits.maxComputeSharedMemorySize = *(upperBoundSharedMemSize - 1); + const uint32_t WorkgroupSize = limits.maxComputeWorkGroupInvocations; const uint32_t MaxBucketCount = (limits.maxComputeSharedMemorySize / sizeof(uint32_t)) / 2; constexpr uint32_t element_count = 100000; const uint32_t bucket_count = std::min((uint32_t)3000, MaxBucketCount); const uint32_t elements_per_thread = ceil((float)ceil((float)element_count / limits.computeUnits) / WorkgroupSize); - auto prepShader = [&](const core::string& path) -> smart_refctd_ptr + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr { // this time we load a shader directly from a file IAssetLoader::SAssetLoadParams lp = {}; lp.logger = 
m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(path,lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key(limits, m_physicalDevice->getFeatures()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) { @@ -52,29 +68,24 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio return nullptr; } - auto source = IAsset::castDown(assets[0]); + auto shader = IAsset::castDown(assets[0]); // The down-cast should not fail! - assert(source); + assert(shader); // There's two ways of doing stuff like this: // 1. this - modifying the asset after load // 2. creating a short shader source file that includes the asset you would have wanted to load - auto overrideSource = CHLSLCompiler::createOverridenCopy( - source.get(), "#define WorkgroupSize %d\n#define BucketCount %d\n", - WorkgroupSize, bucket_count - ); + // + //auto overrideSource = CHLSLCompiler::createOverridenCopy( + // source.get(), "#define WorkgroupSize %d\n#define BucketCount %d\n", + // WorkgroupSize, bucket_count + //); // this time we skip the use of the asset converter since the IShader->IGPUShader path is quick and simple - auto shader = m_device->compileShader({ overrideSource.get() }); - if (!shader) - { - logFail("Creation of Prefix Sum Shader from CPU Shader source failed!"); - return nullptr; - } return shader; }; - auto prefixSumShader = prepShader("app_resources/prefix_sum_shader.comp.hlsl"); - auto scatterShader = prepShader("app_resources/scatter_shader.comp.hlsl"); + auto prefixSumShader = loadPrecompiledShader.operator()<"prefix_sum_shader">(); // "app_resources/prefix_sum_shader.comp.hlsl" + auto scatterShader = loadPrecompiledShader.operator()<"scatter_shader">(); // "app_resources/scatter_shader.comp.hlsl" // People love Reflection but I prefer Shader Sources instead! 
const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE,.offset = 0,.size = sizeof(CountingPushData) }; diff --git a/11_FFT/CMakeLists.txt b/11_FFT/CMakeLists.txt index a434ff32a..ca9fe8428 100644 --- a/11_FFT/CMakeLists.txt +++ b/11_FFT/CMakeLists.txt @@ -21,4 +21,49 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/11_FFT/main.cpp b/11_FFT/main.cpp index 1886da72a..49d157a38 100644 --- a/11_FFT/main.cpp +++ b/11_FFT/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" @@ -45,15 +46,6 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ smart_refctd_ptr m_timeline; uint64_t semaphorValue = 0; - inline core::smart_refctd_ptr createShader( - const char* includeMainName) - { - std::string prelude = "#include \""; - auto hlslShader = core::make_smart_refctd_ptr((prelude + includeMainName + "\"\n").c_str(), IShader::E_CONTENT_TYPE::ECT_HLSL, includeMainName); - assert(hlslShader); - return m_device->compileShader({ hlslShader.get() }); - } - public: // Yay thanks to multiple inheritance we cannot forward ctors anymore FFT_Test(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : @@ -68,28 +60,23 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ if (!asset_base_t::onAppInitialized(std::move(system))) return false; - // this time we load a shader directly from a file smart_refctd_ptr shader; - /* { + { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset("app_resources/shader.comp.hlsl", lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); // Cast down the asset to its proper type - auto source = IAsset::castDown(assets[0]); - // The down-cast should not fail! 
- assert(source); - - // Compile directly to SPIR-V Shader - shader = m_device->compileShader({ source.get() }); + shader = IAsset::castDown(assets[0]); + if (!shader) - return logFail("Creation of a SPIR-V Shader from HLSL Shader source failed!"); - }*/ - shader = createShader("app_resources/shader.comp.hlsl"); + return logFail("Invalid shader!"); + } // Create massive upload/download buffers constexpr uint32_t DownstreamBufferSize = sizeof(scalar_t) << 23; diff --git a/14_Mortons/CMakeLists.txt b/14_Mortons/CMakeLists.txt new file mode 100644 index 000000000..a434ff32a --- /dev/null +++ b/14_Mortons/CMakeLists.txt @@ -0,0 +1,24 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h new file mode 100644 index 000000000..6933e77e5 
--- /dev/null +++ b/14_Mortons/CTester.h @@ -0,0 +1,521 @@ +#ifndef _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ + +#include +#include "app_resources/testCommon.hlsl" +#include "app_resources/testCommon2.hlsl" +#include "ITester.h" + +using namespace nbl; + +class CTester final : public ITester +{ +public: + void performTests() + { + std::random_device rd; + std::mt19937 mt(rd()); + + std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < Iterations; ++i) + { + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + testInput.shift = generatedShift; + { + uint64_t generatedA = longDistribution(mt); + uint64_t generatedB = longDistribution(mt); + + testInput.generatedA = generatedA; + testInput.generatedB = generatedB; + + expected.emulatedAnd = _static_cast(generatedA & generatedB); + expected.emulatedOr = _static_cast(generatedA | generatedB); + expected.emulatedXor = _static_cast(generatedA ^ generatedB); + expected.emulatedNot = _static_cast(~generatedA); + expected.emulatedPlus = _static_cast(generatedA + generatedB); + expected.emulatedMinus = _static_cast(generatedA - generatedB); + expected.emulatedUnaryMinus = _static_cast(-generatedA); + expected.emulatedLess = uint32_t(generatedA < generatedB); + expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); + expected.emulatedGreater = uint32_t(generatedA > 
generatedB); + expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); + + expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); + expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); + expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); + } + { + testInput.coordX = longDistribution(mt); + testInput.coordY = longDistribution(mt); + testInput.coordZ = longDistribution(mt); + testInput.coordW = longDistribution(mt); + + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + 
uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them + // so their highest bits are all 0s or 1s depending on the sign of the number they encode + + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + // Plus + expected.mortonPlus_small_2 = createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); + expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); + expected.mortonPlus_full_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + 
expected.mortonPlus_emulated_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + + expected.mortonPlus_small_3 = createMortonFromU64Vec(Vec3ASmall + Vec3BSmall); + expected.mortonPlus_medium_3 = createMortonFromU64Vec(Vec3AMedium + Vec3BMedium); + expected.mortonPlus_full_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + expected.mortonPlus_emulated_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + + expected.mortonPlus_small_4 = createMortonFromU64Vec(Vec4ASmall + Vec4BSmall); + expected.mortonPlus_medium_4 = createMortonFromU64Vec(Vec4AMedium + Vec4BMedium); + expected.mortonPlus_full_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + expected.mortonPlus_emulated_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + + // Minus + expected.mortonMinus_small_2 = createMortonFromU64Vec(Vec2ASmall - Vec2BSmall); + expected.mortonMinus_medium_2 = createMortonFromU64Vec(Vec2AMedium - Vec2BMedium); + expected.mortonMinus_full_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + expected.mortonMinus_emulated_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + + expected.mortonMinus_small_3 = createMortonFromU64Vec(Vec3ASmall - Vec3BSmall); + expected.mortonMinus_medium_3 = createMortonFromU64Vec(Vec3AMedium - Vec3BMedium); + expected.mortonMinus_full_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + expected.mortonMinus_emulated_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + + expected.mortonMinus_small_4 = createMortonFromU64Vec(Vec4ASmall - Vec4BSmall); + expected.mortonMinus_medium_4 = createMortonFromU64Vec(Vec4AMedium - Vec4BMedium); + expected.mortonMinus_full_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + expected.mortonMinus_emulated_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + + // Coordinate-wise equality + expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); + expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); + expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, 
Vec2BFull)); + expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + + expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); + expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); + expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + + expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); + expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); + expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + expected.mortonEqual_emulated_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); + expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); + expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + + expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); + expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); + expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + + expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); + expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); + expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + // Coordinate-wise signed inequality + expected.mortonSignedLess_small_2 = 
uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); + expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); + expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + + expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); + expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); + expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + + expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); + expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); + expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(generatedShift); + // Left-shift + expected.mortonLeftShift_small_2 = createMortonFromU64Vec(Vec2ASmall << uint16_t(castedShift % smallBits_2)); + expected.mortonLeftShift_medium_2 = createMortonFromU64Vec(Vec2AMedium << uint16_t(castedShift % mediumBits_2)); + expected.mortonLeftShift_full_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + expected.mortonLeftShift_emulated_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + + expected.mortonLeftShift_small_3 = createMortonFromU64Vec(Vec3ASmall << uint16_t(castedShift % smallBits_3)); + expected.mortonLeftShift_medium_3 = createMortonFromU64Vec(Vec3AMedium << uint16_t(castedShift % mediumBits_3)); + expected.mortonLeftShift_full_3 = createMortonFromU64Vec(Vec3AFull << 
uint32_t(castedShift % fullBits_3)); + expected.mortonLeftShift_emulated_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); + + expected.mortonLeftShift_small_4 = createMortonFromU64Vec(Vec4ASmall << uint16_t(castedShift % smallBits_4)); + expected.mortonLeftShift_medium_4 = createMortonFromU64Vec(Vec4AMedium << uint16_t(castedShift % mediumBits_4)); + expected.mortonLeftShift_full_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + expected.mortonLeftShift_emulated_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + + // Unsigned right-shift + expected.mortonUnsignedRightShift_small_2 = morton::code::create(Vec2ASmall >> uint16_t(castedShift % smallBits_2)); + expected.mortonUnsignedRightShift_medium_2 = morton::code::create(Vec2AMedium >> uint16_t(castedShift % mediumBits_2)); + expected.mortonUnsignedRightShift_full_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + expected.mortonUnsignedRightShift_emulated_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + + expected.mortonUnsignedRightShift_small_3 = morton::code::create(Vec3ASmall >> uint16_t(castedShift % smallBits_3)); + expected.mortonUnsignedRightShift_medium_3 = morton::code::create(Vec3AMedium >> uint16_t(castedShift % mediumBits_3)); + expected.mortonUnsignedRightShift_full_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + expected.mortonUnsignedRightShift_emulated_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + + expected.mortonUnsignedRightShift_small_4 = morton::code::create(Vec4ASmall >> uint16_t(castedShift % smallBits_4)); + expected.mortonUnsignedRightShift_medium_4 = morton::code::create(Vec4AMedium >> uint16_t(castedShift % mediumBits_4)); + expected.mortonUnsignedRightShift_full_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + expected.mortonUnsignedRightShift_emulated_4 = 
morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + + // Signed right-shift + expected.mortonSignedRightShift_small_2 = morton::code::create(Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)); + expected.mortonSignedRightShift_medium_2 = morton::code::create(Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)); + expected.mortonSignedRightShift_full_2 = morton::code::create(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + + expected.mortonSignedRightShift_small_3 = morton::code::create(Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)); + expected.mortonSignedRightShift_medium_3 = morton::code::create(Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)); + expected.mortonSignedRightShift_full_3 = morton::code::create(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + + expected.mortonSignedRightShift_small_4 = morton::code::create(Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)); + expected.mortonSignedRightShift_medium_4 = morton::code::create(Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)); + expected.mortonSignedRightShift_full_4 = morton::code::create(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + } + + performCpuTests(testInput, expected); + performGpuTests(testInput, expected); + } + m_logger->log("FIRST TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + } + +private: + inline static constexpr int Iterations = 100u; + + void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues cpuTestValues; + + fillTestValues(commonTestInputValues, cpuTestValues); 
+ verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + + } + + void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues gpuTestValues; + gpuTestValues = dispatch(commonTestInputValues); + verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + } + + void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + { + // Some verification is commented out and moved to CTester2 due to bug in dxc. Uncomment them when the bug is fixed. + verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); + verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); + verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); + verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); + verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); + verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testType); + verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); + verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); + verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); + verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); + verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); + verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); + verifyTestValue("emulatedSignedRightShifted", 
expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); + verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testType); + + // Morton Plus + verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); + verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testType); + verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testType); + verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testType); + + verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testType); + verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testType); + verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testType); + verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testType); + + verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testType); + verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testType); + verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testType); + verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testType); + + // Morton Minus + verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testType); + verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testType); + 
verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testType); + verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testType); + + verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testType); + verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testType); + verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testType); + verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testType); + + verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testType); + verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testType); + verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testType); + verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testType); + + // Morton coordinate-wise equality + verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testType); + verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testType); + verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testType); + verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testType); + + verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testType); + verifyTestValue("mortonEqual_medium_3", 
expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testType); + verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testType); + verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testType); + + verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testType); + verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testType); + verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testType); + verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testType); + + // Morton coordinate-wise unsigned inequality + verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testType); + verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testType); + verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testType); + verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testType); + + verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testType); + verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testType); + verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testType); + verifyTestValue("mortonUnsignedLess_emulated_3", 
expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testType); + + verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testType); + verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); + verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); + // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); + + // Morton coordinate-wise signed inequality + verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); + verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); + verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); + // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); + + verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); + verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); + verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testType); + // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); + + verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, 
testValues.mortonSignedLess_small_4, testType); + verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); + verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); + // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); + + // Morton left-shift + verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); + verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testType); + verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testType); + verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testType); + + verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testType); + verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testType); + verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testType); + verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testType); + + verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testType); + verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testType); + verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, 
testValues.mortonLeftShift_full_4, testType); + verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testType); + + // Morton unsigned right-shift + verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testType); + verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testType); + verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testType); + + verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testType); + verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testType); + verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testType); + + verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testType); + verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testType); + verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, 
testValues.mortonUnsignedRightShift_full_4, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testType); + + // Morton signed right-shift + verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); + verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); + verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); + // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); + + verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testType); + verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testType); + verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testType); + //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); + + verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testType); + verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testType); + verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, 
testValues.mortonSignedRightShift_full_4, testType); + // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); + } +}; + +// Some hlsl code will result in compilation error if mixed together due to some bug in dxc. So we separate them into multiple shader compilation and test. +class CTester2 final : public ITester +{ +public: + void performTests() + { + std::random_device rd; + std::mt19937 mt(rd()); + + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < Iterations; ++i) + { + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + testInput.shift = generatedShift; + { + testInput.coordX = longDistribution(mt); + testInput.coordY = longDistribution(mt); + testInput.coordZ = longDistribution(mt); + testInput.coordW = longDistribution(mt); + + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + int32_t2 Vec2ASignedFull = 
createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(generatedShift); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + + } + + performCpuTests(testInput, expected); + performGpuTests(testInput, expected); + } + m_logger->log("SECOND TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + } + +private: + inline static constexpr int Iterations = 100u; + + void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues cpuTestValues; + + fillTestValues2(commonTestInputValues, cpuTestValues); + verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + + } + + void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues gpuTestValues; + gpuTestValues = dispatch(commonTestInputValues); + verifyTestValues(expectedTestValues, gpuTestValues, 
ITester::TestType::GPU); + } + + void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + { + + verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); + + verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); + verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); + verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); + + verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); + verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); + verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); + + } +}; +#endif \ No newline at end of file diff --git a/14_Mortons/ITester.h b/14_Mortons/ITester.h new file mode 100644 index 000000000..3be6d1d6b --- /dev/null +++ b/14_Mortons/ITester.h @@ -0,0 +1,279 @@ +#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ + +#include +#include "app_resources/common.hlsl" +#include "nbl/application_templates/MonoDeviceApplication.hpp" + +using namespace nbl; + +class ITester +{ +public: + virtual ~ITester() + { + m_outputBufferAllocation.memory->unmap(); + }; + + struct PipelineSetupData + { + std::string testShaderPath; + core::smart_refctd_ptr device; + core::smart_refctd_ptr api; + core::smart_refctd_ptr assetMgr; + 
core::smart_refctd_ptr logger; + video::IPhysicalDevice* physicalDevice; + uint32_t computeFamilyIndex; + }; + + template + void setupPipeline(const PipelineSetupData& pipleineSetupData) + { + // setting up pipeline in the constructor + m_device = core::smart_refctd_ptr(pipleineSetupData.device); + m_physicalDevice = pipleineSetupData.physicalDevice; + m_api = core::smart_refctd_ptr(pipleineSetupData.api); + m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); + m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); + m_queueFamily = pipleineSetupData.computeFamilyIndex; + m_semaphoreCounter = 0; + m_semaphore = m_device->createSemaphore(0); + m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) + logFail("Failed to create Command Buffers!\n"); + + // Load shaders, set up pipeline + core::smart_refctd_ptr shader; + { + asset::IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return logFail("Could not load shader!"); + + // It would be super weird if loading a shader from a file produced more than 1 asset + assert(assets.size() == 1); + core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); + + shader = m_device->compileShader({source.get()}); + } + + if (!shader) + logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); + + video::IGPUDescriptorSetLayout::SBinding bindings[2] = { + { + .binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1 + }, + { + 
.binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1 + } + }; + + core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + logFail("Failed to create a Descriptor Layout!\n"); + + m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); + if (!m_pplnLayout) + logFail("Failed to create a Pipeline Layout!\n"); + + { + video::IGPUComputePipeline::SCreationParams params = {}; + params.layout = m_pplnLayout.get(); + params.shader.entryPoint = "main"; + params.shader.shader = shader.get(); + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + logFail("Failed to create pipelines (compile & link shaders)!\n"); + } + + // Allocate memory of the input buffer + { + constexpr size_t BufferSize = sizeof(InputStruct); + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); + if (!inputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + inputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_inputBufferAllocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + m_ds = 
pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(inputBuff); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + } + + // Allocate memory of the output buffer + { + constexpr size_t BufferSize = sizeof(OutputStruct); + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); + if (!outputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + outputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_outputBufferAllocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(outputBuff); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + } + + if (!m_outputBufferAllocation.memory->map({ 
0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); + if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct)); + + m_queue = m_device->getQueue(m_queueFamily, 0); + } + + enum class TestType + { + CPU, + GPU + }; + + template + void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) + { + if (expectedVal == testVal) + return; + + std::stringstream ss; + switch (testType) + { + case TestType::CPU: + ss << "CPU TEST ERROR:\n"; + break; + case TestType::GPU: + ss << "GPU TEST ERROR:\n"; + } + + ss << "nbl::hlsl::" << memberName << " produced incorrect output!" 
<< '\n'; + + m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); + } + +protected: + uint32_t m_queueFamily; + core::smart_refctd_ptr m_device; + core::smart_refctd_ptr m_api; + video::IPhysicalDevice* m_physicalDevice; + core::smart_refctd_ptr m_assetMgr; + core::smart_refctd_ptr m_logger; + video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; + video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; + core::smart_refctd_ptr m_cmdbuf = nullptr; + core::smart_refctd_ptr m_cmdpool = nullptr; + core::smart_refctd_ptr m_ds = nullptr; + core::smart_refctd_ptr m_pplnLayout = nullptr; + core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_semaphore; + video::IQueue* m_queue; + uint64_t m_semaphoreCounter; + + template + OutputStruct dispatch(const InputStruct& input) + { + // Update input buffer + if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); + if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), &input, sizeof(InputStruct)); + + m_inputBufferAllocation.memory->unmap(); + + // record command buffer + m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); + m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); + m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); + m_cmdbuf->dispatch(1, 1, 1); + m_cmdbuf->endDebugMarker(); + 
m_cmdbuf->end(); + + video::IQueue::SSubmitInfo submitInfos[1] = {}; + const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + submitInfos[0].commandBuffers = cmdbufs; + const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + submitInfos[0].signalSemaphores = signals; + + m_api->startCapture(); + m_queue->submit(submitInfos); + m_api->endCapture(); + + m_device->waitIdle(); + OutputStruct output; + std::memcpy(&output, static_cast(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct)); + m_device->waitIdle(); + + return output; + } + +private: + template + inline void logFail(const char* msg, Args&&... args) + { + m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); + exit(-1); + } +}; + +#endif \ No newline at end of file diff --git a/14_Mortons/app_resources/common.hlsl b/14_Mortons/app_resources/common.hlsl new file mode 100644 index 000000000..98e5e1342 --- /dev/null +++ b/14_Mortons/app_resources/common.hlsl @@ -0,0 +1,233 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXAMPLES_TESTS_12_MORTON_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_12_MORTON_COMMON_INCLUDED_ + +#include + +#include + +NBL_CONSTEXPR uint16_t smallBits_2 = 8; +NBL_CONSTEXPR uint16_t mediumBits_2 = 16; +NBL_CONSTEXPR uint16_t fullBits_2 = 32; +NBL_CONSTEXPR uint16_t smallBits_3 = 5; +NBL_CONSTEXPR uint16_t mediumBits_3 = 10; +NBL_CONSTEXPR uint16_t fullBits_3 = 21; +NBL_CONSTEXPR uint16_t smallBits_4 = 4; +NBL_CONSTEXPR uint16_t mediumBits_4 = 8; +NBL_CONSTEXPR uint16_t fullBits_4 = 16; + +using namespace nbl::hlsl; +template +NBL_CONSTEXPR_INLINE_FUNC T createAnyBitIntegerFromU64(uint64_t val) +{ + if(Signed) + { + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint64_t mask = (uint64_t(1) << (Bits - 1)) - 1; + // fill excess bit with one + if (_static_cast(val) < 0) + return _static_cast(val | ~mask); + else + return _static_cast(val & mask); + } else + { + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint64_t mask = (uint64_t(1) << Bits) - 1; + return _static_cast(val & mask); + } +} + +template +NBL_CONSTEXPR_INLINE_FUNC vector createAnyBitIntegerVecFromU64Vec(vector val) +{ + array_get, uint64_t> getter; + array_set, T> setter; + vector output; + NBL_UNROLL + for (uint16_t i = 0; i < D; i++) + { + setter(output, i, createAnyBitIntegerFromU64(getter(val, i))); + } + return output; +} + +template +NBL_CONSTEXPR_INLINE_FUNC morton::code createMortonFromU64Vec(const vector vec) +{ + using morton_code_t = morton::code; + using decode_component_t = typename morton_code_t::decode_component_t; + return morton_code_t::create(createAnyBitIntegerVecFromU64Vec(vec)); +} + +struct InputTestValues +{ + // Both tests + uint32_t shift; + + // Emulated int tests + uint64_t generatedA; + uint64_t generatedB; + + // Morton tests + uint64_t coordX; + uint64_t coordY; + uint64_t coordZ; + uint64_t coordW; +}; + +struct TestValues +{ + // Emulated int tests + emulated_uint64_t emulatedAnd; + emulated_uint64_t 
emulatedOr; + emulated_uint64_t emulatedXor; + emulated_uint64_t emulatedNot; + emulated_uint64_t emulatedPlus; + emulated_uint64_t emulatedMinus; + emulated_int64_t emulatedUnaryMinus; + // These are bools but stored as uint because you can't store bools, causes a SPIR-V issue + uint32_t emulatedLess; + uint32_t emulatedLessEqual; + uint32_t emulatedGreater; + uint32_t emulatedGreaterEqual; + emulated_uint64_t emulatedLeftShifted; + emulated_uint64_t emulatedUnsignedRightShifted; + emulated_int64_t emulatedSignedRightShifted; + + // Morton tests - for each dimension let's do one small, medium and full-szied (max bits possible) test to cover representation with + // 16, 32 and 64-bit types. Could make it more exhaustive with macros (test all possible bitwidths) + // For emulated mortons, we store only the emulated uint64 representing it, because DXC complains about bitcasts otherwise + + // Plus + morton::code mortonPlus_small_2; + morton::code mortonPlus_medium_2; + morton::code mortonPlus_full_2; + morton::code mortonPlus_emulated_2; + + morton::code mortonPlus_small_3; + morton::code mortonPlus_medium_3; + morton::code mortonPlus_full_3; + morton::code mortonPlus_emulated_3; + + morton::code mortonPlus_small_4; + morton::code mortonPlus_medium_4; + morton::code mortonPlus_full_4; + morton::code mortonPlus_emulated_4; + + // Minus + morton::code mortonMinus_small_2; + morton::code mortonMinus_medium_2; + morton::code mortonMinus_full_2; + morton::code mortonMinus_emulated_2; + + morton::code mortonMinus_small_3; + morton::code mortonMinus_medium_3; + morton::code mortonMinus_full_3; + morton::code mortonMinus_emulated_3; + + morton::code mortonMinus_small_4; + morton::code mortonMinus_medium_4; + morton::code mortonMinus_full_4; + morton::code mortonMinus_emulated_4; + + // Coordinate-wise equality (these are bools) + uint32_t2 mortonEqual_small_2; + uint32_t2 mortonEqual_medium_2; + uint32_t2 mortonEqual_full_2; + uint32_t2 mortonEqual_emulated_2; + + uint32_t3 
mortonEqual_small_3; + uint32_t3 mortonEqual_medium_3; + uint32_t3 mortonEqual_full_3; + uint32_t3 mortonEqual_emulated_3; + + uint32_t4 mortonEqual_small_4; + uint32_t4 mortonEqual_medium_4; + uint32_t4 mortonEqual_full_4; + uint32_t4 mortonEqual_emulated_4; + + // Coordinate-wise unsigned inequality (just testing with less, again these are bools) + uint32_t2 mortonUnsignedLess_small_2; + uint32_t2 mortonUnsignedLess_medium_2; + uint32_t2 mortonUnsignedLess_full_2; + uint32_t2 mortonUnsignedLess_emulated_2; + + uint32_t3 mortonUnsignedLess_small_3; + uint32_t3 mortonUnsignedLess_medium_3; + uint32_t3 mortonUnsignedLess_full_3; + uint32_t3 mortonUnsignedLess_emulated_3; + + uint32_t4 mortonUnsignedLess_small_4; + uint32_t4 mortonUnsignedLess_medium_4; + uint32_t4 mortonUnsignedLess_full_4; + uint32_t4 mortonUnsignedLess_emulated_4; + + // Coordinate-wise signed inequality (bools) + uint32_t2 mortonSignedLess_small_2; + uint32_t2 mortonSignedLess_medium_2; + uint32_t2 mortonSignedLess_full_2; + uint32_t2 mortonSignedLess_emulated_2; + + uint32_t3 mortonSignedLess_small_3; + uint32_t3 mortonSignedLess_medium_3; + uint32_t3 mortonSignedLess_full_3; + uint32_t3 mortonSignedLess_emulated_3; + + uint32_t4 mortonSignedLess_small_4; + uint32_t4 mortonSignedLess_medium_4; + uint32_t4 mortonSignedLess_full_4; + uint32_t4 mortonSignedLess_emulated_4; + + // Left-shift + morton::code mortonLeftShift_small_2; + morton::code mortonLeftShift_medium_2; + morton::code mortonLeftShift_full_2; + morton::code mortonLeftShift_emulated_2; + + morton::code mortonLeftShift_small_3; + morton::code mortonLeftShift_medium_3; + morton::code mortonLeftShift_full_3; + morton::code mortonLeftShift_emulated_3; + + morton::code mortonLeftShift_small_4; + morton::code mortonLeftShift_medium_4; + morton::code mortonLeftShift_full_4; + morton::code mortonLeftShift_emulated_4; + + // Unsigned right-shift + morton::code mortonUnsignedRightShift_small_2; + morton::code mortonUnsignedRightShift_medium_2; 
+ morton::code mortonUnsignedRightShift_full_2; + morton::code mortonUnsignedRightShift_emulated_2; + + morton::code mortonUnsignedRightShift_small_3; + morton::code mortonUnsignedRightShift_medium_3; + morton::code mortonUnsignedRightShift_full_3; + morton::code mortonUnsignedRightShift_emulated_3; + + morton::code mortonUnsignedRightShift_small_4; + morton::code mortonUnsignedRightShift_medium_4; + morton::code mortonUnsignedRightShift_full_4; + morton::code mortonUnsignedRightShift_emulated_4; + + // Signed right-shift + morton::code mortonSignedRightShift_small_2; + morton::code mortonSignedRightShift_medium_2; + morton::code mortonSignedRightShift_full_2; + morton::code mortonSignedRightShift_emulated_2; + + morton::code mortonSignedRightShift_small_3; + morton::code mortonSignedRightShift_medium_3; + morton::code mortonSignedRightShift_full_3; + morton::code mortonSignedRightShift_emulated_3; + + morton::code mortonSignedRightShift_small_4; + morton::code mortonSignedRightShift_medium_4; + morton::code mortonSignedRightShift_full_4; + morton::code mortonSignedRightShift_emulated_4; + + +}; + +#endif diff --git a/14_Mortons/app_resources/test.comp.hlsl b/14_Mortons/app_resources/test.comp.hlsl new file mode 100644 index 000000000..60cdf94b1 --- /dev/null +++ b/14_Mortons/app_resources/test.comp.hlsl @@ -0,0 +1,17 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h + +#include "testCommon.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(1, 1, 1)] +[shader("compute")] +void main(uint3 invocationID : SV_DispatchThreadID) +{ + uint32_t testID = glsl::gl_GlobalInvocationID().x; + fillTestValues(inputTestValues[testID], outputTestValues[testID]); +} diff --git a/14_Mortons/app_resources/test2.comp.hlsl b/14_Mortons/app_resources/test2.comp.hlsl new file mode 100644 index 000000000..30b998f49 --- /dev/null +++ b/14_Mortons/app_resources/test2.comp.hlsl @@ -0,0 +1,17 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h + +#include "testCommon2.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(1, 1, 1)] +[shader("compute")] +void main(uint3 invocationID : SV_DispatchThreadID) +{ + uint32_t testID = glsl::gl_GlobalInvocationID().x; + fillTestValues2(inputTestValues[testID], outputTestValues[testID]); +} diff --git a/14_Mortons/app_resources/testCommon.hlsl b/14_Mortons/app_resources/testCommon.hlsl new file mode 100644 index 000000000..6144b6ce9 --- /dev/null +++ b/14_Mortons/app_resources/testCommon.hlsl @@ -0,0 +1,295 @@ +#include "common.hlsl" + + +void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +{ + emulated_uint64_t emulatedA = _static_cast(input.generatedA); + emulated_uint64_t emulatedB = _static_cast(input.generatedB); + emulated_int64_t signedEmulatedA = _static_cast(input.generatedA); + + // Emulated int tests + output.emulatedAnd = emulatedA & emulatedB; + output.emulatedOr = 
emulatedA | emulatedB; + output.emulatedXor = emulatedA ^ emulatedB; + output.emulatedNot = emulatedA.operator~(); + output.emulatedPlus = emulatedA + emulatedB; + output.emulatedMinus = emulatedA - emulatedB; + output.emulatedLess = uint32_t(emulatedA < emulatedB); + output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); + output.emulatedGreater = uint32_t(emulatedA > emulatedB); + output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); + + left_shift_operator leftShift; + output.emulatedLeftShifted = leftShift(emulatedA, input.shift); + + arithmetic_right_shift_operator unsignedRightShift; + output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); + + arithmetic_right_shift_operator signedRightShift; + output.emulatedSignedRightShifted = signedRightShift(signedEmulatedA, input.shift); + + output.emulatedUnaryMinus = signedEmulatedA.operator-(); + + // Morton tests + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 
Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + morton::code morton_small_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2A = createMortonFromU64Vec(Vec2A); + 
morton::code morton_emulated_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_small_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_medium_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_full_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_emulated_2B = createMortonFromU64Vec(Vec2B); + + morton::code morton_small_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_medium_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_small_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_medium_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_full_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_emulated_3B = createMortonFromU64Vec(Vec3B); + + morton::code morton_small_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_small_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_medium_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_full_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_emulated_4B = createMortonFromU64Vec(Vec4B); + + morton::code morton_small_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + + morton::code morton_small_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_medium_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + + morton::code morton_small_4_signed = 
createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + // Some test and operation is moved to testCommon2.hlsl due to dxc bug that cause compilation failure. Uncomment when the bug is fixed. + // Plus + output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; + output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; + output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; + output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; + + output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; + output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; + output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; + output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; + + output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; + output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; + output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; + output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; + + // Minus + output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; + output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; + output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; + output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; + + output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; + output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; + output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; + output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; + + output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; + output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; + output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; + output.mortonMinus_emulated_4 = 
morton_emulated_4A - morton_emulated_4B; + + // Coordinate-wise equality + output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(Vec2BSmall)); + output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(Vec2BMedium)); + output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(Vec2BFull)); + output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(Vec2BFull)); + + output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(Vec3BSmall)); + output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(Vec3BMedium)); + output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(Vec3BFull)); + output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(Vec3BFull)); + + output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(Vec4BSmall)); + output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(Vec4BMedium)); + output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(Vec4BFull)); + output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(Vec2BSmall)); + output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(Vec2BMedium)); + output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(Vec2BFull)); + output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(Vec2BFull)); + + output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(Vec3BSmall)); + output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(Vec3BMedium)); + output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(Vec3BFull)); + output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(Vec3BFull)); + + output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(Vec4BSmall)); + output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(Vec4BMedium)); + 
output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(Vec4BFull)); + // output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); + + // Coordinate-wise signed inequality + output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(Vec2BSignedSmall)); + output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(Vec2BSignedMedium)); + output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(Vec2BSignedFull)); + // output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + + output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(Vec3BSignedSmall)); + output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(Vec3BSignedMedium)); + output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(Vec3BSignedFull)); + // output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + + output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(Vec4BSignedSmall)); + output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(Vec4BSignedMedium)); + output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(Vec4BSignedFull)); + // output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); + + // Cast to uint16_t which is what left shift for Mortons expect + uint16_t castedShift = uint16_t(input.shift); + // Each left shift clamps to correct bits so the result kinda makes sense + // Left-shift + left_shift_operator > leftShiftSmall2; + output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); + left_shift_operator > leftShiftMedium2; + output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + left_shift_operator > leftShiftFull2; + output.mortonLeftShift_full_2 = 
leftShiftFull2(morton_full_2A, castedShift % fullBits_2); + left_shift_operator > leftShiftEmulated2; + output.mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + left_shift_operator > leftShiftSmall3; + output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); + left_shift_operator > leftShiftMedium3; + output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + left_shift_operator > leftShiftFull3; + output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); + left_shift_operator > leftShiftEmulated3; + output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + left_shift_operator > leftShiftSmall4; + output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); + left_shift_operator > leftShiftMedium4; + output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + left_shift_operator > leftShiftFull4; + output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); + left_shift_operator > leftShiftEmulated4; + output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Unsigned right-shift + arithmetic_right_shift_operator > rightShiftSmall2; + output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftMedium2; + output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftFull2; + output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftEmulated2; + output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % 
fullBits_2); + + arithmetic_right_shift_operator > rightShiftSmall3; + output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftMedium3; + output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftFull3; + output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftEmulated3; + output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSmall4; + output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftMedium4; + output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftFull4; + output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); + arithmetic_right_shift_operator > rightShiftEmulated4; + output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Signed right-shift + arithmetic_right_shift_operator > rightShiftSignedSmall2; + output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftSignedMedium2; + output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftSignedFull2; + output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); + // arithmetic_right_shift_operator > rightShiftSignedEmulated2; + // output.mortonSignedRightShift_emulated_2 = 
rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSignedSmall3; + output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftSignedMedium3; + output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftSignedFull3; + output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); + // arithmetic_right_shift_operator > rightShiftSignedEmulated3; + // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSignedSmall4; + output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftSignedMedium4; + output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftSignedFull4; + output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); + // arithmetic_right_shift_operator > rightShiftSignedEmulated4; + // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); + +} \ No newline at end of file diff --git a/14_Mortons/app_resources/testCommon2.hlsl b/14_Mortons/app_resources/testCommon2.hlsl new file mode 100644 index 000000000..365b82340 --- /dev/null +++ b/14_Mortons/app_resources/testCommon2.hlsl @@ -0,0 +1,39 @@ +#include "common.hlsl" + +void fillTestValues2(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +{ + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, 
input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + + output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); + + output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(input.shift); + + arithmetic_right_shift_operator > rightShiftSignedEmulated2; + output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftSignedEmulated3; + output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftSignedEmulated4; + output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); +} diff --git a/14_Mortons/config.json.template b/14_Mortons/config.json.template new file mode 100644 
index 000000000..717d05d53 --- /dev/null +++ b/14_Mortons/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", // should be none + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", // we also need to run in Debug and RWDI because foundational example + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/14_Mortons/main.cpp new file mode 100644 index 000000000..12f55805f --- /dev/null +++ b/14_Mortons/main.cpp @@ -0,0 +1,82 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include +#include + +#include "nbl/application_templates/MonoDeviceApplication.hpp" +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" + +#include "app_resources/common.hlsl" +#include "CTester.h" + +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::video; +using namespace nbl::examples; +using namespace nbl::application_templates; + +class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; +public: + MortonTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + } + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // 
Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + CTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator + // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 + { + CTester mortonTester; + pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; + mortonTester.setupPipeline(pplnSetupData); + mortonTester.performTests(); + + CTester2 mortonTester2; + pplnSetupData.testShaderPath = "app_resources/test2.comp.hlsl"; + mortonTester2.setupPipeline(pplnSetupData); + mortonTester2.performTests(); + } + + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void workLoopBody() override + { + m_keepRunning = false; + } + + bool keepRunning() override + { + return m_keepRunning; + } + + +private: + bool m_keepRunning = true; +}; + +NBL_MAIN_FUNC(MortonTest) \ No newline at end of file diff --git a/14_Mortons/pipeline.groovy b/14_Mortons/pipeline.groovy new file mode 100644 index 000000000..1a7b043a4 --- /dev/null +++ b/14_Mortons/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CStreamingAndBufferDeviceAddressBuilder extends IBuilder +{ + public CStreamingAndBufferDeviceAddressBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + 
@Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return true + } + + @Override + public boolean test(Map axisMapping) + { + return true + } + + @Override + public boolean install(Map axisMapping) + { + return true + } +} + +def create(Agent _agent, _info) +{ + return new CStreamingAndBufferDeviceAddressBuilder(_agent, _info) +} + +return this \ No newline at end of file diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index d053977c0..f014bd1cb 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -86,6 +86,10 @@ class CIntrinsicsTester final : public ITester testInput.smoothStepEdge0 = realDistributionNeg(mt); testInput.smoothStepEdge1 = realDistributionPos(mt); testInput.smoothStepX = realDistribution(mt); + testInput.addCarryA = std::numeric_limits::max() - uintDistribution(mt); + testInput.addCarryB = uintDistribution(mt); + testInput.subBorrowA = uintDistribution(mt); + testInput.subBorrowB = uintDistribution(mt); testInput.bitCountVec = int32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); testInput.clampValVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); @@ -120,6 +124,10 @@ class CIntrinsicsTester final : public ITester testInput.refractI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); testInput.refractN = glm::normalize(float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt))); testInput.refractEta = realDistribution(mt); + testInput.addCarryAVec = 
uint32_t3(std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt)); + testInput.addCarryBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); + testInput.subBorrowAVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); + testInput.subBorrowBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values IntrinsicsTestValues expected; @@ -140,6 +148,9 @@ class CIntrinsicsTester final : public ITester expected.step = glm::step(testInput.stepEdge, testInput.stepX); expected.smoothStep = glm::smoothstep(testInput.smoothStepEdge0, testInput.smoothStepEdge1, testInput.smoothStepX); + expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); + expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); + expected.frac = testInput.frac - std::floor(testInput.frac); expected.bitReverse = glm::bitfieldReverse(testInput.bitReverse); @@ -182,6 +193,9 @@ class CIntrinsicsTester final : public ITester expected.reflect = glm::reflect(testInput.reflectI, testInput.reflectN); expected.refract = glm::refract(testInput.refractI, testInput.refractN, testInput.refractEta); + expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); + expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); + auto mulGlm = nbl::hlsl::mul(testInput.mulLhs, testInput.mulRhs); expected.mul = reinterpret_cast(mulGlm); auto transposeGlm = glm::transpose(reinterpret_cast(testInput.transpose)); @@ -201,6 +215,7 @@ class CIntrinsicsTester final : 
public ITester void performCpuTests(const IntrinsicsIntputTestValues& commonTestInputValues, const IntrinsicsTestValues& expectedTestValues) { IntrinsicsTestValues cpuTestValues; + cpuTestValues.fillTestValues(commonTestInputValues); verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); @@ -233,6 +248,10 @@ class CIntrinsicsTester final : public ITester verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testType); verifyTestValue("step", expectedTestValues.step, testValues.step, testType); verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testType); + verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testType); + verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testType); + verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testType); + verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testType); verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); verifyTestVector3dValue("cross", expectedTestValues.cross, testValues.cross, testType); @@ -255,6 +274,10 @@ class CIntrinsicsTester final : public ITester verifyTestVector3dValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testType); verifyTestVector3dValue("reflect", expectedTestValues.reflect, testValues.reflect, testType); verifyTestVector3dValue("refract", expectedTestValues.refract, testValues.refract, testType); + verifyTestVector3dValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testType); + verifyTestVector3dValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testType); + verifyTestVector3dValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, 
testValues.subBorrowVec.result, testType); + verifyTestVector3dValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, testValues.subBorrowVec.borrow, testType); verifyTestMatrix3x3Value("mul", expectedTestValues.mul, testValues.mul, testType); verifyTestMatrix3x3Value("transpose", expectedTestValues.transpose, testValues.transpose, testType); diff --git a/22_CppCompat/ITester.h b/22_CppCompat/ITester.h index 4ecd522b9..39ceb8141 100644 --- a/22_CppCompat/ITester.h +++ b/22_CppCompat/ITester.h @@ -218,6 +218,7 @@ class ITester { case TestType::CPU: ss << "CPU TEST ERROR:\n"; + break; case TestType::GPU: ss << "GPU TEST ERROR:\n"; } diff --git a/22_CppCompat/app_resources/common.hlsl b/22_CppCompat/app_resources/common.hlsl index e2303a2fc..dc3ff5fcd 100644 --- a/22_CppCompat/app_resources/common.hlsl +++ b/22_CppCompat/app_resources/common.hlsl @@ -1,74 +1,74 @@ -//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. -//// This file is part of the "Nabla Engine". -//// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ - -// because DXC doesn't properly support `_Static_assert` -// TODO: add a message, and move to macros.h or cpp_compat -#define STATIC_ASSERT(...) 
{ nbl::hlsl::conditional<__VA_ARGS__, int, void>::type a = 0; } - -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include - -#include - - -#include -#include -#include - -#include -#include - -// tgmath.hlsl and intrinsics.hlsl tests - -using namespace nbl::hlsl; -struct TgmathIntputTestValues -{ - float floor; - float isnan; - float isinf; - float powX; - float powY; - float exp; - float exp2; - float log; - float log2; - float absF; - int absI; - float sqrt; - float sin; - float cos; - float acos; - float modf; - float round; - float roundEven; - float trunc; - float ceil; - float fmaX; - float fmaY; - float fmaZ; - float ldexpArg; - int ldexpExp; - float modfStruct; - float frexpStruct; +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ + +// because DXC doesn't properly support `_Static_assert` +// TODO: add a message, and move to macros.h or cpp_compat +#define STATIC_ASSERT(...) 
{ nbl::hlsl::conditional<__VA_ARGS__, int, void>::type a = 0; } + +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include + +#include + + +#include +#include +#include + +#include +#include + +// tgmath.hlsl and intrinsics.hlsl tests + +using namespace nbl::hlsl; +struct TgmathIntputTestValues +{ + float floor; + float isnan; + float isinf; + float powX; + float powY; + float exp; + float exp2; + float log; + float log2; + float absF; + int absI; + float sqrt; + float sin; + float cos; + float acos; + float modf; + float round; + float roundEven; + float trunc; + float ceil; + float fmaX; + float fmaY; + float fmaZ; + float ldexpArg; + int ldexpExp; + float modfStruct; + float frexpStruct; float tan; float asin; float atan; @@ -78,38 +78,38 @@ struct TgmathIntputTestValues float asinh; float acosh; float atanh; - float atan2X; - float atan2Y; - float erf; - float erfInv; - - float32_t3 floorVec; - float32_t3 isnanVec; - float32_t3 isinfVec; - float32_t3 powXVec; - float32_t3 powYVec; - float32_t3 expVec; - float32_t3 exp2Vec; - float32_t3 logVec; - float32_t3 log2Vec; - float32_t3 absFVec; - int32_t3 absIVec; - float32_t3 sqrtVec; - float32_t3 sinVec; - float32_t3 cosVec; - float32_t3 acosVec; - float32_t3 modfVec; - float32_t3 roundVec; - float32_t3 roundEvenVec; - float32_t3 truncVec; - float32_t3 ceilVec; - float32_t3 fmaXVec; - float32_t3 fmaYVec; - float32_t3 fmaZVec; - float32_t3 ldexpArgVec; - int32_t3 ldexpExpVec; - float32_t3 modfStructVec; - float32_t3 frexpStructVec; + float atan2X; + float atan2Y; + float erf; + float erfInv; + + float32_t3 floorVec; + float32_t3 isnanVec; + float32_t3 isinfVec; + float32_t3 powXVec; + float32_t3 powYVec; + float32_t3 expVec; + float32_t3 exp2Vec; + float32_t3 logVec; + float32_t3 log2Vec; + float32_t3 absFVec; + int32_t3 absIVec; + float32_t3 sqrtVec; + float32_t3 sinVec; + float32_t3 cosVec; + float32_t3 acosVec; + float32_t3 modfVec; + 
float32_t3 roundVec; + float32_t3 roundEvenVec; + float32_t3 truncVec; + float32_t3 ceilVec; + float32_t3 fmaXVec; + float32_t3 fmaYVec; + float32_t3 fmaZVec; + float32_t3 ldexpArgVec; + int32_t3 ldexpExpVec; + float32_t3 modfStructVec; + float32_t3 frexpStructVec; float32_t3 tanVec; float32_t3 asinVec; float32_t3 atanVec; @@ -119,35 +119,35 @@ struct TgmathIntputTestValues float32_t3 asinhVec; float32_t3 acoshVec; float32_t3 atanhVec; - float32_t3 atan2XVec; - float32_t3 atan2YVec; - float32_t3 erfVec; - float32_t3 erfInvVec; -}; - -struct TgmathTestValues -{ - float floor; - int isnan; - int isinf; - float pow; - float exp; - float exp2; - float log; - float log2; - float absF; - int absI; - float sqrt; - float sin; - float cos; - float acos; - float modf; - float round; - float roundEven; - float trunc; - float ceil; - float fma; - float ldexp; + float32_t3 atan2XVec; + float32_t3 atan2YVec; + float32_t3 erfVec; + float32_t3 erfInvVec; +}; + +struct TgmathTestValues +{ + float floor; + int isnan; + int isinf; + float pow; + float exp; + float exp2; + float log; + float log2; + float absF; + int absI; + float sqrt; + float sin; + float cos; + float acos; + float modf; + float round; + float roundEven; + float trunc; + float ceil; + float fma; + float ldexp; float tan; float asin; float atan; @@ -157,40 +157,40 @@ struct TgmathTestValues float asinh; float acosh; float atanh; - float atan2; - float erf; - float erfInv; - - float32_t3 floorVec; - - // we can't fix this because using namespace nbl::hlsl would cause ambiguous math functions below - // and we can't add a nbl::hlsl alias for the builtin hLSL vector type because of https://github.com/microsoft/DirectXShaderCompiler/issues/7035 -#ifndef __HLSL_VERSION - nbl::hlsl::vector isnanVec; - nbl::hlsl::vector isinfVec; -#else - vector isnanVec; - vector isinfVec; -#endif - - float32_t3 powVec; - float32_t3 expVec; - float32_t3 exp2Vec; - float32_t3 logVec; - float32_t3 log2Vec; - float32_t3 absFVec; - int32_t3 
absIVec; - float32_t3 sqrtVec; - float32_t3 cosVec; - float32_t3 sinVec; - float32_t3 acosVec; - float32_t3 modfVec; - float32_t3 roundVec; - float32_t3 roundEvenVec; - float32_t3 truncVec; - float32_t3 ceilVec; - float32_t3 fmaVec; - float32_t3 ldexpVec; + float atan2; + float erf; + float erfInv; + + float32_t3 floorVec; + + // we can't fix this because using namespace nbl::hlsl would cause ambiguous math functions below + // and we can't add a nbl::hlsl alias for the builtin hLSL vector type because of https://github.com/microsoft/DirectXShaderCompiler/issues/7035 +#ifndef __HLSL_VERSION + nbl::hlsl::vector isnanVec; + nbl::hlsl::vector isinfVec; +#else + vector isnanVec; + vector isinfVec; +#endif + + float32_t3 powVec; + float32_t3 expVec; + float32_t3 exp2Vec; + float32_t3 logVec; + float32_t3 log2Vec; + float32_t3 absFVec; + int32_t3 absIVec; + float32_t3 sqrtVec; + float32_t3 cosVec; + float32_t3 sinVec; + float32_t3 acosVec; + float32_t3 modfVec; + float32_t3 roundVec; + float32_t3 roundEvenVec; + float32_t3 truncVec; + float32_t3 ceilVec; + float32_t3 fmaVec; + float32_t3 ldexpVec; float32_t3 tanVec; float32_t3 asinVec; float32_t3 atanVec; @@ -200,258 +200,275 @@ struct TgmathTestValues float32_t3 asinhVec; float32_t3 acoshVec; float32_t3 atanhVec; - float32_t3 atan2Vec; - float32_t3 erfVec; - float32_t3 erfInvVec; - - ModfOutput modfStruct; - ModfOutput modfStructVec; - FrexpOutput frexpStruct; - FrexpOutput frexpStructVec; - - void fillTestValues(NBL_CONST_REF_ARG(TgmathIntputTestValues) input) - { - floor = nbl::hlsl::floor(input.floor); - isnan = nbl::hlsl::isnan(input.isnan); - isinf = nbl::hlsl::isinf(input.isinf); - pow = nbl::hlsl::pow(input.powX, input.powY); - exp = nbl::hlsl::exp(input.exp); - exp2 = nbl::hlsl::exp2(input.exp2); - log = nbl::hlsl::log(input.log); - log2 = nbl::hlsl::log2(input.log2); - absF = nbl::hlsl::abs(input.absF); - absI = nbl::hlsl::abs(input.absI); - sqrt = nbl::hlsl::sqrt(input.sqrt); - sin = nbl::hlsl::sin(input.sin); 
- cos = nbl::hlsl::cos(input.cos); - tan = nbl::hlsl::tan(input.tan); - asin = nbl::hlsl::asin(input.asin); - atan = nbl::hlsl::atan(input.atan); - sinh = nbl::hlsl::sinh(input.sinh); - cosh = nbl::hlsl::cosh(input.cosh); - tanh = nbl::hlsl::tanh(input.tanh); - asinh = nbl::hlsl::asinh(input.asinh); - acosh = nbl::hlsl::acosh(input.acosh); - atanh = nbl::hlsl::atanh(input.atanh); - atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); - erf = nbl::hlsl::erf(input.erf); - erfInv = nbl::hlsl::erfInv(input.erfInv); - acos = nbl::hlsl::acos(input.acos); - modf = nbl::hlsl::modf(input.modf); - round = nbl::hlsl::round(input.round); - roundEven = nbl::hlsl::roundEven(input.roundEven); - trunc = nbl::hlsl::trunc(input.trunc); - ceil = nbl::hlsl::ceil(input.ceil); - fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); - ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); - - floorVec = nbl::hlsl::floor(input.floorVec); - isnanVec = nbl::hlsl::isnan(input.isnanVec); - isinfVec = nbl::hlsl::isinf(input.isinfVec); - powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); - expVec = nbl::hlsl::exp(input.expVec); - exp2Vec = nbl::hlsl::exp2(input.exp2Vec); - logVec = nbl::hlsl::log(input.logVec); - log2Vec = nbl::hlsl::log2(input.log2Vec); - absFVec = nbl::hlsl::abs(input.absFVec); - absIVec = nbl::hlsl::abs(input.absIVec); - sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); - sinVec = nbl::hlsl::sin(input.sinVec); - cosVec = nbl::hlsl::cos(input.cosVec); - tanVec = nbl::hlsl::tan(input.tanVec); - asinVec = nbl::hlsl::asin(input.asinVec); - atanVec = nbl::hlsl::atan(input.atanVec); - sinhVec = nbl::hlsl::sinh(input.sinhVec); - coshVec = nbl::hlsl::cosh(input.coshVec); - tanhVec = nbl::hlsl::tanh(input.tanhVec); - asinhVec = nbl::hlsl::asinh(input.asinhVec); - acoshVec = nbl::hlsl::acosh(input.acoshVec); - atanhVec = nbl::hlsl::atanh(input.atanhVec); - atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); - acosVec = nbl::hlsl::acos(input.acosVec); - modfVec = 
nbl::hlsl::modf(input.modfVec); - roundVec = nbl::hlsl::round(input.roundVec); - roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); - truncVec = nbl::hlsl::trunc(input.truncVec); - ceilVec = nbl::hlsl::ceil(input.ceilVec); - fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); - ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); - erfVec = nbl::hlsl::erf(input.erfVec); - erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); - - modfStruct = nbl::hlsl::modfStruct(input.modfStruct); - modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); - frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); - frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); - } -}; - -struct IntrinsicsIntputTestValues -{ - int bitCount; - float32_t3 crossLhs; - float32_t3 crossRhs; - float clampVal; - float clampMin; - float clampMax; - float32_t3 length; - float32_t3 normalize; - float32_t3 dotLhs; - float32_t3 dotRhs; - float32_t3x3 determinant; - uint32_t findMSB; - uint32_t findLSB; - float32_t3x3 inverse; - float32_t3x3 transpose; - float32_t3x3 mulLhs; - float32_t3x3 mulRhs; - float minA; - float minB; - float maxA; - float maxB; - float rsqrt; - uint32_t bitReverse; - float frac; - float mixX; - float mixY; - float mixA; - float sign; - float radians; - float degrees; - float stepEdge; - float stepX; - float smoothStepEdge0; - float smoothStepEdge1; - float smoothStepX; - - int32_t3 bitCountVec; - float32_t3 clampValVec; - float32_t3 clampMinVec; - float32_t3 clampMaxVec; - uint32_t3 findMSBVec; - uint32_t3 findLSBVec; - float32_t3 minAVec; - float32_t3 minBVec; - float32_t3 maxAVec; - float32_t3 maxBVec; - float32_t3 rsqrtVec; - uint32_t3 bitReverseVec; - float32_t3 fracVec; - float32_t3 mixXVec; - float32_t3 mixYVec; - float32_t3 mixAVec; - float32_t3 signVec; - float32_t3 radiansVec; - float32_t3 degreesVec; - float32_t3 stepEdgeVec; - float32_t3 stepXVec; - float32_t3 smoothStepEdge0Vec; - float32_t3 smoothStepEdge1Vec; - float32_t3 
smoothStepXVec; - float32_t3 faceForwardN; - float32_t3 faceForwardI; - float32_t3 faceForwardNref; - float32_t3 reflectI; - float32_t3 reflectN; - float32_t3 refractI; - float32_t3 refractN; - float refractEta; -}; - -struct IntrinsicsTestValues -{ - int bitCount; - float clamp; - float length; - float dot; - float determinant; - int findMSB; - int findLSB; - float min; - float max; - float rsqrt; - float frac; - uint32_t bitReverse; - float mix; - float sign; - float radians; - float degrees; - float step; - float smoothStep; - - float32_t3 normalize; - float32_t3 cross; - int32_t3 bitCountVec; - float32_t3 clampVec; - uint32_t3 findMSBVec; - uint32_t3 findLSBVec; - float32_t3 minVec; - float32_t3 maxVec; - float32_t3 rsqrtVec; - uint32_t3 bitReverseVec; - float32_t3 fracVec; - float32_t3 mixVec; - float32_t3 signVec; - float32_t3 radiansVec; - float32_t3 degreesVec; - float32_t3 stepVec; - float32_t3 smoothStepVec; - float32_t3 faceForward; - float32_t3 reflect; - float32_t3 refract; - - float32_t3x3 mul; - float32_t3x3 transpose; - float32_t3x3 inverse; - - void fillTestValues(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input) - { - bitCount = nbl::hlsl::bitCount(input.bitCount); - cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); - clamp = nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); - length = nbl::hlsl::length(input.length); - normalize = nbl::hlsl::normalize(input.normalize); - dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); - determinant = nbl::hlsl::determinant(input.determinant); - findMSB = nbl::hlsl::findMSB(input.findMSB); - findLSB = nbl::hlsl::findLSB(input.findLSB); - inverse = nbl::hlsl::inverse(input.inverse); - transpose = nbl::hlsl::transpose(input.transpose); - mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); - // TODO: fix min and max - min = nbl::hlsl::min(input.minA, input.minB); - max = nbl::hlsl::max(input.maxA, input.maxB); - rsqrt = nbl::hlsl::rsqrt(input.rsqrt); - bitReverse = 
nbl::hlsl::bitReverse(input.bitReverse); - frac = nbl::hlsl::fract(input.frac); - mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); - sign = nbl::hlsl::sign(input.sign); - radians = nbl::hlsl::radians(input.radians); - degrees = nbl::hlsl::degrees(input.degrees); - step = nbl::hlsl::step(input.stepEdge, input.stepX); - smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); - - bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); - clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); - findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); - findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); - // TODO: fix min and max - minVec = nbl::hlsl::min(input.minAVec, input.minBVec); - maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); - rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); - bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); - fracVec = nbl::hlsl::fract(input.fracVec); - mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); - - signVec = nbl::hlsl::sign(input.signVec); - radiansVec = nbl::hlsl::radians(input.radiansVec); - degreesVec = nbl::hlsl::degrees(input.degreesVec); - stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); - smoothStepVec = nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); - faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); - reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); - refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); - } -}; - -#endif + float32_t3 atan2Vec; + float32_t3 erfVec; + float32_t3 erfInvVec; + + ModfOutput modfStruct; + ModfOutput modfStructVec; + FrexpOutput frexpStruct; + FrexpOutput frexpStructVec; + + void fillTestValues(NBL_CONST_REF_ARG(TgmathIntputTestValues) input) + { + floor = nbl::hlsl::floor(input.floor); + isnan = nbl::hlsl::isnan(input.isnan); + isinf = 
nbl::hlsl::isinf(input.isinf); + pow = nbl::hlsl::pow(input.powX, input.powY); + exp = nbl::hlsl::exp(input.exp); + exp2 = nbl::hlsl::exp2(input.exp2); + log = nbl::hlsl::log(input.log); + log2 = nbl::hlsl::log2(input.log2); + absF = nbl::hlsl::abs(input.absF); + absI = nbl::hlsl::abs(input.absI); + sqrt = nbl::hlsl::sqrt(input.sqrt); + sin = nbl::hlsl::sin(input.sin); + cos = nbl::hlsl::cos(input.cos); + tan = nbl::hlsl::tan(input.tan); + asin = nbl::hlsl::asin(input.asin); + atan = nbl::hlsl::atan(input.atan); + sinh = nbl::hlsl::sinh(input.sinh); + cosh = nbl::hlsl::cosh(input.cosh); + tanh = nbl::hlsl::tanh(input.tanh); + asinh = nbl::hlsl::asinh(input.asinh); + acosh = nbl::hlsl::acosh(input.acosh); + atanh = nbl::hlsl::atanh(input.atanh); + atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); + erf = nbl::hlsl::erf(input.erf); + erfInv = nbl::hlsl::erfInv(input.erfInv); + acos = nbl::hlsl::acos(input.acos); + modf = nbl::hlsl::modf(input.modf); + round = nbl::hlsl::round(input.round); + roundEven = nbl::hlsl::roundEven(input.roundEven); + trunc = nbl::hlsl::trunc(input.trunc); + ceil = nbl::hlsl::ceil(input.ceil); + fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); + ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); + + floorVec = nbl::hlsl::floor(input.floorVec); + isnanVec = nbl::hlsl::isnan(input.isnanVec); + isinfVec = nbl::hlsl::isinf(input.isinfVec); + powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); + expVec = nbl::hlsl::exp(input.expVec); + exp2Vec = nbl::hlsl::exp2(input.exp2Vec); + logVec = nbl::hlsl::log(input.logVec); + log2Vec = nbl::hlsl::log2(input.log2Vec); + absFVec = nbl::hlsl::abs(input.absFVec); + absIVec = nbl::hlsl::abs(input.absIVec); + sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); + sinVec = nbl::hlsl::sin(input.sinVec); + cosVec = nbl::hlsl::cos(input.cosVec); + tanVec = nbl::hlsl::tan(input.tanVec); + asinVec = nbl::hlsl::asin(input.asinVec); + atanVec = nbl::hlsl::atan(input.atanVec); + sinhVec = 
nbl::hlsl::sinh(input.sinhVec); + coshVec = nbl::hlsl::cosh(input.coshVec); + tanhVec = nbl::hlsl::tanh(input.tanhVec); + asinhVec = nbl::hlsl::asinh(input.asinhVec); + acoshVec = nbl::hlsl::acosh(input.acoshVec); + atanhVec = nbl::hlsl::atanh(input.atanhVec); + atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); + acosVec = nbl::hlsl::acos(input.acosVec); + modfVec = nbl::hlsl::modf(input.modfVec); + roundVec = nbl::hlsl::round(input.roundVec); + roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); + truncVec = nbl::hlsl::trunc(input.truncVec); + ceilVec = nbl::hlsl::ceil(input.ceilVec); + fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); + ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); + erfVec = nbl::hlsl::erf(input.erfVec); + erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); + + modfStruct = nbl::hlsl::modfStruct(input.modfStruct); + modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); + frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); + frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); + } +}; + +struct IntrinsicsIntputTestValues +{ + int bitCount; + float32_t3 crossLhs; + float32_t3 crossRhs; + float clampVal; + float clampMin; + float clampMax; + float32_t3 length; + float32_t3 normalize; + float32_t3 dotLhs; + float32_t3 dotRhs; + float32_t3x3 determinant; + uint32_t findMSB; + uint32_t findLSB; + float32_t3x3 inverse; + float32_t3x3 transpose; + float32_t3x3 mulLhs; + float32_t3x3 mulRhs; + float minA; + float minB; + float maxA; + float maxB; + float rsqrt; + uint32_t bitReverse; + float frac; + float mixX; + float mixY; + float mixA; + float sign; + float radians; + float degrees; + float stepEdge; + float stepX; + float smoothStepEdge0; + float smoothStepEdge1; + float smoothStepX; + uint32_t addCarryA; + uint32_t addCarryB; + uint32_t subBorrowA; + uint32_t subBorrowB; + + int32_t3 bitCountVec; + float32_t3 clampValVec; + float32_t3 clampMinVec; + float32_t3 clampMaxVec; + 
uint32_t3 findMSBVec; + uint32_t3 findLSBVec; + float32_t3 minAVec; + float32_t3 minBVec; + float32_t3 maxAVec; + float32_t3 maxBVec; + float32_t3 rsqrtVec; + uint32_t3 bitReverseVec; + float32_t3 fracVec; + float32_t3 mixXVec; + float32_t3 mixYVec; + float32_t3 mixAVec; + float32_t3 signVec; + float32_t3 radiansVec; + float32_t3 degreesVec; + float32_t3 stepEdgeVec; + float32_t3 stepXVec; + float32_t3 smoothStepEdge0Vec; + float32_t3 smoothStepEdge1Vec; + float32_t3 smoothStepXVec; + float32_t3 faceForwardN; + float32_t3 faceForwardI; + float32_t3 faceForwardNref; + float32_t3 reflectI; + float32_t3 reflectN; + float32_t3 refractI; + float32_t3 refractN; + float refractEta; + uint32_t3 addCarryAVec; + uint32_t3 addCarryBVec; + uint32_t3 subBorrowAVec; + uint32_t3 subBorrowBVec; +}; + +struct IntrinsicsTestValues +{ + int bitCount; + float clamp; + float length; + float dot; + float determinant; + int findMSB; + int findLSB; + float min; + float max; + float rsqrt; + float frac; + uint32_t bitReverse; + float mix; + float sign; + float radians; + float degrees; + float step; + float smoothStep; + + float32_t3 normalize; + float32_t3 cross; + int32_t3 bitCountVec; + float32_t3 clampVec; + uint32_t3 findMSBVec; + uint32_t3 findLSBVec; + float32_t3 minVec; + float32_t3 maxVec; + float32_t3 rsqrtVec; + uint32_t3 bitReverseVec; + float32_t3 fracVec; + float32_t3 mixVec; + float32_t3 signVec; + float32_t3 radiansVec; + float32_t3 degreesVec; + float32_t3 stepVec; + float32_t3 smoothStepVec; + float32_t3 faceForward; + float32_t3 reflect; + float32_t3 refract; + + float32_t3x3 mul; + float32_t3x3 transpose; + float32_t3x3 inverse; + + spirv::AddCarryOutput addCarry; + spirv::SubBorrowOutput subBorrow; + spirv::AddCarryOutput addCarryVec; + spirv::SubBorrowOutput subBorrowVec; + + void fillTestValues(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input) + { + bitCount = nbl::hlsl::bitCount(input.bitCount); + cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); + clamp 
= nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); + length = nbl::hlsl::length(input.length); + normalize = nbl::hlsl::normalize(input.normalize); + dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); + determinant = nbl::hlsl::determinant(input.determinant); + findMSB = nbl::hlsl::findMSB(input.findMSB); + findLSB = nbl::hlsl::findLSB(input.findLSB); + inverse = nbl::hlsl::inverse(input.inverse); + transpose = nbl::hlsl::transpose(input.transpose); + mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); + // TODO: fix min and max + min = nbl::hlsl::min(input.minA, input.minB); + max = nbl::hlsl::max(input.maxA, input.maxB); + rsqrt = nbl::hlsl::rsqrt(input.rsqrt); + bitReverse = nbl::hlsl::bitReverse(input.bitReverse); + frac = nbl::hlsl::fract(input.frac); + mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); + sign = nbl::hlsl::sign(input.sign); + radians = nbl::hlsl::radians(input.radians); + degrees = nbl::hlsl::degrees(input.degrees); + step = nbl::hlsl::step(input.stepEdge, input.stepX); + smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); + + bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); + clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); + findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); + findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); + // TODO: fix min and max + minVec = nbl::hlsl::min(input.minAVec, input.minBVec); + maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); + rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); + bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); + fracVec = nbl::hlsl::fract(input.fracVec); + mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); + + signVec = nbl::hlsl::sign(input.signVec); + radiansVec = nbl::hlsl::radians(input.radiansVec); + degreesVec = nbl::hlsl::degrees(input.degreesVec); + stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); + smoothStepVec = 
nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); + faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); + reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); + refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); + addCarry = nbl::hlsl::addCarry(input.addCarryA, input.addCarryB); + subBorrow = nbl::hlsl::subBorrow(input.subBorrowA, input.subBorrowB); + addCarryVec = nbl::hlsl::addCarry(input.addCarryAVec, input.addCarryBVec); + subBorrowVec = nbl::hlsl::subBorrow(input.subBorrowAVec, input.subBorrowBVec); + } +}; + +#endif diff --git a/22_CppCompat/app_resources/test.comp.hlsl b/22_CppCompat/app_resources/test.comp.hlsl index 98be76c53..17c59f970 100644 --- a/22_CppCompat/app_resources/test.comp.hlsl +++ b/22_CppCompat/app_resources/test.comp.hlsl @@ -3,9 +3,6 @@ //// For conditions of distribution and use, see copyright notice in nabla.h #include "app_resources/common.hlsl" -template -const static bool is_same_v = nbl::hlsl::is_same_v; - struct PushConstants { @@ -88,6 +85,7 @@ struct device_capabilities2 }; [numthreads(8, 8, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { fill(invocationID, 1); @@ -157,9 +155,9 @@ void main(uint3 invocationID : SV_DispatchThreadID) { static const uint16_t TEST_VALUE_0 = 5; static const uint32_t TEST_VALUE_1 = 0x80000000u; - static const uint32_t TEST_VALUE_2 = 0x8000000000000000u; + static const uint32_t TEST_VALUE_2 = 0x8000000000000000u; // TODO: Przmek is this intended? it warns because its too big from uint32_t static const uint32_t TEST_VALUE_3 = 0x00000001u; - static const uint32_t TEST_VALUE_4 = 0x0000000000000001u; + static const uint32_t TEST_VALUE_4 = 0x0000000000000001u; // TODO: Przmek is this intended? 
it warns because its too big from uint32_t fill(invocationID, 5.01); diff --git a/24_ColorSpaceTest/CMakeLists.txt b/24_ColorSpaceTest/CMakeLists.txt index 026add505..a2feb2cb8 100644 --- a/24_ColorSpaceTest/CMakeLists.txt +++ b/24_ColorSpaceTest/CMakeLists.txt @@ -32,4 +32,49 @@ add_test(NAME NBL_IMAGE_HASH_RUN_TESTS COMMAND "$" --test hash WORKING_DIRECTORY "$" COMMAND_EXPAND_LISTS +) + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/present.frag.hlsl + app_resources/push_constants.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/present.frag.hlsl", + "KEY": "present", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} ) \ No newline at end of file diff --git a/24_ColorSpaceTest/main.cpp b/24_ColorSpaceTest/main.cpp index 84c55ef3a..750756321 100644 --- a/24_ColorSpaceTest/main.cpp +++ b/24_ColorSpaceTest/main.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" @@ -160,26 +161,24 @@ class ColorSpaceTestSampleApp final : public SimpleWindowedApplication, public B return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); // Load Custom Shader - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); - if (!source) - return nullptr; + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; - return m_device->compileShader({ source.get() }); - }; - auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); + auto shader = IAsset::castDown(assets[0]); + return shader; + }; + auto fragmentShader = loadPrecompiledShader.operator()<"present">(); // "app_resources/present.frag.hlsl" if (!fragmentShader) - return logFail("Failed to Load and Compile Fragment Shader!"); + return logFail("Failed to load precompiled fragment shader!"); // Now surface indep resources m_semaphore = m_device->createSemaphore(m_submitIx); diff --git a/27_MPMCScheduler/app_resources/common.hlsl 
b/27_MPMCScheduler/app_resources/common.hlsl index 2fb8971ad..2783f13a2 100644 --- a/27_MPMCScheduler/app_resources/common.hlsl +++ b/27_MPMCScheduler/app_resources/common.hlsl @@ -1,8 +1,8 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t WorkgroupSizeX = 8; -NBL_CONSTEXPR uint32_t WorkgroupSizeY = 8; -NBL_CONSTEXPR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeX = 8; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeY = 8; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; struct PushConstants { diff --git a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl index 07c2ec8cf..02ae4ff40 100644 --- a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl +++ b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl @@ -68,8 +68,6 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase // This one shows up a lot so we give it a name const bool oddThread = glsl::gl_SubgroupInvocationID() & 1u; - ternary_operator > ternaryOp; - // Since every two consecutive columns are stored as one packed column, we divide the index by 2 to get the index of that packed column const uint32_t firstIndex = workgroup::SubgroupContiguousIndex() / 2; int32_t paddedIndex = int32_t(firstIndex) - pushConstants.halfPadding; @@ -82,7 +80,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase { // If mirrored, we need to invert which thread is loading lo and which is loading hi // If using zero-padding, useful to find out if we're outside of [0,1) bounds - bool invert = paddedIndex < 0 || paddedIndex >= pushConstants.imageHalfRowLength; + bool inPadding = paddedIndex < 0 || paddedIndex >= pushConstants.imageHalfRowLength; int32_t wrappedIndex = paddedIndex < 0 ? 
~paddedIndex : paddedIndex; // ~x = - x - 1 in two's complement (except maybe at the borders of representable range) wrappedIndex = paddedIndex < pushConstants.imageHalfRowLength ? wrappedIndex : pushConstants.imageRowLength + ~paddedIndex; const complex_t loOrHi = colMajorAccessor.get(colMajorOffset(wrappedIndex, y)); @@ -93,17 +91,17 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase if (glsl::gl_WorkGroupID().x) { - complex_t lo = ternaryOp(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = ternaryOp(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = nbl::hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = nbl::hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); // --------------------------------------------------- MIRROR PADDING ------------------------------------------------------------------------------------------- #ifdef MIRROR_PADDING - preloaded[localElementIndex] = ternaryOp(oddThread ^ invert, hi, lo); + preloaded[localElementIndex] = nbl::hlsl::select(oddThread != inPadding, hi, lo); // ----------------------------------------------------- ZERO PADDING ------------------------------------------------------------------------------------------- #else const complex_t Zero = { scalar_t(0), scalar_t(0) }; - preloaded[localElementIndex] = ternaryOp(invert, Zero, ternaryOp(oddThread, hi, lo)); + preloaded[localElementIndex] = nbl::hlsl::select(inPadding, Zero, nbl::hlsl::select(oddThread, hi, lo)); #endif // ------------------------------------------------ END PADDING DIVERGENCE ---------------------------------------------------------------------------------------- } @@ -116,7 +114,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[localElementIndex] = 
ternaryOp(oddThread ^ invert, oddThreadHi, evenThreadLo); + preloaded[localElementIndex] = nbl::hlsl::select(oddThread != inPadding, oddThreadHi, evenThreadLo); } paddedIndex += WorkgroupSize / 2; } diff --git a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl index eaecb5d0f..eca81e859 100644 --- a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl +++ b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl @@ -46,8 +46,6 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas // This one shows up a lot so we give it a name const bool oddThread = glsl::gl_SubgroupInvocationID() & 1u; - ternary_operator > ternaryOp; - if (glsl::gl_WorkGroupID().x) { // Even thread must index a y corresponding to an even element of the previous FFT pass, and the odd thread must index its DFT Mirror @@ -72,10 +70,10 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const vector loOrHiVector = vector (loOrHi.real(), loOrHi.imag()); const vector otherThreadloOrHiVector = glsl::subgroupShuffleXor< vector >(loOrHiVector, 1u); const complex_t otherThreadLoOrHi = { otherThreadloOrHiVector.x, otherThreadloOrHiVector.y }; - complex_t lo = ternaryOp(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = ternaryOp(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = nbl::hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = nbl::hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); - preloaded[channel][localElementIndex] = ternaryOp(oddThread, hi, lo); + preloaded[channel][localElementIndex] = nbl::hlsl::select(oddThread, hi, lo); packedColumnIndex += WorkgroupSize / 2; } @@ -112,7 +110,7 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { 
otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[channel][localElementIndex] = ternaryOp(oddThread, oddThreadHi, evenThreadLo); + preloaded[channel][localElementIndex] = nbl::hlsl::select(oddThread, oddThreadHi, evenThreadLo); packedColumnIndex += WorkgroupSize / 2; } diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index c3a0fa47e..0928d3b61 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -61,4 +61,72 @@ else() foreach(NBL_TARGET IN LISTS NBL_MSDFGEN_TARGETS) target_include_directories(${EXECUTABLE_NAME} PUBLIC $) endforeach() -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + shaders/globals.hlsl + shaders/runtimeDeviceConfigCaps.hlsl + shaders/main_pipeline/common.hlsl + shaders/main_pipeline/dtm.hlsl + shaders/main_pipeline/fragment.hlsl + shaders/main_pipeline/fragment_shader.hlsl + shaders/main_pipeline/fragment_shader_debug.hlsl + shaders/main_pipeline/line_style.hlsl + shaders/main_pipeline/resolve_alphas.hlsl + shaders/main_pipeline/vertex_shader.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(REQUIRED_CAPS [=[ +{ + "kind": "features", + "name": "fragmentShaderPixelInterlock", + "type": "bool", + "values": [1] +} +]=]) + +set(JSON [=[ +[ + { + "INPUT": "shaders/main_pipeline/vertex_shader.hlsl", + "KEY": "main_pipeline_vertex_shader", + "CAPS": [${REQUIRED_CAPS}] + }, + { + "INPUT": "shaders/main_pipeline/fragment.hlsl", + "KEY": "main_pipeline_fragment_shader", + "CAPS": [${REQUIRED_CAPS}] + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS 
${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index f4a886791..905177f6b 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1,5 +1,5 @@ // TODO: Copyright notice - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" @@ -929,84 +929,29 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio smart_refctd_ptr mainPipelineVertexShader = {}; std::array, 2u> geoTexturePipelineShaders = {}; { - smart_refctd_ptr shaderReadCache = nullptr; - smart_refctd_ptr shaderWriteCache = core::make_smart_refctd_ptr(); - auto shaderCachePath = localOutputCWD / "main_pipeline_shader_cache.bin"; - + // Load Custom Shader + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr { - core::smart_refctd_ptr shaderReadCacheFile; + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) { - system::ISystem::future_t> future; - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_READ); - if (future.wait()) - { - future.acquire().move_into(shaderReadCacheFile); - if (shaderReadCacheFile) - { - const size_t size = shaderReadCacheFile->getSize(); - if (size > 0ull) - { - std::vector contents(size); - system::IFile::success_t succ; - shaderReadCacheFile->read(succ, contents.data(), 0, size); - if (succ) - shaderReadCache = 
IShaderCompiler::CCache::deserialize(contents); - } - } - } - else - m_logger->log("Failed Openning Shader Cache File.", ILogger::ELL_ERROR); + m_logger->log("Failed to load a precompiled ahsder.", ILogger::ELL_ERROR); + return nullptr; } + - } - - // Load Custom Shader - auto loadCompileShader = [&](const std::string& relPath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); - if (!source) - return nullptr; - - return m_device->compileShader( ILogicalDevice::SShaderCreationParameters { .source = source.get(), .readCache = shaderReadCache.get(), .writeCache = shaderWriteCache.get(), .stage = IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY }); - }; + auto shader = IAsset::castDown(assets[0]); + return shader; + }; - mainPipelineFragmentShaders = loadCompileShader("../shaders/main_pipeline/fragment.hlsl"); - mainPipelineVertexShader = loadCompileShader("../shaders/main_pipeline/vertex_shader.hlsl"); - - core::smart_refctd_ptr shaderWriteCacheFile; - { - system::ISystem::future_t> future; - m_system->deleteFile(shaderCachePath); // temp solution instead of trimming, to make sure we won't have corrupted json - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_WRITE); - if (future.wait()) - { - future.acquire().move_into(shaderWriteCacheFile); - if (shaderWriteCacheFile) - { - auto serializedCache = shaderWriteCache->serialize(); - if (shaderWriteCacheFile) - { - system::IFile::success_t succ; - shaderWriteCacheFile->write(succ, serializedCache->getPointer(), 0, serializedCache->getSize()); - if (!succ) - m_logger->log("Failed Writing To Shader Cache File.", ILogger::ELL_ERROR); - } - } - else - 
m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } + mainPipelineFragmentShaders = loadPrecompiledShader.operator()<"main_pipeline_fragment_shader">(); // "../shaders/main_pipeline/fragment.hlsl" + mainPipelineVertexShader = loadPrecompiledShader.operator() <"main_pipeline_vertex_shader">(); // "../shaders/main_pipeline/vertex_shader.hlsl" } // Shared Blend Params between pipelines diff --git a/62_CAD/shaders/geotexture/common.hlsl b/62_CAD/shaders/geotexture/common.hlsl index 691cd3d3b..f2053e003 100644 --- a/62_CAD/shaders/geotexture/common.hlsl +++ b/62_CAD/shaders/geotexture/common.hlsl @@ -4,7 +4,7 @@ #include "../globals.hlsl" // Handle multiple geo textures, separate set, array of texture? index allocator? or multiple sets? -NBL_CONSTEXPR uint32_t MaxGeoTextures = 256; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxGeoTextures = 256; // GeoTexture Oriented Bounding Box struct GeoTextureOBB diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 5c3681910..ead5a5fd9 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -1,12 +1,6 @@ #ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ #define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ -#ifdef __HLSL_VERSION -#ifndef NBL_USE_SPIRV_BUILTINS -#include "runtimeDeviceConfigCaps.hlsl" // defines DeviceConfigCaps, uses JIT device caps -#endif -#endif - // TODO[Erfan]: Turn off in the future, but keep enabled to test // #define NBL_FORCE_EMULATED_FLOAT_64 @@ -352,8 +346,8 @@ static_assert(offsetof(CurveBox, curveMax[0]) == 56u); static_assert(sizeof(CurveBox) == 80u); #endif -NBL_CONSTEXPR uint32_t InvalidRigidSegmentIndex = 0xffffffff; -NBL_CONSTEXPR float InvalidStyleStretchValue = nbl::hlsl::numeric_limits::infinity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidRigidSegmentIndex = 0xffffffff; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float InvalidStyleStretchValue = 
nbl::hlsl::numeric_limits::infinity; // TODO[Przemek]: we will need something similar to LineStyles but related to heigh shading settings which is user customizable (like stipple patterns) and requires upper_bound to figure out the color based on height value. @@ -547,27 +541,27 @@ inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) } #endif -NBL_CONSTEXPR uint32_t ImagesBindingArraySize = 128; -NBL_CONSTEXPR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture -NBL_CONSTEXPR uint32_t AlphaBits = 32u - MainObjectIdxBits; -NBL_CONSTEXPR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; -NBL_CONSTEXPR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; -NBL_CONSTEXPR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t ImagesBindingArraySize = 128; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t AlphaBits = 32u - MainObjectIdxBits; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidCustomClipRectIndex = 
nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; // Hatches -NBL_CONSTEXPR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; -NBL_CONSTEXPR MajorAxis SelectedMinorAxis = MajorAxis::MAJOR_X; //(MajorAxis) (1 - (uint32_t) SelectedMajorAxis); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR MajorAxis SelectedMinorAxis = MajorAxis::MAJOR_X; //(MajorAxis) (1 - (uint32_t) SelectedMajorAxis); // Text or MSDF Hatches -NBL_CONSTEXPR float MSDFPixelRange = 4.0f; -NBL_CONSTEXPR float MSDFPixelRangeHalf = MSDFPixelRange / 2.0f; -NBL_CONSTEXPR float MSDFSize = 64.0f; -NBL_CONSTEXPR uint32_t MSDFMips = 4; -NBL_CONSTEXPR float HatchFillMSDFSceenSpaceSize = 8.0; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float MSDFPixelRange = 4.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float MSDFPixelRangeHalf = MSDFPixelRange / 2.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float MSDFSize = 64.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MSDFMips = 4; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float HatchFillMSDFSceenSpaceSize = 8.0; inline bool isInvalidGridDtmHeightValue(float value) { diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 90394e935..df566f002 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -706,19 +706,19 @@ PSInput vtxMain(uint vertexID : SV_VertexID) if (corner.x == 0.0f && corner.y == 0.0f) { - dilationVector.x = ieee754::flipSign(dilationVector.x); + dilationVector.x = ieee754::flipSign(dilationVector.x, true); uvOffset.x = -uvOffset.x; uvOffset.y = -uvOffset.y; } else if (corner.x == 0.0f && corner.y == 1.0f) { - dilationVector.x = ieee754::flipSign(dilationVector.x); - dilationVector.y = ieee754::flipSign(dilationVector.y); + dilationVector.x = ieee754::flipSign(dilationVector.x, true); + 
dilationVector.y = ieee754::flipSign(dilationVector.y, true); uvOffset.x = -uvOffset.x; } else if (corner.x == 1.0f && corner.y == 1.0f) { - dilationVector.y = ieee754::flipSign(dilationVector.y); + dilationVector.y = ieee754::flipSign(dilationVector.y, true); } else if (corner.x == 1.0f && corner.y == 0.0f) { @@ -730,7 +730,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID) pfloat64_t2 worldSpaceExtentsYAxisFlipped; worldSpaceExtentsYAxisFlipped.x = worldSpaceExtents.x; - worldSpaceExtentsYAxisFlipped.y = ieee754::flipSign(worldSpaceExtents.y); + worldSpaceExtentsYAxisFlipped.y = ieee754::flipSign(worldSpaceExtents.y, true); const pfloat64_t2 vtxPos = topLeft + worldSpaceExtentsYAxisFlipped * _static_cast(corner); const pfloat64_t2 dilatedVtxPos = vtxPos + dilationVector; diff --git a/64_EmulatedFloatTest/CMakeLists.txt b/64_EmulatedFloatTest/CMakeLists.txt index aae93590d..af46da896 100644 --- a/64_EmulatedFloatTest/CMakeLists.txt +++ b/64_EmulatedFloatTest/CMakeLists.txt @@ -27,4 +27,55 @@ if(MSVC) target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") else() target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/test.comp.hlsl + app_resources/benchmark/benchmark.comp.hlsl + app_resources/benchmark/common.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/test.comp.hlsl", + "KEY": "test", + }, + { + "INPUT": "app_resources/benchmark/benchmark.comp.hlsl", + "KEY": "benchmark", + }, +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO 
${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl b/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl index b31da3737..a515f6bcb 100644 --- a/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl +++ b/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl @@ -66,6 +66,7 @@ uint64_t calcIntegral() } [numthreads(BENCHMARK_WORKGROUP_DIMENSION_SIZE_X, 1, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { static const uint32_t NativeToEmulatedRatio = 6; diff --git a/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl b/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl index 98875c42f..7f6d1dec1 100644 --- a/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl +++ b/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl @@ -4,10 +4,10 @@ #include -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 128u; -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_COUNT = 1024u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 128u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_COUNT = 
1024u; enum EF64_BENCHMARK_MODE { diff --git a/64_EmulatedFloatTest/app_resources/common.hlsl b/64_EmulatedFloatTest/app_resources/common.hlsl index aea1ce94d..0e8762c5a 100644 --- a/64_EmulatedFloatTest/app_resources/common.hlsl +++ b/64_EmulatedFloatTest/app_resources/common.hlsl @@ -8,7 +8,7 @@ #include #include -NBL_CONSTEXPR uint32_t WORKGROUP_SIZE = 1; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WORKGROUP_SIZE = 1; using namespace nbl; using namespace hlsl; diff --git a/64_EmulatedFloatTest/app_resources/test.comp.hlsl b/64_EmulatedFloatTest/app_resources/test.comp.hlsl index 7681e80a5..e95eadd49 100644 --- a/64_EmulatedFloatTest/app_resources/test.comp.hlsl +++ b/64_EmulatedFloatTest/app_resources/test.comp.hlsl @@ -12,6 +12,7 @@ PushConstants pc; [numthreads(WORKGROUP_SIZE, 1, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { const nbl::hlsl::emulated_float64_t a = nbl::hlsl::bit_cast >(pc.a); diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index 3fc635e87..a4f177f16 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" @@ -262,9 +262,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = base.m_logger.get(); - lp.workingDirectory = ""; // virtual root - // this time we load a shader directly from a file - auto assetBundle = base.m_assetMgr->getAsset("app_resources/test.comp.hlsl", lp); + lp.workingDirectory = "app_resources"; // virtual root + + auto key = nbl::this_example::builtin::build::get_spirv_key<"test">(base.m_device.get()); + auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) { @@ -274,26 +275,11 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr source = IAsset::castDown(assets[0]); - - auto* compilerSet = base.m_assetMgr->getCompilerSet(); - - nbl::asset::IShaderCompiler::SCompilerOptions options = {}; - options.stage = ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = base.m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - auto spirv = compilerSet->compileToSPIRV(source.get(), options); - - shader = base.m_device->compileShader({spirv.get()}); + shader = IAsset::castDown(assets[0]); } if (!shader) - base.logFail("Failed to create a GPU Shader, seems the Driver doesn't like 
the SPIR-V we're feeding it!\n"); + base.logFail("Failed to load precompiled \"test\" shader!\n"); nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { { @@ -928,9 +914,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = base.m_logger.get(); - lp.workingDirectory = ""; // virtual root + lp.workingDirectory = "app_resources"; // virtual root // this time we load a shader directly from a file - auto assetBundle = base.m_assetMgr->getAsset("app_resources/benchmark/benchmark.comp.hlsl", lp); + auto key = nbl::this_example::builtin::build::get_spirv_key<"benchmark">(m_device.get()); + auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) { @@ -940,26 +927,11 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr source = IAsset::castDown(assets[0]); - - auto* compilerSet = base.m_assetMgr->getCompilerSet(); - - IShaderCompiler::SCompilerOptions options = {}; - options.stage = ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = base.m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - auto spirv = compilerSet->compileToSPIRV(source.get(), options); - - shader = base.m_device->compileShader({spirv.get()}); + shader = IAsset::castDown(assets[0]); } if (!shader) - base.logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V 
we're feeding it!\n"); + base.logFail("Failed to load precompiled \"benchmark\" shader!\n"); nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { { diff --git a/66_HLSLBxDFTests/app_resources/test_compile.comp.hlsl b/66_HLSLBxDFTests/app_resources/test_compile.comp.hlsl index fcf510b21..2248784e9 100644 --- a/66_HLSLBxDFTests/app_resources/test_compile.comp.hlsl +++ b/66_HLSLBxDFTests/app_resources/test_compile.comp.hlsl @@ -8,14 +8,14 @@ using namespace nbl::hlsl; +using spectral_t = vector; using ray_dir_info_t = bxdf::ray_dir_info::SBasic; -using iso_interaction = bxdf::surface_interactions::SIsotropic; +using iso_interaction = bxdf::surface_interactions::SIsotropic; using aniso_interaction = bxdf::surface_interactions::SAnisotropic; using sample_t = bxdf::SLightSample; using iso_cache = bxdf::SIsotropicMicrofacetCache; using aniso_cache = bxdf::SAnisotropicMicrofacetCache; using quotient_pdf_t = sampling::quotient_and_pdf; -using spectral_t = vector; using iso_config_t = bxdf::SConfiguration; using aniso_config_t = bxdf::SConfiguration; @@ -32,6 +32,7 @@ void main(uint32_t3 ID : SV_DispatchThreadID) bxdf::reflection::SBeckmannAnisotropic beckmannAnisoBRDF; bxdf::reflection::SGGXIsotropic ggxIsoBRDF; bxdf::reflection::SGGXAnisotropic ggxAnisoBRDF; + bxdf::reflection::SIridescent iridBRDF; bxdf::transmission::SLambertian lambertianBSDF; bxdf::transmission::SOrenNayar orenNayarBSDF; @@ -42,6 +43,7 @@ void main(uint32_t3 ID : SV_DispatchThreadID) bxdf::transmission::SBeckmannDielectricAnisotropic beckmannAnisoBSDF; bxdf::transmission::SGGXDielectricIsotropic ggxIsoBSDF; bxdf::transmission::SGGXDielectricAnisotropic ggxAnisoBSDF; + bxdf::transmission::SIridescent iridBSDF; // do some nonsense calculations, but call all the relevant functions @@ -76,6 +78,9 @@ void main(uint32_t3 ID : SV_DispatchThreadID) s = ggxAnisoBRDF.generate(anisointer, u.xy, cache); L += s.L.direction; + qp = iridBRDF.quotient_and_pdf(s, anisointer, cache); + L -= qp.quotient; + qp = 
ggxAnisoBRDF.quotient_and_pdf(s, anisointer, cache); L -= qp.quotient; diff --git a/66_HLSLBxDFTests/app_resources/test_components.hlsl b/66_HLSLBxDFTests/app_resources/test_components.hlsl index 9631db05d..a2db7ef53 100644 --- a/66_HLSLBxDFTests/app_resources/test_components.hlsl +++ b/66_HLSLBxDFTests/app_resources/test_components.hlsl @@ -3,11 +3,6 @@ #include "tests_common.hlsl" -namespace nbl -{ -namespace hlsl -{ - template // only for cook torrance bxdfs struct TestNDF : TestBxDF { @@ -75,7 +70,7 @@ struct TestNDF : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -87,12 +82,13 @@ struct TestNDF : TestBxDF float reflectance; bool transmitted; + bool isNdfInfinity; NBL_IF_CONSTEXPR(aniso) { dg1_query_type dq = base_t::bxdf.ndf.template createDG1Query(base_t::anisointer, cache); - fresnel_type _f = bxdf::impl::getOrientedFresnel::__call(base_t::bxdf.fresnel, base_t::anisointer.getNdotV()); - quant_query_type qq = bxdf::impl::quant_query_helper::template __call(base_t::bxdf.ndf, _f, cache); - quant_type DG1 = base_t::bxdf.ndf.template DG1(dq, qq, s, base_t::anisointer); + fresnel_type _f = base_t::bxdf_t::__getOrientedFresnel(base_t::bxdf.fresnel, base_t::anisointer.getNdotV()); + quant_query_type qq = bxdf::impl::quant_query_helper::template __call(base_t::bxdf.ndf, _f, base_t::anisointer, cache); + quant_type DG1 = base_t::bxdf.ndf.template DG1(dq, qq, s, base_t::anisointer, isNdfInfinity); dg1 = DG1.microfacetMeasure * hlsl::abs(cache.getVdotH() / base_t::anisointer.getNdotV()); reflectance = _f(cache.getVdotH())[0]; NdotH = cache.getAbsNdotH(); @@ -101,15 +97,18 @@ struct TestNDF : TestBxDF else { dg1_query_type dq = base_t::bxdf.ndf.template createDG1Query(base_t::isointer, isocache); - fresnel_type _f = bxdf::impl::getOrientedFresnel::__call(base_t::bxdf.fresnel, base_t::isointer.getNdotV()); - 
quant_query_type qq = bxdf::impl::quant_query_helper::template __call(base_t::bxdf.ndf, _f, isocache); - quant_type DG1 = base_t::bxdf.ndf.template DG1(dq, qq, s, base_t::isointer); + fresnel_type _f = base_t::bxdf_t::__getOrientedFresnel(base_t::bxdf.fresnel, base_t::isointer.getNdotV()); + quant_query_type qq = bxdf::impl::quant_query_helper::template __call(base_t::bxdf.ndf, _f, base_t::isointer, isocache); + quant_type DG1 = base_t::bxdf.ndf.template DG1(dq, qq, s, base_t::isointer, isNdfInfinity); dg1 = DG1.microfacetMeasure * hlsl::abs(isocache.getVdotH() / base_t::isointer.getNdotV()); reflectance = _f(isocache.getVdotH())[0]; NdotH = isocache.getAbsNdotH(); transmitted = isocache.isTransmission(); } + if (isNdfInfinity) + return BET_INVALID; + if (transmitted) { float eta = base_t::rc.eta.x; @@ -148,7 +147,7 @@ struct TestNDF : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -181,13 +180,13 @@ struct TestNDF : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.verbose = initparams.verbose; t.initBxDF(t.rc); @@ -322,7 +321,7 @@ struct TestCTGenerateH : TestBxDF if (base_t::isointer.getNdotV() <= numeric_limits::min) return BET_INVALID; else if (traits_t::type == bxdf::BT_BSDF) - if (abs(base_t::isointer.getNdotV()) <= numeric_limits::min) + if (hlsl::abs(base_t::isointer.getNdotV()) <= numeric_limits::min) return BET_INVALID; ErrorType res = compute(); @@ -334,13 +333,13 @@ struct 
TestCTGenerateH : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.numSamples = initparams.samples; t.immediateFail = initparams.immediateFail; t.initBxDF(t.rc); @@ -376,7 +375,4 @@ struct TestCTGenerateH : TestBxDF }; #endif -} -} - #endif \ No newline at end of file diff --git a/66_HLSLBxDFTests/app_resources/tests.hlsl b/66_HLSLBxDFTests/app_resources/tests.hlsl index 9011aa2e5..8f26bc4ee 100644 --- a/66_HLSLBxDFTests/app_resources/tests.hlsl +++ b/66_HLSLBxDFTests/app_resources/tests.hlsl @@ -3,11 +3,6 @@ #include "tests_common.hlsl" -namespace nbl -{ -namespace hlsl -{ - template struct TestJacobian : TestBxDF { @@ -69,7 +64,6 @@ struct TestJacobian : TestBxDF if (!(s.isValid() && sx.isValid() && sy.isValid())) return BET_INVALID; - // TODO: add checks with need clamp trait if (traits_t::type == bxdf::BT_BRDF) { if (s.getNdotL() <= bit_cast(numeric_limits::min)) @@ -77,7 +71,7 @@ struct TestJacobian : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -115,7 +109,7 @@ struct TestJacobian : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -163,13 +157,13 @@ struct TestJacobian : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = 
random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.verbose = initparams.verbose; t.initBxDF(t.rc); @@ -245,7 +239,6 @@ struct TestReciprocity : TestBxDF if (!s.isValid()) return BET_INVALID; - // TODO: add checks with need clamp trait if (bxdf::traits::type == bxdf::BT_BRDF) { if (s.getNdotL() <= bit_cast(numeric_limits::min)) @@ -253,7 +246,7 @@ struct TestReciprocity : TestBxDF } else if (bxdf::traits::type == bxdf::BT_BSDF) { - if (abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -264,6 +257,7 @@ struct TestReciprocity : TestBxDF rec_s = sample_t::createFromTangentSpace(rec_localL, anisointer.getFromTangentSpace()); rec_isointer = iso_interaction_t::create(rec_V, base_t::rc.N); + rec_isointer.luminosityContributionHint = isointer.luminosityContributionHint; rec_anisointer = aniso_interaction_t::create(rec_isointer, base_t::rc.T, base_t::rc.B); rec_cache = cache; rec_cache.iso_cache.VdotH = cache.iso_cache.getLdotH(); @@ -330,7 +324,7 @@ struct TestReciprocity : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -363,13 +357,13 @@ struct TestReciprocity : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - 
t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.verbose = initparams.verbose; t.initBxDF(t.rc); @@ -517,7 +511,7 @@ struct TestBucket : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -530,13 +524,13 @@ struct TestBucket : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.numSamples = initparams.samples; t.initBxDF(t.rc); @@ -735,7 +729,7 @@ struct TestChi2 : TestBxDF void writeToEXR() { - std::string filename = std::format("chi2test_{}_{}.exr", base_t::rc.state, base_t::name); + std::string filename = std::format("chi2test_{}_{}.exr", base_t::rc.halfSeed, base_t::name); int totalWidth = phiSplits; int totalHeight = 2 * thetaSplits + 1; @@ -869,7 +863,7 @@ struct TestChi2 : TestBxDF cache.iso_cache.absNdotH = hlsl::abs(hlsl::dot(N, H)); cache.iso_cache.NdotH2 = cache.iso_cache.absNdotH * cache.iso_cache.absNdotH; - if (!cache.isValid(bxdf::fresnel::OrientedEtas >::create(1.f, hlsl::promote >(eta)))) + if (!cache.isValid(bxdf::fresnel::OrientedEtas >::create(1.f, hlsl::promote >(eta)))) return 0.f; const float32_t3 T = base_t::anisointer.getT(); @@ -911,7 +905,7 @@ struct TestChi2 : TestBxDF if (base_t::isointer.getNdotV() <= numeric_limits::min) return BET_INVALID; else if (traits_t::type == bxdf::BT_BSDF) - if (abs(base_t::isointer.getNdotV()) <= numeric_limits::min) + if (hlsl::abs(base_t::isointer.getNdotV()) <= numeric_limits::min) return BET_INVALID; ErrorType 
res = compute(); @@ -994,13 +988,13 @@ struct TestChi2 : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.numSamples = initparams.samples; t.thetaSplits = initparams.thetaSplits; t.phiSplits = initparams.phiSplits; @@ -1034,7 +1028,4 @@ struct TestChi2 : TestBxDF }; #endif -} -} - #endif \ No newline at end of file diff --git a/66_HLSLBxDFTests/app_resources/tests_common.hlsl b/66_HLSLBxDFTests/app_resources/tests_common.hlsl index c0a8d9614..a9a3ef2ec 100644 --- a/66_HLSLBxDFTests/app_resources/tests_common.hlsl +++ b/66_HLSLBxDFTests/app_resources/tests_common.hlsl @@ -41,31 +41,29 @@ using namespace IMATH; using json = nlohmann::json; #endif -namespace nbl -{ -namespace hlsl -{ +using namespace nbl; +using namespace hlsl; +using spectral_t = hlsl::vector; using ray_dir_info_t = bxdf::ray_dir_info::SBasic; -using iso_interaction = bxdf::surface_interactions::SIsotropic; +using iso_interaction = bxdf::surface_interactions::SIsotropic; using aniso_interaction = bxdf::surface_interactions::SAnisotropic; using sample_t = bxdf::SLightSample; using iso_cache = bxdf::SIsotropicMicrofacetCache; using aniso_cache = bxdf::SAnisotropicMicrofacetCache; using quotient_pdf_t = sampling::quotient_and_pdf; -using spectral_t = vector; using iso_config_t = bxdf::SConfiguration; using aniso_config_t = bxdf::SConfiguration; using iso_microfacet_config_t = bxdf::SMicrofacetConfiguration; using aniso_microfacet_config_t = bxdf::SMicrofacetConfiguration; -using bool32_t3 = vector; +using bool32_t3 = hlsl::vector; template struct ConvertToFloat01 { - using ret_t = conditional_t::Dimension==1, 
float, vector::Dimension> >; + using ret_t = conditional_t::Dimension==1, float, hlsl::vector::Dimension> >; static ret_t __call(T x) { @@ -78,19 +76,27 @@ bool checkEq(T a, T b, float32_t eps) { T _a = hlsl::abs(a); T _b = hlsl::abs(b); - return nbl::hlsl::all::Dimension> >(nbl::hlsl::max(_a / _b, _b / _a) <= hlsl::promote(1 + eps)); + return nbl::hlsl::all::Dimension> >(nbl::hlsl::max(_a / _b, _b / _a) <= hlsl::promote(1 + eps)); +} + +template<> +bool checkEq(float32_t a, float32_t b, float32_t eps) +{ + float32_t _a = hlsl::abs(a); + float32_t _b = hlsl::abs(b); + return nbl::hlsl::max(_a / _b, _b / _a) <= float32_t(1 + eps); } template bool checkLt(T a, T b) { - return nbl::hlsl::all::Dimension> >(a < b); + return nbl::hlsl::all::Dimension> >(a < b); } template bool checkZero(T a, float32_t eps) { - return nbl::hlsl::all::Dimension> >(nbl::hlsl::abs(a) < hlsl::promote(eps)); + return nbl::hlsl::all::Dimension> >(nbl::hlsl::abs(a) < hlsl::promote(eps)); } template<> @@ -110,12 +116,9 @@ struct SBxDFTestResources retval.u = ConvertToFloat01::__call(rng_vec3()); retval.u.x = hlsl::clamp(retval.u.x, retval.eps, 1.f-retval.eps); retval.u.y = hlsl::clamp(retval.u.y, retval.eps, 1.f-retval.eps); - // retval.u.z = 0.0; retval.V.direction = nbl::hlsl::normalize(sampling::UniformSphere::generate(ConvertToFloat01::__call(rng_vec2()))); retval.N = nbl::hlsl::normalize(sampling::UniformSphere::generate(ConvertToFloat01::__call(rng_vec2()))); - // if (hlsl::dot(retval.N, retval.V.direction) < 0) - // retval.V.direction = -retval.V.direction; float32_t3 tangent, bitangent; math::frisvad(retval.N, tangent, bitangent); @@ -131,11 +134,14 @@ struct SBxDFTestResources retval.alpha.y = ConvertToFloat01::__call(retval.rng()); retval.eta = ConvertToFloat01::__call(rng_vec2()) * hlsl::promote(1.5) + hlsl::promote(1.1); // range [1.1,2.6], also only do eta = eta/1.0 (air) retval.luma_coeff = float32_t3(0.2126, 0.7152, 0.0722); // luma coefficients for Rec. 
709 + + retval.Dinc = ConvertToFloat01::__call(retval.rng()) * 2400.0f + 100.0f; + retval.etaThinFilm = ConvertToFloat01::__call(retval.rng()) * 0.5 + 1.1f; // range [1.1,1.6] return retval; } float eps = 1e-3; // epsilon - uint32_t state; // init state seed, for debugging + uint32_t halfSeed; // init state seed, for debugging nbl::hlsl::Xoroshiro64Star rng; ray_dir_info_t V; @@ -147,12 +153,16 @@ struct SBxDFTestResources float32_t2 alpha; float32_t2 eta; // (eta, etak) float32_t3 luma_coeff; + + // thin film stuff; + float Dinc; // in nm [100, 2500] + float etaThinFilm; }; struct STestInitParams { bool logInfo; - uint32_t state; + uint32_t halfSeed; uint32_t samples; uint32_t thetaSplits; uint32_t phiSplits; @@ -184,6 +194,7 @@ struct TestBase rc = SBxDFTestResources::create(seed); isointer = iso_interaction::create(rc.V, rc.N); + isointer.luminosityContributionHint = rc.luma_coeff; anisointer = aniso_interaction::create(isointer, rc.T, rc.B); } @@ -315,6 +326,28 @@ struct TestBxDF> : } }; +template<> +struct TestBxDF> : TestBxDFBase> +{ + using base_t = TestBxDFBase>; + + void initBxDF(SBxDFTestResources _rc) + { + base_t::bxdf.ndf = base_t::bxdf_t::ndf_type::create(_rc.alpha.x); + using creation_params_t = base_t::bxdf_t::fresnel_type::creation_params_type; + creation_params_t params; + params.Dinc = _rc.Dinc; + params.ior1 = hlsl::promote(1.0); + params.ior2 = hlsl::promote(_rc.etaThinFilm); + params.ior3 = hlsl::promote(_rc.eta.x); + params.iork3 = hlsl::promote(_rc.eta.y); + base_t::bxdf.fresnel = base_t::bxdf_t::fresnel_type::create(params); +#ifndef __HLSL_VERSION + base_t::name = "Iridescent BRDF"; +#endif + } +}; + template<> struct TestBxDF> : TestBxDFBase> { @@ -354,7 +387,6 @@ struct TestBxDF> : TestB { using spectral_type = typename base_t::bxdf_t::spectral_type; base_t::bxdf.fresnel = bxdf::fresnel::Dielectric::create(bxdf::fresnel::OrientedEtas::create(base_t::isointer.getNdotV(bxdf::BxDFClampMode::BCM_ABS), hlsl::promote(_rc.eta.x))); - 
base_t::bxdf.luminosityContributionHint = _rc.luma_coeff; #ifndef __HLSL_VERSION base_t::name = "Thin smooth dielectric BSDF"; #endif @@ -438,37 +470,62 @@ struct TestBxDF> : TestBxD } }; +template +struct TestBxDF> : TestBxDFBase> +{ + using base_t = TestBxDFBase>; + + void initBxDF(SBxDFTestResources _rc) + { + base_t::bxdf.ndf = base_t::bxdf_t::ndf_type::create(_rc.alpha.x); + using creation_params_t = base_t::bxdf_t::fresnel_type::creation_params_type; + creation_params_t params; + params.Dinc = _rc.Dinc; + params.ior1 = hlsl::promote(1.0); + params.ior2 = hlsl::promote(_rc.etaThinFilm); + params.ior3 = hlsl::promote(_rc.eta.x); + base_t::bxdf.fresnel = base_t::bxdf_t::fresnel_type::create(params); +#ifndef __HLSL_VERSION + base_t::name = "Iridescent BSDF"; +#endif + } +}; + namespace reciprocity_test_impl { -template) +template && concepts::FloatingPointLikeVectorial) struct SIsotropic { + using this_t = SIsotropic; using ray_dir_info_type = RayDirInfo; using scalar_type = typename RayDirInfo::scalar_type; using vector3_type = typename RayDirInfo::vector3_type; + using spectral_type = Spectrum; // WARNING: Changed since GLSL, now arguments need to be normalized! 
- static SIsotropic create(NBL_CONST_REF_ARG(RayDirInfo) normalizedV, const vector3_type normalizedN) + static this_t create(NBL_CONST_REF_ARG(RayDirInfo) normalizedV, const vector3_type normalizedN) { - SIsotropic retval; + this_t retval; retval.V = normalizedV; retval.N = normalizedN; retval.NdotV = nbl::hlsl::dot(retval.N, retval.V.getDirection()); retval.NdotV2 = retval.NdotV * retval.NdotV; + retval.luminosityContributionHint = hlsl::promote(1.0); return retval; } template) - static SIsotropic copy(NBL_CONST_REF_ARG(I) other) + static this_t copy(NBL_CONST_REF_ARG(I) other) { - SIsotropic retval; + this_t retval; retval.V = other.getV(); retval.N = other.getN(); retval.NdotV = other.getNdotV(); retval.NdotV2 = other.getNdotV2(); retval.pathOrigin = bxdf::PathOrigin::PO_SENSOR; + retval.luminosityContributionHint = other.luminosityContributionHint; return retval; } @@ -481,12 +538,14 @@ struct SIsotropic scalar_type getNdotV2() NBL_CONST_MEMBER_FUNC { return NdotV2; } bxdf::PathOrigin getPathOrigin() NBL_CONST_MEMBER_FUNC { return pathOrigin; } + spectral_type getLuminosityContributionHint() NBL_CONST_MEMBER_FUNC { return luminosityContributionHint; } RayDirInfo V; vector3_type N; scalar_type NdotV; scalar_type NdotV2; bxdf::PathOrigin pathOrigin; + spectral_type luminosityContributionHint; }; template) @@ -497,7 +556,8 @@ struct SAnisotropic using ray_dir_info_type = typename isotropic_interaction_type::ray_dir_info_type; using scalar_type = typename ray_dir_info_type::scalar_type; using vector3_type = typename ray_dir_info_type::vector3_type; - using matrix3x3_type = matrix; + using matrix3x3_type = hlsl::matrix; + using spectral_type = typename isotropic_interaction_type::spectral_type; // WARNING: Changed since GLSL, now arguments need to be normalized! 
static this_t create( @@ -551,6 +611,7 @@ struct SAnisotropic scalar_type getNdotV(bxdf::BxDFClampMode _clamp = bxdf::BxDFClampMode::BCM_NONE) NBL_CONST_MEMBER_FUNC { return isotropic.getNdotV(_clamp); } scalar_type getNdotV2() NBL_CONST_MEMBER_FUNC { return isotropic.getNdotV2(); } bxdf::PathOrigin getPathOrigin() NBL_CONST_MEMBER_FUNC { return isotropic.getPathOrigin(); } + spectral_type getLuminosityContributionHint() NBL_CONST_MEMBER_FUNC { return isotropic.getLuminosityContributionHint(); } vector3_type getT() NBL_CONST_MEMBER_FUNC { return T; } vector3_type getB() NBL_CONST_MEMBER_FUNC { return B; } @@ -585,10 +646,10 @@ struct CustomIsoMicrofacetConfiguration; - using vector3_type = vector; - using monochrome_type = vector; - using matrix3x3_type = matrix; + using vector2_type = hlsl::vector; + using vector3_type = hlsl::vector; + using monochrome_type = hlsl::vector; + using matrix3x3_type = hlsl::matrix; using isotropic_interaction_type = Interaction; using anisotropic_interaction_type = reciprocity_test_impl::SAnisotropic; using sample_type = LS; @@ -599,12 +660,9 @@ struct CustomIsoMicrofacetConfiguration; +using rectest_iso_interaction = reciprocity_test_impl::SIsotropic; using rectest_aniso_interaction = reciprocity_test_impl::SAnisotropic; using rectest_iso_microfacet_config_t = reciprocity_test_impl::CustomIsoMicrofacetConfiguration; using rectest_aniso_microfacet_config_t = bxdf::SMicrofacetConfiguration; -} -} - #endif diff --git a/66_HLSLBxDFTests/main.cpp b/66_HLSLBxDFTests/main.cpp index a65b443c9..e4b43f4d7 100644 --- a/66_HLSLBxDFTests/main.cpp +++ b/66_HLSLBxDFTests/main.cpp @@ -33,34 +33,34 @@ struct PrintFailureCallback : FailureCallback { case BET_INVALID: if (logInfo) - fprintf(stderr, "[INFO] seed %u: %s skipping test due to invalid NdotV/NdotL config\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[INFO] seed %u: %s skipping test due to invalid NdotV/NdotL config\n", failedFor.rc.halfSeed, failedFor.name.c_str()); 
break; case BET_NEGATIVE_VAL: - fprintf(stderr, "[ERROR] seed %u: %s pdf/quotient/eval < 0\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s pdf/quotient/eval < 0\n", failedFor.rc.halfSeed, failedFor.name.c_str()); break; case BET_PDF_ZERO: - fprintf(stderr, "[ERROR] seed %u: %s pdf = 0\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s pdf = 0\n", failedFor.rc.halfSeed, failedFor.name.c_str()); break; case BET_QUOTIENT_INF: - fprintf(stderr, "[ERROR] seed %u: %s quotient -> inf\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s quotient -> inf\n", failedFor.rc.halfSeed, failedFor.name.c_str()); break; case BET_JACOBIAN: - fprintf(stderr, "[ERROR] seed %u: %s failed the jacobian * pdf test %s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s failed the jacobian * pdf test %s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; case BET_PDF_EVAL_DIFF: - fprintf(stderr, "[ERROR] seed %u: %s quotient * pdf != eval %s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s quotient * pdf != eval %s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; case BET_RECIPROCITY: - fprintf(stderr, "[ERROR] seed %u: %s failed the reciprocity test %s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s failed the reciprocity test %s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; case BET_PRINT_MSG: - fprintf(stderr, "[ERROR] seed %u: %s error message\n%s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s error message\n%s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; case BET_GENERATE_H: - 
fprintf(stderr, "[ERROR] seed %u: %s failed invalid H configuration generated %s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s failed invalid H configuration generated %s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; default: - fprintf(stderr, "[ERROR] seed %u: %s unknown error\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s unknown error\n", failedFor.rc.halfSeed, failedFor.name.c_str()); } #ifdef _NBL_DEBUG @@ -183,7 +183,7 @@ int main(int argc, char** argv) auto rJacobian = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN(rJacobian) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.verbose = testconfigs["TestJacobian"]["verbose"]; TestJacobian>::run(initparams, cb); @@ -193,6 +193,7 @@ int main(int argc, char** argv) TestJacobian, true>::run(initparams, cb); TestJacobian, false>::run(initparams, cb); TestJacobian,true>::run(initparams, cb); + TestJacobian, false>::run(initparams, cb); TestJacobian>::run(initparams, cb); TestJacobian>::run(initparams, cb); @@ -203,6 +204,7 @@ int main(int argc, char** argv) TestJacobian, true>::run(initparams, cb); TestJacobian, false>::run(initparams, cb); TestJacobian,true>::run(initparams, cb); + TestJacobian, false>::run(initparams, cb); FOR_EACH_END @@ -211,7 +213,7 @@ int main(int argc, char** argv) auto rReciprocity = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN(rReciprocity) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = 3; + initparams.halfSeed = i; initparams.verbose = testconfigs["TestReciprocity"]["verbose"]; TestReciprocity>::run(initparams, cb); @@ -221,16 +223,18 @@ int main(int argc, char** argv) TestReciprocity, true>::run(initparams, cb); TestReciprocity, false>::run(initparams, cb); TestReciprocity, true>::run(initparams, cb); + TestReciprocity, false>::run(initparams, cb); 
TestReciprocity>::run(initparams, cb); TestReciprocity>::run(initparams, cb); - TestReciprocity>::run(initparams, cb); + TestReciprocity>::run(initparams, cb); TestReciprocity>::run(initparams, cb); TestReciprocity>::run(initparams, cb); TestReciprocity, false>::run(initparams, cb); TestReciprocity, true>::run(initparams, cb); TestReciprocity, false>::run(initparams, cb); TestReciprocity, true>::run(initparams, cb); + TestReciprocity, false>::run(initparams, cb); FOR_EACH_END @@ -240,7 +244,7 @@ int main(int argc, char** argv) auto rBucket = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN(rBucket) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.samples = testconfigs["TestBucket"]["samples"]; TestBucket>::run(initparams, cb); @@ -249,6 +253,7 @@ int main(int argc, char** argv) TestBucket, true>::run(initparams, cb); TestBucket, false>::run(initparams, cb); TestBucket, true>::run(initparams, cb); + TestBucket, false>::run(initparams, cb); TestBucket>::run(initparams, cb); TestBucket>::run(initparams, cb); @@ -256,6 +261,7 @@ int main(int argc, char** argv) TestBucket, true>::run(initparams, cb); TestBucket, false>::run(initparams, cb); TestBucket, true>::run(initparams, cb); + TestBucket, false>::run(initparams, cb); FOR_EACH_END @@ -264,7 +270,7 @@ int main(int argc, char** argv) auto rChi2 = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN_EX(rChi2, std::execution::par_unseq) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.samples = testconfigs["TestChi2"]["samples"]; initparams.thetaSplits = testconfigs["TestChi2"]["thetaSplits"]; initparams.phiSplits = testconfigs["TestChi2"]["phiSplits"]; @@ -276,6 +282,7 @@ int main(int argc, char** argv) TestChi2, true>::run(initparams, cb); TestChi2, false>::run(initparams, cb); TestChi2, true>::run(initparams, cb); + TestChi2, false>::run(initparams, cb); TestChi2>::run(initparams, cb); 
TestChi2>::run(initparams, cb); @@ -283,15 +290,15 @@ int main(int argc, char** argv) TestChi2, true>::run(initparams, cb); TestChi2, false>::run(initparams, cb); TestChi2, true>::run(initparams, cb); + TestChi2, false>::run(initparams, cb); FOR_EACH_END -#if 0 // testing ndf jacobian * dg1, ONLY for cook torrance bxdfs runs = testconfigs["TestNDF"]["runs"]; auto rNdf = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN(rNdf) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.verbose = testconfigs["TestNDF"]["verbose"]; TestNDF, false>::run(initparams, cb); @@ -304,14 +311,13 @@ int main(int argc, char** argv) TestNDF, false>::run(initparams, cb); TestNDF, true>::run(initparams, cb); FOR_EACH_END -#endif -#if 0 + // test generated H that NdotV*VdotH>=0.0, VdotL calculation runs = testconfigs["TestCTGenerateH"]["runs"]; auto rGenerateH = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN_EX(rGenerateH, std::execution::par_unseq) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.samples = testconfigs["TestCTGenerateH"]["samples"]; initparams.immediateFail = testconfigs["TestCTGenerateH"]["immediateFail"]; @@ -325,27 +331,39 @@ int main(int argc, char** argv) TestCTGenerateH, false>::run(initparams, cb); TestCTGenerateH, true>::run(initparams, cb); FOR_EACH_END -#endif // test arccos angle sums { Xoroshiro64Star rng = Xoroshiro64Star::construct(uint32_t2(4, 2)); + math::sincos_accumulator angle_adder; + + auto Sin = [&](const float cosA) -> float + { + return nbl::hlsl::sqrt(1.f - cosA * cosA); + }; + for (uint32_t i = 0; i < 10; i++) { - const float a = rng() * numbers::pi; - const float b = rng() * numbers::pi; - const float c = rng() * numbers::pi; - const float d = rng() * numbers::pi; + const float a = ConvertToFloat01::__call(rng()) * 2.f - 1.f; + const float b = ConvertToFloat01::__call(rng()) * 2.f - 1.f; + const float c = 
ConvertToFloat01::__call(rng()) * 2.f - 1.f; + const float d = ConvertToFloat01::__call(rng()) * 2.f - 1.f; const float exAB = acos(a) + acos(b); - float res = math::getSumofArccosAB(a, b); - if (res != exAB) - fprintf(stderr, "[ERROR] math::getSumofArccosAB failed! expected %f, got %f\n", exAB, res); + angle_adder = math::sincos_accumulator::create(a, Sin(a)); + angle_adder.addAngle(b, Sin(b)); + float res = angle_adder.getSumofArccos(); + if (!checkEq(res, exAB, 1e-3)) + fprintf(stderr, "[ERROR] angle adding (2 angles) failed! expected %f, got %f\n", exAB, res); const float exABCD = exAB + acos(c) + acos(d); - res = math::getSumofArccosABCD(a, b, c, d); - if (res != exABCD) - fprintf(stderr, "[ERROR] math::getSumofArccosABCD failed! expected %f, got %f\n", exABCD, res); + angle_adder = math::sincos_accumulator::create(a, Sin(a)); + angle_adder.addAngle(b, Sin(b)); + angle_adder.addAngle(c, Sin(c)); + angle_adder.addAngle(d, Sin(d)); + res = angle_adder.getSumofArccos(); + if (!checkEq(res, exABCD, 1e-3)) + fprintf(stderr, "[ERROR] angle adding (4 angles) failed! 
expected %f, got %f\n", exABCD, res); } } diff --git a/67_RayQueryGeometry/CMakeLists.txt b/67_RayQueryGeometry/CMakeLists.txt index d26a90205..1fdfc03ce 100644 --- a/67_RayQueryGeometry/CMakeLists.txt +++ b/67_RayQueryGeometry/CMakeLists.txt @@ -25,4 +25,49 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/render.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/render.comp.hlsl", + "KEY": "render", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/67_RayQueryGeometry/app_resources/common.hlsl b/67_RayQueryGeometry/app_resources/common.hlsl index 68a353adc..ecac0f59d 100644 --- a/67_RayQueryGeometry/app_resources/common.hlsl +++ b/67_RayQueryGeometry/app_resources/common.hlsl @@ -3,7 +3,7 @@ #include 
"nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t WorkgroupSize = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = 16; enum NormalType : uint32_t { diff --git a/67_RayQueryGeometry/app_resources/render.comp.hlsl b/67_RayQueryGeometry/app_resources/render.comp.hlsl index 954598c9a..889e1f38b 100644 --- a/67_RayQueryGeometry/app_resources/render.comp.hlsl +++ b/67_RayQueryGeometry/app_resources/render.comp.hlsl @@ -1,7 +1,5 @@ #include "common.hlsl" -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" - #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" diff --git a/67_RayQueryGeometry/main.cpp b/67_RayQueryGeometry/main.cpp index 2783385f2..b35000485 100644 --- a/67_RayQueryGeometry/main.cpp +++ b/67_RayQueryGeometry/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" class RayQueryGeometryApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -150,8 +151,10 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const std::string shaderPath = "app_resources/render.comp.hlsl"; IAssetLoader::SAssetLoadParams lparams = {}; lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(shaderPath, lparams); + lparams.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key<"render">(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.data(), lparams); if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, shaderPath); @@ -160,10 +163,9 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const auto assets = 
bundle.getContents(); assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - auto shader = m_device->compileShader({shaderSrc.get()}); + smart_refctd_ptr shader = IAsset::castDown(assets[0]); if (!shader) - return logFail("Failed to create shader!"); + return logFail("Failed to load precompiled shader!"); SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0u, .size = sizeof(SPushConstants)}; auto pipelineLayout = m_device->createPipelineLayout({ &pcRange, 1 }, smart_refctd_ptr(renderDs->getLayout()), nullptr, nullptr, nullptr); diff --git a/70_FLIPFluids/CMakeLists.txt b/70_FLIPFluids/CMakeLists.txt index a434ff32a..842492167 100644 --- a/70_FLIPFluids/CMakeLists.txt +++ b/70_FLIPFluids/CMakeLists.txt @@ -21,4 +21,100 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/compute/advectParticles.comp.hlsl + app_resources/compute/applyBodyForces.comp.hlsl + app_resources/compute/diffusion.comp.hlsl + app_resources/compute/genParticleVertices.comp.hlsl + app_resources/compute/particlesInit.comp.hlsl + app_resources/compute/prepareCellUpdate.comp.hlsl + app_resources/compute/pressureSolver.comp.hlsl + app_resources/compute/updateFluidCells.comp.hlsl + app_resources/cellUtils.hlsl + app_resources/common.hlsl + app_resources/descriptor_bindings.hlsl + app_resources/fluidParticles.fragment.hlsl + app_resources/fluidParticles.vertex.hlsl + app_resources/gridSampling.hlsl + app_resources/gridUtils.hlsl + app_resources/render_common.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) 
+set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/compute/diffusion.comp.hlsl", + "KEY": "diffusion", + }, + { + "INPUT": "app_resources/fluidParticles.vertex.hlsl", + "KEY": "fluidParticles_vertex", + }, + { + "INPUT": "app_resources/fluidParticles.fragment.hlsl", + "KEY": "fluidParticles_fragment", + }, + { + "INPUT": "app_resources/compute/particlesInit.comp.hlsl", + "KEY": "particlesInit", + }, + { + "INPUT": "app_resources/compute/genParticleVertices.comp.hlsl", + "KEY": "genParticleVertices", + }, + { + "INPUT": "app_resources/compute/prepareCellUpdate.comp.hlsl", + "KEY": "prepareCellUpdate", + }, + { + "INPUT": "app_resources/compute/updateFluidCells.comp.hlsl", + "KEY": "updateFluidCells", + }, + { + "INPUT": "app_resources/compute/applyBodyForces.comp.hlsl", + "KEY": "applyBodyForces", + }, + { + "INPUT": "app_resources/compute/pressureSolver.comp.hlsl", + "KEY": "pressureSolver", + }, + { + "INPUT": "app_resources/compute/advectParticles.comp.hlsl", + "KEY": "advectParticles", + } + +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl b/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl index e53c91d2d..288b82764 100644 --- 
a/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl @@ -67,6 +67,7 @@ void setAxisCellMaterial(uint32_t3 ID : SV_DispatchThreadID) } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void setNeighborAxisCellMaterial(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; @@ -127,6 +128,7 @@ float3 calculateDiffusionVelStep(int3 idx, float3 sampledVelocity, uint cellMate } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void iterateDiffusion(uint32_t3 ID : SV_DispatchThreadID) { uint3 gid = nbl::hlsl::glsl::gl_WorkGroupID(); @@ -212,6 +214,7 @@ void iterateDiffusion(uint32_t3 ID : SV_DispatchThreadID) // TODO: same as the pressure solver, this kernel/dispatch should be fused onto `iterateDiffusion` guarded by `isLastIteration` push constant [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void applyDiffusion(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; diff --git a/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl b/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl index b5db995c5..e71f05912 100644 --- a/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl @@ -89,6 +89,7 @@ float calculatePressureStep(int3 idx) } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void iteratePressureSystem(uint32_t3 ID : SV_DispatchThreadID) { uint3 gid = nbl::hlsl::glsl::gl_WorkGroupID(); @@ -168,6 +169,7 @@ void iteratePressureSystem(uint32_t3 ID : SV_DispatchThreadID) // TODO: why doesn't the last invocation of `iteratePressureSystem` have this step fused into it!? 
It would be just a simple push constant `isLastIteration` that would decide whether to run this dispatch [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void updateVelocities(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; diff --git a/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl b/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl index 62ddfd822..ea37660c1 100644 --- a/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl @@ -23,6 +23,7 @@ cbuffer GridData // TODO: f 0 is AIR, and >=2 is SOLID, we can perform Atomic OR 0b01 to have a particle set the cell to FLUID, and this dispatch looping over all grid cells is not needed! [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void updateFluidCells(uint32_t3 ID : SV_DispatchThreadID) { int3 cIdx = ID; diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index 899d00ba4..a70064245 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" // TODO: why is it not in nabla.h ? 
@@ -344,11 +345,12 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso if (!initGraphicsPipeline()) return logFail("Failed to initialize render pipeline!\n"); - auto createComputePipeline = [&](smart_refctd_ptr& pipeline, smart_refctd_ptr& pool, - smart_refctd_ptr& set, const std::string& shaderPath, const std::string& entryPoint, + + auto createComputePipeline = [&](smart_refctd_ptr& pipeline, smart_refctd_ptr& pool, + smart_refctd_ptr& set, const std::string& entryPoint, const std::span bindings, const asset::SPushConstantRange& pcRange = {}) -> void { - auto shader = compileShader(shaderPath, entryPoint); + auto shader = loadPrecompiledShader(); auto descriptorSetLayout1 = m_device->createDescriptorSetLayout(bindings); @@ -378,8 +380,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { // init particles pipeline const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 2 * sizeof(uint64_t) }; - createComputePipeline(m_initParticlePipeline, m_initParticlePool, m_initParticleDs, - "app_resources/compute/particlesInit.comp.hlsl", "main", piParticlesInit_bs1, pcRange); + createComputePipeline.operator()<"particlesInit">(m_initParticlePipeline, m_initParticlePool, m_initParticleDs, + "main", piParticlesInit_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[1]; @@ -395,8 +397,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { // generate particle vertex pipeline const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 3 * sizeof(uint64_t) }; - createComputePipeline(m_genParticleVerticesPipeline, m_genVerticesPool, m_genVerticesDs, - "app_resources/compute/genParticleVertices.comp.hlsl", "main", gpvGenVertices_bs1, pcRange); + createComputePipeline.operator()<"genParticleVertices">(m_genParticleVerticesPipeline, m_genVerticesPool, 
m_genVerticesDs, + "main", gpvGenVertices_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -414,8 +416,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso // update fluid cells pipelines { const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 2 * sizeof(uint64_t) }; - createComputePipeline(m_accumulateWeightsPipeline, m_accumulateWeightsPool, m_accumulateWeightsDs, - "app_resources/compute/prepareCellUpdate.comp.hlsl", "main", ufcAccWeights_bs1, pcRange); + createComputePipeline.operator()<"prepareCellUpdate">(m_accumulateWeightsPipeline, m_accumulateWeightsPool, m_accumulateWeightsDs, + "main", ufcAccWeights_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -457,8 +459,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_updateFluidCellsPipeline, m_updateFluidCellsPool, m_updateFluidCellsDs, - "app_resources/compute/updateFluidCells.comp.hlsl", "updateFluidCells", ufcFluidCell_bs1); + createComputePipeline.operator()<"updateFluidCells">(m_updateFluidCellsPipeline, m_updateFluidCellsPool, m_updateFluidCellsDs, + "updateFluidCells", ufcFluidCell_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -479,8 +481,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_updateNeighborCellsPipeline, m_updateNeighborCellsPool, m_updateNeighborCellsDs, - "app_resources/compute/updateFluidCells.comp.hlsl", "updateNeighborFluidCells", ufcNeighborCell_bs1); + createComputePipeline.operator()<"updateFluidCells">(m_updateNeighborCellsPipeline, m_updateNeighborCellsPool, m_updateNeighborCellsDs, + "updateNeighborFluidCells", ufcNeighborCell_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -527,8 +529,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } { // apply forces pipeline - 
createComputePipeline(m_applyBodyForcesPipeline, m_applyForcesPool, m_applyForcesDs, - "app_resources/compute/applyBodyForces.comp.hlsl", "main", abfApplyForces_bs1); + createComputePipeline.operator()<"applyBodyForces">(m_applyBodyForcesPipeline, m_applyForcesPool, m_applyForcesDs, + "main", abfApplyForces_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -559,8 +561,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } // apply diffusion pipelines { - createComputePipeline(m_axisCellsPipeline, m_axisCellsPool, m_axisCellsDs, - "app_resources/compute/diffusion.comp.hlsl", "setAxisCellMaterial", dAxisCM_bs1); + createComputePipeline.operator()<"diffusion">(m_axisCellsPipeline, m_axisCellsPool, m_axisCellsDs, + "setAxisCellMaterial", dAxisCM_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -581,8 +583,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_neighborAxisCellsPipeline, m_neighborAxisCellsPool, m_neighborAxisCellsDs, - "app_resources/compute/diffusion.comp.hlsl", "setNeighborAxisCellMaterial", dNeighborAxisCM_bs1); + createComputePipeline.operator()<"diffusion">(m_neighborAxisCellsPipeline, m_neighborAxisCellsPool, m_neighborAxisCellsDs, + "setNeighborAxisCellMaterial", dNeighborAxisCM_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -603,10 +605,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - const std::string iterateKernel = "iterateDiffusion"; - const std::string applyKernel = "applyDiffusion"; - auto iterateShader = compileShader("app_resources/compute/diffusion.comp.hlsl", iterateKernel); - auto applyShader = compileShader("app_resources/compute/diffusion.comp.hlsl", applyKernel); + smart_refctd_ptr diffusion = loadPrecompiledShader<"diffusion">(); // "app_resources/compute/diffusion.comp.hlsl" auto descriptorSetLayout1 = m_device->createDescriptorSetLayout(dDiffuse_bs1); @@ -625,16 
+624,16 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { IGPUComputePipeline::SCreationParams params = {}; params.layout = pipelineLayout.get(); - params.shader.entryPoint = iterateKernel; - params.shader.shader = iterateShader.get(); + params.shader.entryPoint = "iterateDiffusion"; + params.shader.shader = diffusion.get(); m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_iterateDiffusionPipeline); } { IGPUComputePipeline::SCreationParams params = {}; params.layout = pipelineLayout.get(); - params.shader.entryPoint = applyKernel; - params.shader.shader = applyShader.get(); + params.shader.entryPoint = "applyDiffusion"; + params.shader.shader = diffusion.get(); m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_diffusionPipeline); } @@ -676,8 +675,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } // solve pressure system pipelines { - createComputePipeline(m_calcDivergencePipeline, m_calcDivergencePool, m_calcDivergenceDs, - "app_resources/compute/pressureSolver.comp.hlsl", "calculateNegativeDivergence", psDivergence_bs1); + createComputePipeline.operator()<"pressureSolver">(m_calcDivergencePipeline, m_calcDivergencePool, m_calcDivergenceDs, + "calculateNegativeDivergence", psDivergence_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -711,8 +710,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_iteratePressurePipeline, m_iteratePressurePool, m_iteratePressureDs, - "app_resources/compute/pressureSolver.comp.hlsl", "iteratePressureSystem", psIteratePressure_bs1); + createComputePipeline.operator()<"pressureSolver">(m_iteratePressurePipeline, m_iteratePressurePool, m_iteratePressureDs, + "iteratePressureSystem", psIteratePressure_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[5]; @@ -740,8 +739,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - 
createComputePipeline(m_updateVelPsPipeline, m_updateVelPsPool, m_updateVelPsDs, - "app_resources/compute/pressureSolver.comp.hlsl", "updateVelocities", psUpdateVelPs_bs1); + createComputePipeline.operator()<"pressureSolver">(m_updateVelPsPipeline, m_updateVelPsPool, m_updateVelPsDs, + "updateVelocities", psUpdateVelPs_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[4]; @@ -780,8 +779,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { // advect particles pipeline const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 2 * sizeof(uint64_t) }; - createComputePipeline(m_advectParticlesPipeline, m_advectParticlesPool, m_advectParticlesDs, - "app_resources/compute/advectParticles.comp.hlsl", "main", apAdvectParticles_bs1, pcRange); + createComputePipeline.operator()<"advectParticles">(m_advectParticlesPipeline, m_advectParticlesPool, m_advectParticlesDs, + "main", apAdvectParticles_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -1400,51 +1399,25 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso numParticles = m_gridData.particleInitSize.x * m_gridData.particleInitSize.y * m_gridData.particleInitSize.z * particlesPerCell; } - smart_refctd_ptr compileShader(const std::string& filePath, const std::string& entryPoint = "main") + template + smart_refctd_ptr loadPrecompiledShader() { IAssetLoader::SAssetLoadParams lparams = {}; lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(filePath, lparams); + lparams.workingDirectory = "app_resources"; + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.data(), lparams); if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { - m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + m_logger->log("Failed to find shader with key 
'%s'.", ILogger::ELL_ERROR, ShaderKey); exit(-1); } const auto assets = bundle.getContents(); assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - const auto hlslMetadata = static_cast(bundle.getMetadata()); - const auto shaderStage = hlslMetadata->shaderStages->front(); + smart_refctd_ptr shader = IAsset::castDown(assets[0]); - smart_refctd_ptr shader = shaderSrc; - if (entryPoint != "main") - { - auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - CHLSLCompiler::SOptions options = {}; - options.stage = shaderStage; - if (!(options.stage == IShader::E_SHADER_STAGE::ESS_COMPUTE || options.stage == IShader::E_SHADER_STAGE::ESS_FRAGMENT)) - options.stage = IShader::E_SHADER_STAGE::ESS_VERTEX; - options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - #ifndef _NBL_DEBUG - ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; - auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); - options.spirvOptimizer = opt.get(); - #endif - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = shaderSrc->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - - std::string dxcOptionStr[] = {"-E " + entryPoint}; - options.dxcOptions = std::span(dxcOptionStr); - - shader = compiler->compileToSPIRV((const char*)shaderSrc->getContent()->getPointer(), options); - } - - return m_device->compileShader({ shader.get() }); + return shader; } // TODO: there's a method in IUtilities for this @@ -1563,28 +1536,27 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso // init shaders and pipeline - auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr + auto loadPrecompiledShader = [&]() -> 
smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = m_logger.get(); + lparams.workingDirectory = "app_resources"; + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.data(), lparams); + if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { - IAssetLoader::SAssetLoadParams lparams = {}; - lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(filePath, lparams); - if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) - { - m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); - exit(-1); - } + m_logger->log("Failed to find shader with key '%s'.", ILogger::ELL_ERROR, ShaderKey); + exit(-1); + } - const auto assets = bundle.getContents(); - assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - if (!shaderSrc) - return nullptr; + const auto assets = bundle.getContents(); + assert(assets.size() == 1); + smart_refctd_ptr shader = IAsset::castDown(assets[0]); - return m_device->compileShader({ shaderSrc.get() }); - }; - auto vs = compileShader("app_resources/fluidParticles.vertex.hlsl"); - auto fs = compileShader("app_resources/fluidParticles.fragment.hlsl"); + return shader; + }; + auto vs = loadPrecompiledShader.operator()<"fluidParticles_vertex">(); // "app_resources/fluidParticles.vertex.hlsl" + auto fs = loadPrecompiledShader.operator()<"fluidParticles_fragment">(); // "app_resources/fluidParticles.fragment.hlsl" smart_refctd_ptr descriptorSetLayout1; { diff --git a/71_RayTracingPipeline/CMakeLists.txt b/71_RayTracingPipeline/CMakeLists.txt index 07b0fd396..d7bb13671 100644 --- a/71_RayTracingPipeline/CMakeLists.txt +++ b/71_RayTracingPipeline/CMakeLists.txt @@ -34,4 +34,104 @@ if(NBL_BUILD_IMGUI) endif() endif() +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + 
app_resources/light_directional.rcall.hlsl + app_resources/light_point.rcall.hlsl + app_resources/light_spot.rcall.hlsl + app_resources/present.frag.hlsl + app_resources/raytrace.rahit.hlsl + app_resources/raytrace.rchit.hlsl + app_resources/raytrace.rgen.hlsl + app_resources/raytrace.rint.hlsl + app_resources/raytrace.rmiss.hlsl + app_resources/raytrace_procedural.rchit.hlsl + app_resources/raytrace_shadow.rahit.hlsl + app_resources/raytrace_shadow.rmiss.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/raytrace.rgen.hlsl", + "KEY": "raytrace_rgen", + }, + { + "INPUT": "app_resources/raytrace.rchit.hlsl", + "KEY": "raytrace_rchit", + }, + { + "INPUT": "app_resources/raytrace_procedural.rchit.hlsl", + "KEY": "raytrace_procedural_rchit", + }, + { + "INPUT": "app_resources/raytrace.rint.hlsl", + "KEY": "raytrace_rint", + }, + { + "INPUT": "app_resources/raytrace.rahit.hlsl", + "KEY": "raytrace_rahit", + }, + { + "INPUT": "app_resources/raytrace_shadow.rahit.hlsl", + "KEY": "raytrace_shadow_rahit", + }, + { + "INPUT": "app_resources/raytrace.rmiss.hlsl", + "KEY": "raytrace_rmiss", + }, + { + "INPUT": "app_resources/raytrace_shadow.rmiss.hlsl", + "KEY": "raytrace_shadow_rmiss", + }, + { + "INPUT": "app_resources/light_directional.rcall.hlsl", + "KEY": "light_directional_rcall", + }, + { + "INPUT": "app_resources/light_point.rcall.hlsl", + "KEY": "light_point_rcall", + }, + { + "INPUT": "app_resources/light_spot.rcall.hlsl", + "KEY": "light_spot_rcall", + }, + { + "INPUT": "app_resources/present.frag.hlsl", + "KEY": "present_frag", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE 
NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) + diff --git a/71_RayTracingPipeline/app_resources/common.hlsl b/71_RayTracingPipeline/app_resources/common.hlsl index f9d67af78..502b53160 100644 --- a/71_RayTracingPipeline/app_resources/common.hlsl +++ b/71_RayTracingPipeline/app_resources/common.hlsl @@ -4,6 +4,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/cpp_compat/basic.h" #include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" NBL_CONSTEXPR uint32_t WorkgroupSize = 16; NBL_CONSTEXPR uint32_t MAX_UNORM_10 = 1023; @@ -78,6 +79,9 @@ struct MaterialPacked return (xi>>22) > alpha; } }; +#ifdef __HLSL_VERSION +NBL_REGISTER_OBJ_TYPE(MaterialPacked, 4) +#endif struct SProceduralGeomInfo { @@ -103,6 +107,9 @@ struct STriangleGeomInfo uint32_t indexType : 1; // 16 bit, 32 bit }; +#ifdef __HLSL_VERSION +NBL_REGISTER_OBJ_TYPE(STriangleGeomInfo, 8) +#endif enum E_GEOM_TYPE : uint16_t { diff --git a/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl index 956ad5fe6..da7cc1594 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl @@ -10,7 +10,8 @@ using namespace nbl::hlsl; void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes attribs) { const int instID = spirv::InstanceCustomIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)); + const static uint64_t STriangleGeomInfoAlignment = 
nbl::hlsl::alignment_of_v; + const STriangleGeomInfo geom = vk::BufferPointer(pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)).Get(); const uint32_t bitpattern = payload.pcg(); // Cannot use spirv::ignoreIntersectionKHR and spirv::terminateRayKHR due to https://github.com/microsoft/DirectXShaderCompiler/issues/7279 diff --git a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl index 0a8bc5ec8..e6ebcda78 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl @@ -38,9 +38,9 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) if (normalBufferAddress == 0) { - float3 v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * 12); - float3 v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * 12); - float3 v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * 12); + float3 v0 = (nbl::hlsl::bda::__ptr::create(vertexBufferAddress) + indices[0]).deref().load(); + float3 v1 = (nbl::hlsl::bda::__ptr::create(vertexBufferAddress) + indices[1]).deref().load(); + float3 v2 = (nbl::hlsl::bda::__ptr::create(vertexBufferAddress) + indices[2]).deref().load(); return normalize(cross(v2 - v0, v1 - v0)); } @@ -50,9 +50,9 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) { case NT_R8G8B8A8_SNORM: { - uint32_t v0 = vk::RawBufferLoad(normalBufferAddress + indices[0] * 4); - uint32_t v1 = vk::RawBufferLoad(normalBufferAddress + indices[1] * 4); - uint32_t v2 = vk::RawBufferLoad(normalBufferAddress + indices[2] * 4); + uint32_t v0 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[0]).deref().load(); + uint32_t v1 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[1]).deref().load(); + uint32_t v2 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[2]).deref().load(); n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); n1 = 
normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); @@ -61,9 +61,13 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) break; case NT_R32G32B32_SFLOAT: { - n0 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[0] * 12)); - n1 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[1] * 12)); - n2 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[2] * 12)); + float3 v0 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[0]).deref().load(); + float3 v1 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[1]).deref().load(); + float3 v2 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[2]).deref().load(); + + n0 = normalize(v0); + n1 = normalize(v1); + n2 = normalize(v2); } break; } @@ -81,7 +85,8 @@ void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes const int primID = spirv::PrimitiveId; const int instanceCustomIndex = spirv::InstanceCustomIndexKHR; const int geometryIndex = spirv::RayGeometryIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(STriangleGeomInfo)); + const static uint64_t STriangleGeomInfoAlignment = nbl::hlsl::alignment_of_v; + const STriangleGeomInfo geom = vk::BufferPointer(pc.triangleGeomInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(STriangleGeomInfo)).Get(); const float32_t3 vertexNormal = calculateNormals(primID, geom, attribs.barycentrics); const float32_t3 worldNormal = normalize(mul(vertexNormal, transpose(spirv::WorldToObjectKHR)).xyz); diff --git a/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl index efc99cad9..c42d5a7df 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl @@ -1,6 +1,5 @@ #include "common.hlsl" -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" 
#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" @@ -80,15 +79,16 @@ void main() Material material; MaterialId materialId = payload.materialId; + const static uint64_t MaterialPackedAlignment = nbl::hlsl::alignment_of_v; // we use negative index to indicate that this is a procedural geometry if (materialId.isHitProceduralGeom()) { - const MaterialPacked materialPacked = vk::RawBufferLoad(pc.proceduralGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(SProceduralGeomInfo)); + const MaterialPacked materialPacked = vk::BufferPointer(pc.proceduralGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(SProceduralGeomInfo)).Get(); material = nbl::hlsl::_static_cast(materialPacked); } else { - const MaterialPacked materialPacked = vk::RawBufferLoad(pc.triangleGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(STriangleGeomInfo)); + const MaterialPacked materialPacked = vk::BufferPointer(pc.triangleGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(STriangleGeomInfo)).Get(); material = nbl::hlsl::_static_cast(materialPacked); } diff --git a/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl index 72f9beffd..551be1c8a 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl @@ -36,8 +36,9 @@ void main() const int primID = spirv::PrimitiveId; + const static uint64_t SProceduralGeomInfoAlignment = nbl::hlsl::alignment_of_v; // Sphere data - SProceduralGeomInfo sphere = vk::RawBufferLoad(pc.proceduralGeomInfoBuffer + primID * sizeof(SProceduralGeomInfo)); + SProceduralGeomInfo sphere = vk::BufferPointer(pc.proceduralGeomInfoBuffer + primID * sizeof(SProceduralGeomInfo)).Get(); const float32_t tHit = hitSphere(sphere, ray); diff --git a/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl index 
e41551512..d87b8dd5d 100644 --- a/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl @@ -1,6 +1,7 @@ #include "common.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" using namespace nbl::hlsl; @@ -10,7 +11,8 @@ using namespace nbl::hlsl; void main(inout OcclusionPayload payload, in BuiltInTriangleIntersectionAttributes attribs) { const int instID = spirv::InstanceCustomIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)); + const static uint64_t STriangleGeomInfoAlignment = nbl::hlsl::alignment_of_v; + const STriangleGeomInfo geom = vk::BufferPointer(pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)).Get(); const Material material = nbl::hlsl::_static_cast(geom.material); const float attenuation = (1.f-material.alpha) * payload.attenuation; diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index 59b610f4b..ecaf53b7f 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -3,6 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/indirect_commands.hlsl" @@ -106,95 +108,42 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; - smart_refctd_ptr shaderReadCache = nullptr; - smart_refctd_ptr shaderWriteCache = core::make_smart_refctd_ptr(); - auto shaderCachePath = localOutputCWD / "main_pipeline_shader_cache.bin"; - - { - core::smart_refctd_ptr shaderReadCacheFile; - { - system::ISystem::future_t> future; - 
m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_READ); - if (future.wait()) - { - future.acquire().move_into(shaderReadCacheFile); - if (shaderReadCacheFile) - { - const size_t size = shaderReadCacheFile->getSize(); - if (size > 0ull) - { - std::vector contents(size); - system::IFile::success_t succ; - shaderReadCacheFile->read(succ, contents.data(), 0, size); - if (succ) - shaderReadCache = IShaderCompiler::CCache::deserialize(contents); - } - } - } - else - m_logger->log("Failed Openning Shader Cache File.", ILogger::ELL_ERROR); - } - - } - // Load Custom Shader - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return nullptr; // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto sourceRaw = IAsset::castDown(assets[0]); - if (!sourceRaw) + auto shader = IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); return nullptr; + } - return m_device->compileShader({ sourceRaw.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + return shader; }; // load shaders - const auto raygenShader = loadCompileAndCreateShader("app_resources/raytrace.rgen.hlsl"); - const auto closestHitShader = loadCompileAndCreateShader("app_resources/raytrace.rchit.hlsl"); - const auto proceduralClosestHitShader = loadCompileAndCreateShader("app_resources/raytrace_procedural.rchit.hlsl"); - const auto intersectionHitShader = 
loadCompileAndCreateShader("app_resources/raytrace.rint.hlsl"); - const auto anyHitShaderColorPayload = loadCompileAndCreateShader("app_resources/raytrace.rahit.hlsl"); - const auto anyHitShaderShadowPayload = loadCompileAndCreateShader("app_resources/raytrace_shadow.rahit.hlsl"); - const auto missShader = loadCompileAndCreateShader("app_resources/raytrace.rmiss.hlsl"); - const auto missShadowShader = loadCompileAndCreateShader("app_resources/raytrace_shadow.rmiss.hlsl"); - const auto directionalLightCallShader = loadCompileAndCreateShader("app_resources/light_directional.rcall.hlsl"); - const auto pointLightCallShader = loadCompileAndCreateShader("app_resources/light_point.rcall.hlsl"); - const auto spotLightCallShader = loadCompileAndCreateShader("app_resources/light_spot.rcall.hlsl"); - const auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); - - core::smart_refctd_ptr shaderWriteCacheFile; - { - system::ISystem::future_t> future; - m_system->deleteFile(shaderCachePath); // temp solution instead of trimming, to make sure we won't have corrupted json - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_WRITE); - if (future.wait()) - { - future.acquire().move_into(shaderWriteCacheFile); - if (shaderWriteCacheFile) - { - auto serializedCache = shaderWriteCache->serialize(); - if (shaderWriteCacheFile) - { - system::IFile::success_t succ; - shaderWriteCacheFile->write(succ, serializedCache->getPointer(), 0, serializedCache->getSize()); - if (!succ) - m_logger->log("Failed Writing To Shader Cache File.", ILogger::ELL_ERROR); - } - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } + const auto raygenShader = loadPrecompiledShader.operator()<"raytrace_rgen">(); // "app_resources/raytrace.rgen.hlsl" + const auto closestHitShader = loadPrecompiledShader.operator()<"raytrace_rchit">(); // 
"app_resources/raytrace.rchit.hlsl" + const auto proceduralClosestHitShader = loadPrecompiledShader.operator()<"raytrace_procedural_rchit">(); // "app_resources/raytrace_procedural.rchit.hlsl" + const auto intersectionHitShader = loadPrecompiledShader.operator()<"raytrace_rint">(); // "app_resources/raytrace.rint.hlsl" + const auto anyHitShaderColorPayload = loadPrecompiledShader.operator()<"raytrace_rahit">(); // "app_resources/raytrace.rahit.hlsl" + const auto anyHitShaderShadowPayload = loadPrecompiledShader.operator()<"raytrace_shadow_rahit">(); // "app_resources/raytrace_shadow.rahit.hlsl" + const auto missShader = loadPrecompiledShader.operator()<"raytrace_rmiss">(); // "app_resources/raytrace.rmiss.hlsl" + const auto missShadowShader = loadPrecompiledShader.operator()<"raytrace_shadow_rmiss">(); // "app_resources/raytrace_shadow.rmiss.hlsl" + const auto directionalLightCallShader = loadPrecompiledShader.operator()<"light_directional_rcall">(); // "app_resources/light_directional.rcall.hlsl" + const auto pointLightCallShader = loadPrecompiledShader.operator()<"light_point_rcall">(); // "app_resources/light_point.rcall.hlsl" + const auto spotLightCallShader = loadPrecompiledShader.operator()<"light_spot_rcall">(); // "app_resources/light_spot.rcall.hlsl" + const auto fragmentShader = loadPrecompiledShader.operator()<"present_frag">(); // "app_resources/present.frag.hlsl" m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) diff --git a/72_CooperativeBinarySearch/CMakeLists.txt b/72_CooperativeBinarySearch/CMakeLists.txt new file mode 100644 index 000000000..b7e52875d --- /dev/null +++ b/72_CooperativeBinarySearch/CMakeLists.txt @@ -0,0 +1,24 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl b/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl new file mode 100644 index 000000000..0834e8f91 --- /dev/null +++ b/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl @@ -0,0 +1,120 @@ +// Copyright (C) 2024-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#pragma wave shader_stage(compute) + +#include "common.h" + +#include "nbl/builtin/hlsl/glsl_compat/subgroup_ballot.hlsl" + +using namespace nbl::hlsl; + +[[vk::push_constant]] PushConstants Constants; +[[vk::binding(0)]] StructuredBuffer Histogram; +[[vk::binding(1)]] RWStructuredBuffer Output; + + +uint getNextPowerOfTwo(uint number) { + return 2 << firstbithigh(number - 1); +} + +uint getLaneWithFirstBitSet(bool condition) { + uint4 ballot = WaveActiveBallot(condition); + if (all(ballot == 0)) { + return WaveGetLaneCount(); + } + return nbl::hlsl::glsl::subgroupBallotFindLSB(ballot); +} + +// findValue must be the same across the entire wave +// Could use something like WaveReadFirstLane to be fully sure +uint binarySearchLowerBoundFindValue(uint findValue, StructuredBuffer searchBuffer, uint searchBufferSize) { + uint lane = WaveGetLaneIndex(); + + uint left = 0; + uint right = searchBufferSize - 1; + + uint32_t range = getNextPowerOfTwo(right - left); + // do pivots as long as we can't coalesced load + while (range > WaveGetLaneCount()) + { + // there must be at least 1 gap between subsequent pivots + const uint32_t step = range / WaveGetLaneCount(); + const uint32_t halfStep = step >> 1; + const uint32_t pivotOffset = lane * step+halfStep; + const uint32_t pivotIndex = left + pivotOffset; + + uint4 notGreaterPivots = WaveActiveBallot(pivotIndex < right && !(findValue < searchBuffer[pivotIndex])); + uint partition = nbl::hlsl::glsl::subgroupBallotBitCount(notGreaterPivots); + // only move left if needed + if (partition != 0) + left += partition * step - halfStep; + // if we go into final half partition, the range becomes less too + range = partition != WaveGetLaneCount() ? step : halfStep; + } + + uint threadSearchIndex = left + lane; + bool laneValid = threadSearchIndex < searchBufferSize; + uint histAtIndex = laneValid ? 
searchBuffer[threadSearchIndex] : -1; + uint firstLaneGreaterThan = getLaneWithFirstBitSet(histAtIndex > findValue); + + return left + firstLaneGreaterThan - 1; +} + +static const uint32_t GroupsharedSize = WorkgroupSize; +groupshared uint shared_groupSearchBufferMinIndex; +groupshared uint shared_groupSearchBufferMaxIndex; +groupshared uint shared_groupSearchValues[WorkgroupSize]; + +// Binary search using the entire workgroup, making it log32 or log64 (every iteration, the possible set of +// values is divided by the number of lanes in a wave) +uint binarySearchLowerBoundCooperative(uint groupIndex, uint groupThread, StructuredBuffer searchBuffer, uint searchBufferSize) { + uint minSearchValue = groupIndex.x * GroupsharedSize; + uint maxSearchValue = ((groupIndex.x + 1) * GroupsharedSize) - 1; + + // On each workgroup, two subgroups do the search + // - One searches for the minimum, the other searches for the maximum + // - Store the minimum and maximum on groupshared memory, then do a barrier + uint wave = groupThread / WaveGetLaneCount(); + if (wave < 2) { + uint search = wave == 0 ? minSearchValue : maxSearchValue; + uint searchResult = binarySearchLowerBoundFindValue(search, searchBuffer, searchBufferSize); + if (WaveIsFirstLane()) { + if (wave == 0) shared_groupSearchBufferMinIndex = searchResult; + else shared_groupSearchBufferMaxIndex = searchResult; + } + } + GroupMemoryBarrierWithGroupSync(); + + // Since every instance has at least one triangle, we know that having workgroup values + // for each value in the range of minimum to maximum will suffice. + + // Write every value in the range to groupshared memory and barrier. 
+ uint idx = shared_groupSearchBufferMinIndex + groupThread.x; + if (idx <= shared_groupSearchBufferMaxIndex) { + shared_groupSearchValues[groupThread.x] = searchBuffer[idx]; + } + GroupMemoryBarrierWithGroupSync(); + + uint maxValueIndex = shared_groupSearchBufferMaxIndex - shared_groupSearchBufferMinIndex; + + uint searchValue = minSearchValue + groupThread; + uint currentSearchValueIndex = 0; + uint laneValue = shared_groupSearchBufferMaxIndex; + while (currentSearchValueIndex <= maxValueIndex) { + uint curValue = shared_groupSearchValues[currentSearchValueIndex]; + if (curValue > searchValue) { + laneValue = shared_groupSearchBufferMinIndex + currentSearchValueIndex - 1; + break; + } + currentSearchValueIndex ++; + } + + return laneValue; +} + +[numthreads(WorkgroupSize,1,1)] +void main(const uint3 thread : SV_DispatchThreadID, const uint3 groupThread : SV_GroupThreadID, const uint3 group : SV_GroupID) +{ + Output[thread.x] = binarySearchLowerBoundCooperative(group.x, groupThread.x, Histogram, Constants.EntityCount); +} \ No newline at end of file diff --git a/72_CooperativeBinarySearch/app_resources/common.h b/72_CooperativeBinarySearch/app_resources/common.h new file mode 100644 index 000000000..65f606b08 --- /dev/null +++ b/72_CooperativeBinarySearch/app_resources/common.h @@ -0,0 +1,15 @@ +#ifndef _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ +#define _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ + +#include +#include + +// TODO: NBL_CONSTEXPR_NSPC_VAR +static const uint32_t WorkgroupSize = 256; + +struct PushConstants +{ + uint32_t EntityCount; +}; + +#endif // _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ diff --git a/72_CooperativeBinarySearch/config.json.template b/72_CooperativeBinarySearch/config.json.template new file mode 100644 index 000000000..24adf54fb --- /dev/null +++ b/72_CooperativeBinarySearch/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + 
"configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} diff --git a/72_CooperativeBinarySearch/include/nbl/this_example/common.hpp b/72_CooperativeBinarySearch/include/nbl/this_example/common.hpp new file mode 100644 index 000000000..3745ca512 --- /dev/null +++ b/72_CooperativeBinarySearch/include/nbl/this_example/common.hpp @@ -0,0 +1,11 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + +#include "nbl/examples/examples.hpp" + +// example's own headers +#include "nbl/ui/ICursorControl.h" // TODO: why not in nabla.h ? +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/72_CooperativeBinarySearch/main.cpp b/72_CooperativeBinarySearch/main.cpp new file mode 100644 index 000000000..81724c1b8 --- /dev/null +++ b/72_CooperativeBinarySearch/main.cpp @@ -0,0 +1,266 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nbl/system/IApplicationFramework.h" +#include "app_resources/common.h" + +#include +#include +#include + + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +// +constexpr uint32_t TestCaseIndices[] = { +#include "testCaseData.h" +}; +constexpr uint32_t numIndices = sizeof(TestCaseIndices) / sizeof(TestCaseIndices[0]); +constexpr uint32_t lastValue = TestCaseIndices[numIndices - 1]; +// just some extra stuff over the edge +constexpr uint32_t totalValues = lastValue + 100; + + +void cpu_tests(); + +class CooperativeBinarySearch final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = application_templates::MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; +public: + CooperativeBinarySearch(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! 
+ if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_queue = m_device->getQueue(0, 0); + m_commandPool = m_device->createCommandPool(m_queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + m_commandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &m_cmdbuf,1 }, smart_refctd_ptr(m_logger)); + + smart_refctd_ptr shader; + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset("app_resources/binarySearch.comp.hlsl", lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return logFail("Could not load shader!"); + + auto source = IAsset::castDown(assets[0]); + // The down-cast should not fail! + assert(source); + + // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple + shader = m_device->compileShader({ source.get() }); + if (!shader) + return logFail("Creation of a GPU Shader to from CPU Shader source failed!"); + } + + const uint32_t bindingCount = 2u; + IGPUDescriptorSetLayout::SBinding bindings[bindingCount] = {}; + bindings[0].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; // [[vk::binding(0)]] StructuredBuffer Histogram; + bindings[1].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; // [[vk::binding(1)]] RWStructuredBuffer Output; + + for(int i = 0; i < bindingCount; ++i) + { + bindings[i].stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; + bindings[i].count = 1; + bindings[i].binding = i; + } + m_descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + { + SPushConstantRange pcRange = {}; + pcRange.stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; + pcRange.offset = 0u; + pcRange.size = sizeof(PushConstants); + auto layout = m_device->createPipelineLayout({ &pcRange,1 }, 
smart_refctd_ptr(m_descriptorSetLayout)); + IGPUComputePipeline::SCreationParams params = {}; + params.layout = layout.get(); + params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + return logFail("Failed to create compute pipeline!\n"); + } + + const size_t sizes[2] = {sizeof(TestCaseIndices),sizeof(uint32_t)*totalValues}; + for (uint32_t i = 0; i < bindingCount; i++) + { + m_buffers[i] = m_device->createBuffer(IGPUBuffer::SCreationParams { + {.size = sizes[i], .usage = + IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | + IGPUBuffer::E_USAGE_FLAGS::EUF_STORAGE_BUFFER_BIT, + } + }); + + auto reqs = m_buffers[i]->getMemoryReqs(); + reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits(); + + m_allocations[i] = m_device->allocate(reqs, m_buffers[i].get()); + + auto allocationType = i == 0 ? IDeviceMemoryAllocation::EMCAF_WRITE : IDeviceMemoryAllocation::EMCAF_READ; + auto mapResult = m_allocations[i].memory->map({ 0ull,m_allocations[i].memory->getAllocationSize() }, allocationType); + assert(mapResult); + } + + smart_refctd_ptr descriptorPool = nullptr; + { + IDescriptorPool::SCreateInfo createInfo = {}; + createInfo.maxSets = 1; + createInfo.maxDescriptorCount[static_cast(IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = bindingCount; + descriptorPool = m_device->createDescriptorPool(std::move(createInfo)); + } + + m_descriptorSet = descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout)); + + IGPUDescriptorSet::SDescriptorInfo descriptorInfos[bindingCount] = {}; + IGPUDescriptorSet::SWriteDescriptorSet writeDescriptorSets[bindingCount] = {}; + + for(int i = 0; i < bindingCount; ++i) + { + writeDescriptorSets[i].info = &descriptorInfos[i]; + writeDescriptorSets[i].dstSet = m_descriptorSet.get(); + writeDescriptorSets[i].binding = i; + writeDescriptorSets[i].count = 
bindings[i].count; + + descriptorInfos[i].desc = m_buffers[i]; + descriptorInfos[i].info.buffer.size = ~0ull; + } + + m_device->updateDescriptorSets(bindingCount, writeDescriptorSets, 0u, nullptr); + + // Write test data to the m_buffers[0] + auto outPtr = m_allocations[0].memory->getMappedPointer(); + assert(outPtr); + memcpy( + reinterpret_cast(outPtr), + reinterpret_cast(&TestCaseIndices[0]), + sizeof(TestCaseIndices) + ); + + // In contrast to fences, we just need one semaphore to rule all dispatches + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void workLoopBody() override + { + cpu_tests(); + + constexpr auto StartedValue = 0; + + smart_refctd_ptr progress = m_device->createSemaphore(StartedValue); + + m_cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + m_cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t layoutBufferBarrier[1] = { { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::HOST_BIT, + .srcAccessMask = ACCESS_FLAGS::HOST_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + // whole buffer because we transferred the contents into it + .range = {.offset = 0,.size = m_buffers[1]->getCreationParams().size,.buffer = m_buffers[1]} + } }; + + const IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo = { .bufBarriers = layoutBufferBarrier }; + m_cmdbuf->pipelineBarrier(EDF_NONE, depInfo); + + + const IGPUDescriptorSet* set = m_descriptorSet.get(); + PushConstants coopBinarySearchPC = { + .EntityCount = numIndices, + }; + + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0u, 1u, &set); + m_cmdbuf->pushConstants(m_pipeline->getLayout(), nbl::hlsl::ShaderStage::ESS_COMPUTE, 0u, sizeof(PushConstants), &coopBinarySearchPC); + 
m_cmdbuf->dispatch((totalValues + 255u) / 256u, 1u, 1u); + + layoutBufferBarrier[0].barrier.dep = layoutBufferBarrier[0].barrier.dep.nextBarrier(PIPELINE_STAGE_FLAGS::COPY_BIT,ACCESS_FLAGS::TRANSFER_READ_BIT); + m_cmdbuf->pipelineBarrier(EDF_NONE,depInfo); + + m_cmdbuf->end(); + + { + constexpr auto FinishedValue = 69; + IQueue::SSubmitInfo submitInfos[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + submitInfos[0].commandBuffers = cmdbufs; + const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = progress.get(),.value = FinishedValue,.stageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + submitInfos[0].signalSemaphores = signals; + m_api->startCapture(); + m_queue->submit(submitInfos); + m_api->endCapture(); + const ISemaphore::SWaitInfo waitInfos[] = { { + .semaphore = progress.get(), + .value = FinishedValue + } }; + m_device->blockForSemaphores(waitInfos); + } + + auto ptr = m_allocations[1].memory->getMappedPointer(); + assert(ptr); + + uint32_t* valuesPtr = reinterpret_cast(ptr); + for (uint32_t i = 0; i < totalValues; i++) { + uint32_t value = valuesPtr[i]; + const uint32_t* binarySearchResult = std::upper_bound(TestCaseIndices, TestCaseIndices + numIndices, i); + uint32_t lowerBoundIndex = std::distance(TestCaseIndices, binarySearchResult) - 1; + assert(value == lowerBoundIndex); + } + + m_keepRunning = false; + } + + bool keepRunning() override + { + return m_keepRunning; + } + + +private: + smart_refctd_ptr m_pipeline = nullptr; + smart_refctd_ptr m_descriptorSetLayout; + smart_refctd_ptr m_descriptorSet; + + smart_refctd_ptr m_buffers[2]; + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocations[2] = {}; + smart_refctd_ptr m_cmdbuf = nullptr; + IQueue* m_queue; + smart_refctd_ptr m_commandPool; + uint64_t m_iteration = 0; + constexpr static inline uint64_t MaxIterations = 200; + + bool m_keepRunning = true; +}; + +NBL_MAIN_FUNC(CooperativeBinarySearch) + +void cpu_tests() +{ 
+} diff --git a/72_CooperativeBinarySearch/testCaseData.h b/72_CooperativeBinarySearch/testCaseData.h new file mode 100644 index 000000000..16153780e --- /dev/null +++ b/72_CooperativeBinarySearch/testCaseData.h @@ -0,0 +1,1192 @@ +0, +298, +554, +582, +912, +1074, +1076, +1078, +1170, +1188, +2140, +2414, +2736, +2738, +3980, +4800, +5898, +5900, +6936, +8106, +8152, +8650, +8844, +8930, +9504, +10244, +10826, +10828, +11126, +11430, +12206, +13764, +14010, +15302, +15624, +15656, +16414, +16494, +17368, +17432, +18312, +18948, +19376, +19818, +20146, +20604, +21240, +22446, +23482, +24914, +25042, +25538, +26764, +27564, +27566, +28472, +29450, +30202, +31474, +32160, +32676, +33792, +33794, +34704, +36540, +37456, +37950, +38364, +39274, +40442, +40518, +41412, +41590, +41950, +42022, +42714, +43464, +43790, +43792, +44876, +44878, +46188, +46572, +47352, +47650, +48242, +49856, +49858, +50506, +50968, +50970, +51152, +51154, +52870, +52884, +53332, +53334, +53904, +53964, +53966, +53968, +53970, +53972, +53974, +53976, +53978, +53980, +54514, +54516, +54518, +54520, +54762, +55866, +56462, +56478, +56480, +56482, +57510, +57568, +57570, +57572, +57846, +57848, +58760, +59408, +59438, +60198, +60200, +60202, +60204, +60284, +60938, +61274, +61720, +62296, +63116, +63378, +63380, +63382, +63384, +63386, +63388, +63904, +64572, +65142, +65144, +65146, +65554, +65738, +66052, +67016, +67424, +67566, +68270, +68272, +68610, +69240, +69870, +70988, +72622, +73258, +73260, +73580, +74524, +74880, +74958, +74960, +74962, +75114, +75116, +75622, +77144, +77798, +77800, +78314, +79566, +79568, +79570, +79572, +79850, +79852, +81576, +81684, +81686, +82492, +82494, +82496, +82498, +83990, +84860, +84988, +84990, +85138, +85772, +86120, +86122, +86564, +87402, +87404, +87602, +88676, +88714, +88780, +89560, +89732, +90786, +91128, +91130, +91272, +91522, +91804, +92588, +92590, +92834, +93268, +93736, +94448, +94704, +94706, +95074, +95076, +96706, +97040, +97770, +98000, 
+98676, +99968, +100074, +100318, +100602, +100914, +101020, +101872, +101878, +103078, +104246, +104266, +105436, +106332, +106954, +107856, +108954, +110320, +110780, +111588, +111882, +112502, +112676, +113496, +114070, +115204, +115422, +115424, +115858, +116420, +117426, +118504, +118870, +119296, +119618, +119650, +120408, +120488, +121362, +121426, +122306, +122942, +123370, +123812, +124140, +124598, +125234, +126440, +127476, +128908, +129036, +129532, +130758, +131558, +131560, +132466, +133444, +134196, +135468, +136154, +136670, +137786, +137788, +138698, +140534, +140832, +141608, +142422, +143220, +143468, +143714, +144504, +145078, +145670, +146224, +146874, +147726, +148692, +149536, +151032, +151126, +153382, +154128, +155190, +155212, +156324, +156484, +156526, +157026, +158242, +158446, +158448, +158594, +159256, +160350, +160444, +161040, +161624, +162418, +162524, +162768, +163052, +163364, +163470, +164322, +164328, +165528, +166696, +166716, +167886, +168782, +169404, +170306, +171404, +172770, +173230, +174038, +174332, +174952, +175126, +175946, +176520, +177654, +177872, +177874, +178308, +178870, +179876, +180954, +181320, +181746, +182160, +183070, +184238, +184314, +185208, +185386, +185746, +185818, +186510, +187260, +187586, +187588, +188672, +188674, +189984, +190368, +191148, +191446, +192038, +193652, +193654, +194302, +194764, +194766, +194948, +194950, +196666, +196680, +197128, +197130, +197700, +198048, +198824, +199638, +200436, +200684, +200930, +201720, +202294, +202886, +203440, +204090, +204942, +205908, +206752, +208248, +208342, +210598, +211344, +212406, +212428, +213540, +213700, +213742, +214242, +215458, +215662, +215664, +215810, +216472, +217566, +217660, +218256, +218316, +218318, +218320, +218322, +218324, +218326, +218328, +218330, +218332, +218866, +218868, +218870, +218872, +219114, +220218, +220814, +220830, +220832, +220834, +221862, +221920, +221922, +221924, +222198, +222200, +223112, +223760, +223790, 
+224550, +224552, +224554, +224556, +225140, +225794, +226130, +226576, +227152, +227972, +228234, +228236, +228238, +228240, +228242, +228244, +228760, +229428, +229998, +230000, +230002, +230410, +230594, +230908, +231872, +232280, +232422, +233126, +233128, +233466, +234096, +234726, +235844, +237478, +238114, +238116, +238512, +239256, +239812, +240660, +241950, +243244, +243366, +244346, +244412, +244710, +245202, +246504, +246728, +246988, +247592, +248630, +249562, +250962, +251964, +252562, +253140, +253412, +254672, +255276, +256084, +256160, +256378, +257104, +257602, +257776, +258240, +258556, +258614, +259208, +260496, +261202, +261398, +262284, +262610, +262976, +263578, +264622, +265558, +266692, +266756, +268110, +268994, +269158, +269718, +270388, +270768, +271098, +271786, +272398, +272996, +273140, +273612, +274226, +274660, +275070, +275416, +275634, +275680, +276088, +276408, +276410, +276852, +277690, +277692, +277890, +278964, +279002, +279068, +279848, +280020, +281074, +281416, +281418, +281560, +281810, +282092, +282876, +282878, +283122, +283556, +284024, +284736, +284992, +284994, +285362, +285364, +286994, +287328, +288058, +288288, +288964, +289708, +289746, +290266, +291136, +292152, +292740, +292834, +293708, +293768, +293936, +294846, +295028, +295040, +295130, +295372, +296154, +296736, +297250, +297606, +298068, +298310, +299420, +300362, +301176, +301502, +301878, +302702, +303576, +303896, +305170, +305928, +306070, +306150, +307094, +307450, +307528, +307530, +307532, +307684, +307686, +308192, +309714, +310368, +310370, +310884, +312136, +312138, +312140, +312142, +312420, +312422, +314146, +314254, +314256, +315062, +315064, +315066, +315068, +316560, +317430, +317558, +317560, +317708, +318342, +319182, +319992, +320612, +320956, +321068, +321076, +322784, +322914, +323106, +324036, +324708, +326092, +326994, +327332, +328080, +328444, +329022, +329256, +330454, +331304, +331610, +332432, +332440, +333298, +334300, +334478, 
+334622, +335370, +335818, +336456, +336618, +337930, +338932, +339158, +339258, +339746, +340226, +340254, +340256, +340988, +341638, +342674, +343168, +343440, +344024, +344026, +344106, +345118, +346124, +347350, +348560, +348878, +349066, +350192, +350840, +351388, +353610, +354562, +355208, +356084, +356966, +358222, +359304, +359470, +360054, +360710, +360920, +361896, +362930, +362962, +363128, +363234, +363272, +363284, +363456, +363732, +364418, +364926, +365096, +365170, +365920, +366796, +367838, +368232, +368940, +369508, +369530, +370886, +371156, +371348, +372384, +372680, +372690, +373252, +373676, +374168, +374424, +374452, +374782, +374944, +374946, +374948, +375040, +375058, +376010, +376284, +376606, +376608, +377850, +378670, +379768, +379770, +380806, +381976, +382022, +382520, +382714, +382800, +383374, +384114, +384696, +384698, +384996, +385300, +386076, +387634, +387880, +388796, +389290, +389302, +389314, +389338, +389406, +389434, +389470, +389840, +389952, +390908, +391076, +391188, +392118, +392458, +392472, +392622, +392766, +393448, +394586, +394816, +394824, +395486, +396218, +396880, +396910, +397066, +397076, +397124, +397678, +398050, +399160, +400080, +401696, +401762, +402400, +402500, +402512, +403152, +404038, +404444, +404648, +404740, +405322, +406252, +407076, +408252, +408634, +409354, +410112, +411138, +411672, +411880, +412232, +412926, +412956, +413864, +414624, +415770, +415978, +417234, +417256, +417264, +418562, +418812, +418824, +418836, +418860, +418928, +418956, +418992, +419362, +419474, +420430, +420598, +420710, +421640, +421980, +421994, +422144, +422288, +422970, +424108, +424338, +424346, +425008, +425740, +426402, +426432, +426588, +426598, +426646, +427200, +427572, +428682, +429602, +430346, +430412, +431050, +431150, +431162, +431802, +432688, +433094, +433298, +433390, +433972, +434902, +435726, +436902, +437284, +438004, +438762, +439788, +440322, +440530, +440882, +441576, +441606, +442514, +443274, 
+444420, +444628, +445884, +445906, +445914, +447212, +447462, +448464, +448690, +448790, +449278, +449758, +449786, +449788, +450520, +451170, +452206, +452700, +452972, +453556, +453558, +453638, +454650, +455656, +456882, +458092, +458410, +458598, +459724, +460372, +460920, +463142, +464094, +464740, +465616, +466498, +467754, +468836, +469002, +469586, +470180, +471468, +472174, +472370, +473256, +473582, +473948, +474550, +475594, +476530, +477664, +477728, +479082, +479966, +480130, +480690, +481360, +481740, +482070, +482758, +483370, +483968, +484112, +484584, +485198, +485632, +486042, +486388, +486606, +486652, +487060, +488676, +489420, +489976, +490824, +492114, +493408, +493530, +494510, +494576, +494874, +495366, +496668, +496892, +497152, +497756, +498794, +499726, +501126, +502128, +502726, +503304, +503576, +504836, +505440, +506248, +506324, +506542, +507268, +507766, +507940, +508404, +508720, +509514, +510170, +510380, +511356, +512390, +512422, +512588, +512694, +512732, +512744, +512916, +513192, +513878, +514386, +514556, +514630, +515380, +516256, +517298, +517692, +518400, +518968, +518990, +520346, +520616, +520808, +521844, +522140, +522150, +522712, +523136, +523628, +524468, +525278, +525898, +526242, +526354, +526362, +528070, +528200, +528392, +529322, +529994, +531378, +532280, +532618, +533366, +533730, +534308, +534542, +535740, +536590, +536896, +537718, +537726, +538584, +539586, +539764, +539908, +540656, +541104, +541742, +541904, +543216, +543612, +543650, +544170, +545040, +546056, +546644, +546738, +547612, +547672, +547840, +548750, +548932, +548944, +549034, +549276, +550058, +550640, +551154, +551510, +551972, +552214, +553324, +554266, +555080, +555406, +555782, +556606, +557480, +557800, +559074, +559832, +559974, +550468, +551276, +552568, +552866, +553798, +554120, +554294, +555554, +556448, +556874, +557328, +557680, +558532, +559844, +560774, +561050, +561458, +562684, +563910, +564026, +564542, +565294, +565434, 
+566278, +567580, +568006, +568328, +569626, +570350, +570998, +572812, +573008, +573500, +573828, +573840, +573842, +574798, +576066, +576774, +577182, +577184, +577522, +577524, +578734, +579854, +579856, +581128, +581278, +582296, +583496, +583944, +584160, +584844, +584954, +584968, +585486, +586592, +586594, +587158, +587320, +588006, +589012, +590302, +590366, +590444, +590944, +581786, +582234, +582920, +582922, +564780, +565486, +565684, +566570, +566896, +567262, +567864, +568958, +570268, +570844, +572014, +573368, +574252, +574416, +574976, +575646, +576026, +576356, +577044, +577046, +577644, +577788, +578260, +578874, +579308, +579718, +580288, +580942, +581534, +581536, +576350, +576352 \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f8ce94f93..cbe482aa4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(12_MeshLoaders) # add_subdirectory(13_MaterialCompilerTest) + add_subdirectory(14_Mortons EXCLUDE_FROM_ALL) # Waiting for a refactor #add_subdirectory(27_PLYSTLDemo) @@ -87,6 +88,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) + add_subdirectory(72_CooperativeBinarySearch) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS)