diff --git a/03_DeviceSelectionAndSharedSources/main.cpp b/03_DeviceSelectionAndSharedSources/main.cpp index b8fd3d18b..bcc849a4d 100644 --- a/03_DeviceSelectionAndSharedSources/main.cpp +++ b/03_DeviceSelectionAndSharedSources/main.cpp @@ -257,7 +257,7 @@ class DeviceSelectionAndSharedSourcesApp final : public application_templates::M } const auto* metadata = assetBundle.getMetadata(); - const auto hlslMetadata = static_cast(metadata); + const auto hlslMetadata = static_cast(metadata); const auto shaderStage = hlslMetadata->shaderStages->front(); // It would be super weird if loading a shader from a file produced more than 1 asset diff --git a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt index a434ff32a..55ebaf41d 100644 --- a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt +++ b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt @@ -21,4 +21,49 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS 
${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl b/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl index af38ffada..31c60aefd 100644 --- a/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl +++ b/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl @@ -1,12 +1,9 @@ #include "common.hlsl" -// just a small test -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" - [[vk::push_constant]] PushConstantData pushConstants; // does absolutely nothing, a later example will show how it gets used -template +template void dummyTraitTest() {} [numthreads(WorkgroupSize,1,1)] diff --git a/05_StreamingAndBufferDeviceAddressApp/main.cpp b/05_StreamingAndBufferDeviceAddressApp/main.cpp index b82dc18ca..ab0984a07 100644 --- a/05_StreamingAndBufferDeviceAddressApp/main.cpp +++ b/05_StreamingAndBufferDeviceAddressApp/main.cpp @@ -6,6 +6,7 @@ // I've moved out a tiny part of this example into a shared header for reuse, please open and read it. 
#include "nbl/application_templates/MonoDeviceApplication.hpp" #include "nbl/examples/common/BuiltinResourcesApplication.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; @@ -95,15 +96,15 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset("app_resources/shader.comp.hlsl",lp); + lp.workingDirectory = "app_resources"; // virtual root + + auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - const auto shaderSource = IAsset::castDown(assets[0]); - shader = m_device->compileShader({shaderSource.get()}); + shader = IAsset::castDown(assets[0]); // The down-cast should not fail! 
assert(shader); } diff --git a/07_StagingAndMultipleQueues/CMakeLists.txt b/07_StagingAndMultipleQueues/CMakeLists.txt index a434ff32a..fe063be7c 100644 --- a/07_StagingAndMultipleQueues/CMakeLists.txt +++ b/07_StagingAndMultipleQueues/CMakeLists.txt @@ -21,4 +21,49 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/comp_shader.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/comp_shader.hlsl", + "KEY": "comp_shader", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/07_StagingAndMultipleQueues/app_resources/common.hlsl b/07_StagingAndMultipleQueues/app_resources/common.hlsl index 259d5069d..de15810c9 100644 --- a/07_StagingAndMultipleQueues/app_resources/common.hlsl +++ b/07_StagingAndMultipleQueues/app_resources/common.hlsl @@ 
-1,8 +1,8 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t WorkgroupSizeX = 16; -NBL_CONSTEXPR uint32_t WorkgroupSizeY = 16; -NBL_CONSTEXPR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeX = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeY = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; static const uint32_t FRAMES_IN_FLIGHT = 3u; diff --git a/07_StagingAndMultipleQueues/main.cpp b/07_StagingAndMultipleQueues/main.cpp index fc6bf4551..a850c1c47 100644 --- a/07_StagingAndMultipleQueues/main.cpp +++ b/07_StagingAndMultipleQueues/main.cpp @@ -4,6 +4,7 @@ // I've moved out a tiny part of this example into a shared header for reuse, please open and read it. #include "nbl/examples/examples.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; using namespace nbl::core; @@ -189,7 +190,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul for (uint32_t imageIdx = 0; imageIdx < IMAGE_CNT; ++imageIdx) { const auto imagePathToLoad = imagesToLoad[imageIdx]; - auto cpuImage = loadFistAssetInBundle(imagePathToLoad); + auto cpuImage = loadImageAsset(imagePathToLoad); if (!cpuImage) logFailAndTerminate("Failed to load image from path %s",ILogger::ELL_ERROR,imagePathToLoad); @@ -279,17 +280,10 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul } // LOAD SHADER FROM FILE - smart_refctd_ptr source; - { - source = loadFistAssetInBundle("../app_resources/comp_shader.hlsl"); - } + smart_refctd_ptr shader = loadPreCompiledShader<"comp_shader">(); // "../app_resources/comp_shader.hlsl" - if (!source) - logFailAndTerminate("Could not create a CPU shader!"); - - core::smart_refctd_ptr shader = m_device->compileShader({ source.get() }); - if(!shader) - logFailAndTerminate("Could not compile shader to spirv!"); + if (!shader) + 
logFailAndTerminate("Could not load the precompiled shader!"); // CREATE COMPUTE PIPELINE SPushConstantRange pc[1]; @@ -534,21 +528,39 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul return false; } - - template - core::smart_refctd_ptr loadFistAssetInBundle(const std::string& path) + + core::smart_refctd_ptr loadImageAsset(const std::string& path) { IAssetLoader::SAssetLoadParams lp; SAssetBundle bundle = m_assetMgr->getAsset(path, lp); if (bundle.getContents().empty()) - logFailAndTerminate("Couldn't load an asset.",ILogger::ELL_ERROR); + logFailAndTerminate("Couldn't load an image.",ILogger::ELL_ERROR); - auto asset = IAsset::castDown(bundle.getContents()[0]); + auto asset = IAsset::castDown(bundle.getContents()[0]); if (!asset) logFailAndTerminate("Incorrect asset loaded.",ILogger::ELL_ERROR); return asset; } + + template + core::smart_refctd_ptr loadPreCompiledShader() + { + IAssetLoader::SAssetLoadParams lp; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + SAssetBundle bundle = m_assetMgr->getAsset(key.data(), lp); + if (bundle.getContents().empty()) + logFailAndTerminate("Couldn't load a shader.", ILogger::ELL_ERROR); + + auto asset = IAsset::castDown(bundle.getContents()[0]); + if (!asset) + logFailAndTerminate("Incorrect asset loaded.", ILogger::ELL_ERROR); + + return asset; + } }; NBL_MAIN_FUNC(StagingAndMultipleQueuesApp) diff --git a/10_CountingSort/CMakeLists.txt b/10_CountingSort/CMakeLists.txt index b7cad41da..14bde428d 100644 --- a/10_CountingSort/CMakeLists.txt +++ b/10_CountingSort/CMakeLists.txt @@ -22,3 +22,70 @@ if(NBL_EMBED_BUILTIN_RESOURCES) LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/prefix_sum_shader.comp.hlsl + app_resources/scatter_shader.comp.hlsl 
+) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(REQUIRED_CAPS [=[ + { + "kind": "limits", + "name": "maxComputeWorkGroupInvocations", + "type": "uint32_t", + "values": [256,512,1024] + }, + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + } +]=]) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/prefix_sum_shader.comp.hlsl", + "KEY": "prefix_sum_shader", + "CAPS": [${REQUIRED_CAPS}] + }, + { + "INPUT": "app_resources/scatter_shader.comp.hlsl", + "KEY": "scatter_shader", + "CAPS": [${REQUIRED_CAPS}] + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/10_CountingSort/app_resources/common.hlsl b/10_CountingSort/app_resources/common.hlsl index bcbf01727..1074432b0 100644 --- a/10_CountingSort/app_resources/common.hlsl +++ b/10_CountingSort/app_resources/common.hlsl @@ -22,6 +22,10 @@ using namespace nbl::hlsl; #ifdef __HLSL_VERSION #include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +static const uint32_t WorkgroupSize = DeviceConfigCaps::maxComputeWorkGroupInvocations; +static const uint32_t MaxBucketCount = (DeviceConfigCaps::maxComputeSharedMemorySize / sizeof(uint32_t)) / 2; +static const uint32_t BucketCount = (MaxBucketCount > 3000) ? 
3000 : MaxBucketCount; + using Ptr = bda::__ptr; using PtrAccessor = BdaAccessor; @@ -54,6 +58,8 @@ uint32_t3 glsl::gl_WorkGroupSize() { return uint32_t3(WorkgroupSize, 1, 1); } + + #endif #endif \ No newline at end of file diff --git a/10_CountingSort/main.cpp b/10_CountingSort/main.cpp index d51650919..a22647750 100644 --- a/10_CountingSort/main.cpp +++ b/10_CountingSort/main.cpp @@ -1,4 +1,5 @@ #include "nbl/examples/examples.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; using namespace nbl::core; @@ -32,19 +33,34 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio return false; auto limits = m_physicalDevice->getLimits(); + constexpr std::array AllowedMaxComputeSharedMemorySizes = { + 16384, 32768, 65536 + }; + + auto upperBoundSharedMemSize = std::upper_bound(AllowedMaxComputeSharedMemorySizes.begin(), AllowedMaxComputeSharedMemorySizes.end(), limits.maxComputeSharedMemorySize); + // devices which support less than 16KB of max compute shared memory size are not supported + if (upperBoundSharedMemSize == AllowedMaxComputeSharedMemorySizes.begin()) + { + m_logger->log("maxComputeSharedMemorySize is too low (%u)", ILogger::E_LOG_LEVEL::ELL_ERROR, limits.maxComputeSharedMemorySize); + exit(0); + } + + limits.maxComputeSharedMemorySize = *(upperBoundSharedMemSize - 1); + const uint32_t WorkgroupSize = limits.maxComputeWorkGroupInvocations; const uint32_t MaxBucketCount = (limits.maxComputeSharedMemorySize / sizeof(uint32_t)) / 2; constexpr uint32_t element_count = 100000; const uint32_t bucket_count = std::min((uint32_t)3000, MaxBucketCount); const uint32_t elements_per_thread = ceil((float)ceil((float)element_count / limits.computeUnits) / WorkgroupSize); - auto prepShader = [&](const core::string& path) -> smart_refctd_ptr + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr { // this time we load a shader directly from a file IAssetLoader::SAssetLoadParams lp = {}; lp.logger = 
m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(path,lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key(limits, m_physicalDevice->getFeatures()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) { @@ -52,29 +68,24 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio return nullptr; } - auto source = IAsset::castDown(assets[0]); + auto shader = IAsset::castDown(assets[0]); // The down-cast should not fail! - assert(source); + assert(shader); // There's two ways of doing stuff like this: // 1. this - modifying the asset after load // 2. creating a short shader source file that includes the asset you would have wanted to load - auto overrideSource = CHLSLCompiler::createOverridenCopy( - source.get(), "#define WorkgroupSize %d\n#define BucketCount %d\n", - WorkgroupSize, bucket_count - ); + // + //auto overrideSource = CHLSLCompiler::createOverridenCopy( + // source.get(), "#define WorkgroupSize %d\n#define BucketCount %d\n", + // WorkgroupSize, bucket_count + //); // this time we skip the use of the asset converter since the IShader->IGPUShader path is quick and simple - auto shader = m_device->compileShader({ overrideSource.get() }); - if (!shader) - { - logFail("Creation of Prefix Sum Shader from CPU Shader source failed!"); - return nullptr; - } return shader; }; - auto prefixSumShader = prepShader("app_resources/prefix_sum_shader.comp.hlsl"); - auto scatterShader = prepShader("app_resources/scatter_shader.comp.hlsl"); + auto prefixSumShader = loadPrecompiledShader.operator()<"prefix_sum_shader">(); // "app_resources/prefix_sum_shader.comp.hlsl" + auto scatterShader = loadPrecompiledShader.operator()<"scatter_shader">(); // "app_resources/scatter_shader.comp.hlsl" // People love Reflection but I prefer Shader Sources instead! 
const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE,.offset = 0,.size = sizeof(CountingPushData) }; diff --git a/11_FFT/CMakeLists.txt b/11_FFT/CMakeLists.txt index a434ff32a..ca9fe8428 100644 --- a/11_FFT/CMakeLists.txt +++ b/11_FFT/CMakeLists.txt @@ -21,4 +21,49 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/11_FFT/main.cpp b/11_FFT/main.cpp index 1886da72a..49d157a38 100644 --- a/11_FFT/main.cpp +++ b/11_FFT/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" @@ -45,15 +46,6 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ smart_refctd_ptr m_timeline; uint64_t semaphorValue = 0; - inline core::smart_refctd_ptr createShader( - const char* includeMainName) - { - std::string prelude = "#include \""; - auto hlslShader = core::make_smart_refctd_ptr((prelude + includeMainName + "\"\n").c_str(), IShader::E_CONTENT_TYPE::ECT_HLSL, includeMainName); - assert(hlslShader); - return m_device->compileShader({ hlslShader.get() }); - } - public: // Yay thanks to multiple inheritance we cannot forward ctors anymore FFT_Test(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : @@ -68,28 +60,23 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ if (!asset_base_t::onAppInitialized(std::move(system))) return false; - // this time we load a shader directly from a file smart_refctd_ptr shader; - /* { + { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset("app_resources/shader.comp.hlsl", lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); // Cast down the asset to its proper type - auto source = IAsset::castDown(assets[0]); - // The down-cast should not fail! 
- assert(source); - - // Compile directly to SPIR-V Shader - shader = m_device->compileShader({ source.get() }); + shader = IAsset::castDown(assets[0]); + if (!shader) - return logFail("Creation of a SPIR-V Shader from HLSL Shader source failed!"); - }*/ - shader = createShader("app_resources/shader.comp.hlsl"); + return logFail("Invalid shader!"); + } // Create massive upload/download buffers constexpr uint32_t DownstreamBufferSize = sizeof(scalar_t) << 23; diff --git a/14_Mortons/CMakeLists.txt b/14_Mortons/CMakeLists.txt new file mode 100644 index 000000000..a434ff32a --- /dev/null +++ b/14_Mortons/CMakeLists.txt @@ -0,0 +1,24 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h new file mode 100644 index 000000000..6933e77e5 
--- /dev/null +++ b/14_Mortons/CTester.h @@ -0,0 +1,521 @@ +#ifndef _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ + +#include +#include "app_resources/testCommon.hlsl" +#include "app_resources/testCommon2.hlsl" +#include "ITester.h" + +using namespace nbl; + +class CTester final : public ITester +{ +public: + void performTests() + { + std::random_device rd; + std::mt19937 mt(rd()); + + std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < Iterations; ++i) + { + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + testInput.shift = generatedShift; + { + uint64_t generatedA = longDistribution(mt); + uint64_t generatedB = longDistribution(mt); + + testInput.generatedA = generatedA; + testInput.generatedB = generatedB; + + expected.emulatedAnd = _static_cast(generatedA & generatedB); + expected.emulatedOr = _static_cast(generatedA | generatedB); + expected.emulatedXor = _static_cast(generatedA ^ generatedB); + expected.emulatedNot = _static_cast(~generatedA); + expected.emulatedPlus = _static_cast(generatedA + generatedB); + expected.emulatedMinus = _static_cast(generatedA - generatedB); + expected.emulatedUnaryMinus = _static_cast(-generatedA); + expected.emulatedLess = uint32_t(generatedA < generatedB); + expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); + expected.emulatedGreater = uint32_t(generatedA > 
generatedB); + expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); + + expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); + expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); + expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); + } + { + testInput.coordX = longDistribution(mt); + testInput.coordY = longDistribution(mt); + testInput.coordZ = longDistribution(mt); + testInput.coordW = longDistribution(mt); + + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + 
uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them + // so their highest bits are all 0s or 1s depending on the sign of the number they encode + + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + // Plus + expected.mortonPlus_small_2 = createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); + expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); + expected.mortonPlus_full_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + 
expected.mortonPlus_emulated_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + + expected.mortonPlus_small_3 = createMortonFromU64Vec(Vec3ASmall + Vec3BSmall); + expected.mortonPlus_medium_3 = createMortonFromU64Vec(Vec3AMedium + Vec3BMedium); + expected.mortonPlus_full_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + expected.mortonPlus_emulated_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + + expected.mortonPlus_small_4 = createMortonFromU64Vec(Vec4ASmall + Vec4BSmall); + expected.mortonPlus_medium_4 = createMortonFromU64Vec(Vec4AMedium + Vec4BMedium); + expected.mortonPlus_full_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + expected.mortonPlus_emulated_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + + // Minus + expected.mortonMinus_small_2 = createMortonFromU64Vec(Vec2ASmall - Vec2BSmall); + expected.mortonMinus_medium_2 = createMortonFromU64Vec(Vec2AMedium - Vec2BMedium); + expected.mortonMinus_full_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + expected.mortonMinus_emulated_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + + expected.mortonMinus_small_3 = createMortonFromU64Vec(Vec3ASmall - Vec3BSmall); + expected.mortonMinus_medium_3 = createMortonFromU64Vec(Vec3AMedium - Vec3BMedium); + expected.mortonMinus_full_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + expected.mortonMinus_emulated_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + + expected.mortonMinus_small_4 = createMortonFromU64Vec(Vec4ASmall - Vec4BSmall); + expected.mortonMinus_medium_4 = createMortonFromU64Vec(Vec4AMedium - Vec4BMedium); + expected.mortonMinus_full_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + expected.mortonMinus_emulated_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + + // Coordinate-wise equality + expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); + expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); + expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, 
Vec2BFull)); + expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + + expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); + expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); + expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + + expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); + expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); + expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + expected.mortonEqual_emulated_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); + expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); + expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + + expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); + expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); + expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + + expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); + expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); + expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + // Coordinate-wise signed inequality + expected.mortonSignedLess_small_2 = 
uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); + expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); + expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + + expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); + expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); + expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + + expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); + expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); + expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(generatedShift); + // Left-shift + expected.mortonLeftShift_small_2 = createMortonFromU64Vec(Vec2ASmall << uint16_t(castedShift % smallBits_2)); + expected.mortonLeftShift_medium_2 = createMortonFromU64Vec(Vec2AMedium << uint16_t(castedShift % mediumBits_2)); + expected.mortonLeftShift_full_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + expected.mortonLeftShift_emulated_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + + expected.mortonLeftShift_small_3 = createMortonFromU64Vec(Vec3ASmall << uint16_t(castedShift % smallBits_3)); + expected.mortonLeftShift_medium_3 = createMortonFromU64Vec(Vec3AMedium << uint16_t(castedShift % mediumBits_3)); + expected.mortonLeftShift_full_3 = createMortonFromU64Vec(Vec3AFull << 
uint32_t(castedShift % fullBits_3)); + expected.mortonLeftShift_emulated_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); + + expected.mortonLeftShift_small_4 = createMortonFromU64Vec(Vec4ASmall << uint16_t(castedShift % smallBits_4)); + expected.mortonLeftShift_medium_4 = createMortonFromU64Vec(Vec4AMedium << uint16_t(castedShift % mediumBits_4)); + expected.mortonLeftShift_full_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + expected.mortonLeftShift_emulated_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + + // Unsigned right-shift + expected.mortonUnsignedRightShift_small_2 = morton::code::create(Vec2ASmall >> uint16_t(castedShift % smallBits_2)); + expected.mortonUnsignedRightShift_medium_2 = morton::code::create(Vec2AMedium >> uint16_t(castedShift % mediumBits_2)); + expected.mortonUnsignedRightShift_full_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + expected.mortonUnsignedRightShift_emulated_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + + expected.mortonUnsignedRightShift_small_3 = morton::code::create(Vec3ASmall >> uint16_t(castedShift % smallBits_3)); + expected.mortonUnsignedRightShift_medium_3 = morton::code::create(Vec3AMedium >> uint16_t(castedShift % mediumBits_3)); + expected.mortonUnsignedRightShift_full_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + expected.mortonUnsignedRightShift_emulated_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + + expected.mortonUnsignedRightShift_small_4 = morton::code::create(Vec4ASmall >> uint16_t(castedShift % smallBits_4)); + expected.mortonUnsignedRightShift_medium_4 = morton::code::create(Vec4AMedium >> uint16_t(castedShift % mediumBits_4)); + expected.mortonUnsignedRightShift_full_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + expected.mortonUnsignedRightShift_emulated_4 = 
morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + + // Signed right-shift + expected.mortonSignedRightShift_small_2 = morton::code::create(Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)); + expected.mortonSignedRightShift_medium_2 = morton::code::create(Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)); + expected.mortonSignedRightShift_full_2 = morton::code::create(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + + expected.mortonSignedRightShift_small_3 = morton::code::create(Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)); + expected.mortonSignedRightShift_medium_3 = morton::code::create(Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)); + expected.mortonSignedRightShift_full_3 = morton::code::create(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + + expected.mortonSignedRightShift_small_4 = morton::code::create(Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)); + expected.mortonSignedRightShift_medium_4 = morton::code::create(Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)); + expected.mortonSignedRightShift_full_4 = morton::code::create(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + } + + performCpuTests(testInput, expected); + performGpuTests(testInput, expected); + } + m_logger->log("FIRST TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + } + +private: + inline static constexpr int Iterations = 100u; + + void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues cpuTestValues; + + fillTestValues(commonTestInputValues, cpuTestValues); 
+ verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + + } + + void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues gpuTestValues; + gpuTestValues = dispatch(commonTestInputValues); + verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + } + + void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + { + // Some verification is commented out and moved to CTester2 due to bug in dxc. Uncomment them when the bug is fixed. + verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); + verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); + verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); + verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); + verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); + verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testType); + verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); + verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); + verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); + verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); + verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); + verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); + verifyTestValue("emulatedSignedRightShifted", 
expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); + verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testType); + + // Morton Plus + verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); + verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testType); + verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testType); + verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testType); + + verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testType); + verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testType); + verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testType); + verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testType); + + verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testType); + verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testType); + verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testType); + verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testType); + + // Morton Minus + verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testType); + verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testType); + 
verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testType); + verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testType); + + verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testType); + verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testType); + verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testType); + verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testType); + + verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testType); + verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testType); + verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testType); + verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testType); + + // Morton coordinate-wise equality + verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testType); + verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testType); + verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testType); + verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testType); + + verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testType); + verifyTestValue("mortonEqual_medium_3", 
expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testType); + verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testType); + verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testType); + + verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testType); + verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testType); + verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testType); + verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testType); + + // Morton coordinate-wise unsigned inequality + verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testType); + verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testType); + verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testType); + verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testType); + + verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testType); + verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testType); + verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testType); + verifyTestValue("mortonUnsignedLess_emulated_3", 
expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testType); + + verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testType); + verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); + verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); + // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); + + // Morton coordinate-wise signed inequality + verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); + verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); + verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); + // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); + + verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); + verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); + verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testType); + // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); + + verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, 
testValues.mortonSignedLess_small_4, testType); + verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); + verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); + // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); + + // Morton left-shift + verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); + verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testType); + verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testType); + verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testType); + + verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testType); + verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testType); + verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testType); + verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testType); + + verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testType); + verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testType); + verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, 
testValues.mortonLeftShift_full_4, testType); + verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testType); + + // Morton unsigned right-shift + verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testType); + verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testType); + verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testType); + + verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testType); + verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testType); + verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testType); + + verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testType); + verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testType); + verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, 
testValues.mortonUnsignedRightShift_full_4, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testType); + + // Morton signed right-shift + verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); + verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); + verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); + // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); + + verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testType); + verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testType); + verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testType); + //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); + + verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testType); + verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testType); + verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, 
testValues.mortonSignedRightShift_full_4, testType); + // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); + } +}; + +// Some hlsl code will result in compilation error if mixed together due to some bug in dxc. So we separate them into multiple shader compilation and test. +class CTester2 final : public ITester +{ +public: + void performTests() + { + std::random_device rd; + std::mt19937 mt(rd()); + + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < Iterations; ++i) + { + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + testInput.shift = generatedShift; + { + testInput.coordX = longDistribution(mt); + testInput.coordY = longDistribution(mt); + testInput.coordZ = longDistribution(mt); + testInput.coordW = longDistribution(mt); + + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + int32_t2 Vec2ASignedFull = 
createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(generatedShift); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + + } + + performCpuTests(testInput, expected); + performGpuTests(testInput, expected); + } + m_logger->log("SECOND TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + } + +private: + inline static constexpr int Iterations = 100u; + + void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues cpuTestValues; + + fillTestValues2(commonTestInputValues, cpuTestValues); + verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + + } + + void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues gpuTestValues; + gpuTestValues = dispatch(commonTestInputValues); + verifyTestValues(expectedTestValues, gpuTestValues, 
ITester::TestType::GPU); + } + + void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + { + + verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); + + verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); + verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); + verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); + + verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); + verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); + verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); + + } +}; +#endif \ No newline at end of file diff --git a/14_Mortons/ITester.h b/14_Mortons/ITester.h new file mode 100644 index 000000000..3be6d1d6b --- /dev/null +++ b/14_Mortons/ITester.h @@ -0,0 +1,279 @@ +#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ + +#include +#include "app_resources/common.hlsl" +#include "nbl/application_templates/MonoDeviceApplication.hpp" + +using namespace nbl; + +class ITester +{ +public: + virtual ~ITester() + { + m_outputBufferAllocation.memory->unmap(); + }; + + struct PipelineSetupData + { + std::string testShaderPath; + core::smart_refctd_ptr device; + core::smart_refctd_ptr api; + core::smart_refctd_ptr assetMgr; + 
core::smart_refctd_ptr logger; + video::IPhysicalDevice* physicalDevice; + uint32_t computeFamilyIndex; + }; + + template + void setupPipeline(const PipelineSetupData& pipleineSetupData) + { + // setting up pipeline in the constructor + m_device = core::smart_refctd_ptr(pipleineSetupData.device); + m_physicalDevice = pipleineSetupData.physicalDevice; + m_api = core::smart_refctd_ptr(pipleineSetupData.api); + m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); + m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); + m_queueFamily = pipleineSetupData.computeFamilyIndex; + m_semaphoreCounter = 0; + m_semaphore = m_device->createSemaphore(0); + m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) + logFail("Failed to create Command Buffers!\n"); + + // Load shaders, set up pipeline + core::smart_refctd_ptr shader; + { + asset::IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return logFail("Could not load shader!"); + + // It would be super weird if loading a shader from a file produced more than 1 asset + assert(assets.size() == 1); + core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); + + shader = m_device->compileShader({source.get()}); + } + + if (!shader) + logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); + + video::IGPUDescriptorSetLayout::SBinding bindings[2] = { + { + .binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1 + }, + { + 
.binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1 + } + }; + + core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + logFail("Failed to create a Descriptor Layout!\n"); + + m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); + if (!m_pplnLayout) + logFail("Failed to create a Pipeline Layout!\n"); + + { + video::IGPUComputePipeline::SCreationParams params = {}; + params.layout = m_pplnLayout.get(); + params.shader.entryPoint = "main"; + params.shader.shader = shader.get(); + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + logFail("Failed to create pipelines (compile & link shaders)!\n"); + } + + // Allocate memory of the input buffer + { + constexpr size_t BufferSize = sizeof(InputStruct); + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); + if (!inputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + inputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_inputBufferAllocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + m_ds = 
pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(inputBuff); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + } + + // Allocate memory of the output buffer + { + constexpr size_t BufferSize = sizeof(OutputStruct); + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); + if (!outputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + outputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_outputBufferAllocation = m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_outputBufferAllocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(outputBuff); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + } + + if (!m_outputBufferAllocation.memory->map({ 
0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); + if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct)); + + m_queue = m_device->getQueue(m_queueFamily, 0); + } + + enum class TestType + { + CPU, + GPU + }; + + template + void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) + { + if (expectedVal == testVal) + return; + + std::stringstream ss; + switch (testType) + { + case TestType::CPU: + ss << "CPU TEST ERROR:\n"; + break; + case TestType::GPU: + ss << "GPU TEST ERROR:\n"; + } + + ss << "nbl::hlsl::" << memberName << " produced incorrect output!" 
<< '\n'; + + m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); + } + +protected: + uint32_t m_queueFamily; + core::smart_refctd_ptr m_device; + core::smart_refctd_ptr m_api; + video::IPhysicalDevice* m_physicalDevice; + core::smart_refctd_ptr m_assetMgr; + core::smart_refctd_ptr m_logger; + video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; + video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; + core::smart_refctd_ptr m_cmdbuf = nullptr; + core::smart_refctd_ptr m_cmdpool = nullptr; + core::smart_refctd_ptr m_ds = nullptr; + core::smart_refctd_ptr m_pplnLayout = nullptr; + core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_semaphore; + video::IQueue* m_queue; + uint64_t m_semaphoreCounter; + + template + OutputStruct dispatch(const InputStruct& input) + { + // Update input buffer + if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); + if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), &input, sizeof(InputStruct)); + + m_inputBufferAllocation.memory->unmap(); + + // record command buffer + m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); + m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); + m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); + m_cmdbuf->dispatch(1, 1, 1); + m_cmdbuf->endDebugMarker(); + 
m_cmdbuf->end(); + + video::IQueue::SSubmitInfo submitInfos[1] = {}; + const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + submitInfos[0].commandBuffers = cmdbufs; + const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + submitInfos[0].signalSemaphores = signals; + + m_api->startCapture(); + m_queue->submit(submitInfos); + m_api->endCapture(); + + m_device->waitIdle(); + OutputStruct output; + std::memcpy(&output, static_cast(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct)); + m_device->waitIdle(); + + return output; + } + +private: + template + inline void logFail(const char* msg, Args&&... args) + { + m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); + exit(-1); + } +}; + +#endif \ No newline at end of file diff --git a/14_Mortons/app_resources/common.hlsl b/14_Mortons/app_resources/common.hlsl new file mode 100644 index 000000000..98e5e1342 --- /dev/null +++ b/14_Mortons/app_resources/common.hlsl @@ -0,0 +1,233 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXAMPLES_TESTS_12_MORTON_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_12_MORTON_COMMON_INCLUDED_ + +#include + +#include + +NBL_CONSTEXPR uint16_t smallBits_2 = 8; +NBL_CONSTEXPR uint16_t mediumBits_2 = 16; +NBL_CONSTEXPR uint16_t fullBits_2 = 32; +NBL_CONSTEXPR uint16_t smallBits_3 = 5; +NBL_CONSTEXPR uint16_t mediumBits_3 = 10; +NBL_CONSTEXPR uint16_t fullBits_3 = 21; +NBL_CONSTEXPR uint16_t smallBits_4 = 4; +NBL_CONSTEXPR uint16_t mediumBits_4 = 8; +NBL_CONSTEXPR uint16_t fullBits_4 = 16; + +using namespace nbl::hlsl; +template +NBL_CONSTEXPR_INLINE_FUNC T createAnyBitIntegerFromU64(uint64_t val) +{ + if(Signed) + { + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint64_t mask = (uint64_t(1) << (Bits - 1)) - 1; + // fill excess bit with one + if (_static_cast(val) < 0) + return _static_cast(val | ~mask); + else + return _static_cast(val & mask); + } else + { + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint64_t mask = (uint64_t(1) << Bits) - 1; + return _static_cast(val & mask); + } +} + +template +NBL_CONSTEXPR_INLINE_FUNC vector createAnyBitIntegerVecFromU64Vec(vector val) +{ + array_get, uint64_t> getter; + array_set, T> setter; + vector output; + NBL_UNROLL + for (uint16_t i = 0; i < D; i++) + { + setter(output, i, createAnyBitIntegerFromU64(getter(val, i))); + } + return output; +} + +template +NBL_CONSTEXPR_INLINE_FUNC morton::code createMortonFromU64Vec(const vector vec) +{ + using morton_code_t = morton::code; + using decode_component_t = typename morton_code_t::decode_component_t; + return morton_code_t::create(createAnyBitIntegerVecFromU64Vec(vec)); +} + +struct InputTestValues +{ + // Both tests + uint32_t shift; + + // Emulated int tests + uint64_t generatedA; + uint64_t generatedB; + + // Morton tests + uint64_t coordX; + uint64_t coordY; + uint64_t coordZ; + uint64_t coordW; +}; + +struct TestValues +{ + // Emulated int tests + emulated_uint64_t emulatedAnd; + emulated_uint64_t 
emulatedOr; + emulated_uint64_t emulatedXor; + emulated_uint64_t emulatedNot; + emulated_uint64_t emulatedPlus; + emulated_uint64_t emulatedMinus; + emulated_int64_t emulatedUnaryMinus; + // These are bools but stored as uint because you can't store bools, causes a SPIR-V issue + uint32_t emulatedLess; + uint32_t emulatedLessEqual; + uint32_t emulatedGreater; + uint32_t emulatedGreaterEqual; + emulated_uint64_t emulatedLeftShifted; + emulated_uint64_t emulatedUnsignedRightShifted; + emulated_int64_t emulatedSignedRightShifted; + + // Morton tests - for each dimension let's do one small, medium and full-szied (max bits possible) test to cover representation with + // 16, 32 and 64-bit types. Could make it more exhaustive with macros (test all possible bitwidths) + // For emulated mortons, we store only the emulated uint64 representing it, because DXC complains about bitcasts otherwise + + // Plus + morton::code mortonPlus_small_2; + morton::code mortonPlus_medium_2; + morton::code mortonPlus_full_2; + morton::code mortonPlus_emulated_2; + + morton::code mortonPlus_small_3; + morton::code mortonPlus_medium_3; + morton::code mortonPlus_full_3; + morton::code mortonPlus_emulated_3; + + morton::code mortonPlus_small_4; + morton::code mortonPlus_medium_4; + morton::code mortonPlus_full_4; + morton::code mortonPlus_emulated_4; + + // Minus + morton::code mortonMinus_small_2; + morton::code mortonMinus_medium_2; + morton::code mortonMinus_full_2; + morton::code mortonMinus_emulated_2; + + morton::code mortonMinus_small_3; + morton::code mortonMinus_medium_3; + morton::code mortonMinus_full_3; + morton::code mortonMinus_emulated_3; + + morton::code mortonMinus_small_4; + morton::code mortonMinus_medium_4; + morton::code mortonMinus_full_4; + morton::code mortonMinus_emulated_4; + + // Coordinate-wise equality (these are bools) + uint32_t2 mortonEqual_small_2; + uint32_t2 mortonEqual_medium_2; + uint32_t2 mortonEqual_full_2; + uint32_t2 mortonEqual_emulated_2; + + uint32_t3 
mortonEqual_small_3; + uint32_t3 mortonEqual_medium_3; + uint32_t3 mortonEqual_full_3; + uint32_t3 mortonEqual_emulated_3; + + uint32_t4 mortonEqual_small_4; + uint32_t4 mortonEqual_medium_4; + uint32_t4 mortonEqual_full_4; + uint32_t4 mortonEqual_emulated_4; + + // Coordinate-wise unsigned inequality (just testing with less, again these are bools) + uint32_t2 mortonUnsignedLess_small_2; + uint32_t2 mortonUnsignedLess_medium_2; + uint32_t2 mortonUnsignedLess_full_2; + uint32_t2 mortonUnsignedLess_emulated_2; + + uint32_t3 mortonUnsignedLess_small_3; + uint32_t3 mortonUnsignedLess_medium_3; + uint32_t3 mortonUnsignedLess_full_3; + uint32_t3 mortonUnsignedLess_emulated_3; + + uint32_t4 mortonUnsignedLess_small_4; + uint32_t4 mortonUnsignedLess_medium_4; + uint32_t4 mortonUnsignedLess_full_4; + uint32_t4 mortonUnsignedLess_emulated_4; + + // Coordinate-wise signed inequality (bools) + uint32_t2 mortonSignedLess_small_2; + uint32_t2 mortonSignedLess_medium_2; + uint32_t2 mortonSignedLess_full_2; + uint32_t2 mortonSignedLess_emulated_2; + + uint32_t3 mortonSignedLess_small_3; + uint32_t3 mortonSignedLess_medium_3; + uint32_t3 mortonSignedLess_full_3; + uint32_t3 mortonSignedLess_emulated_3; + + uint32_t4 mortonSignedLess_small_4; + uint32_t4 mortonSignedLess_medium_4; + uint32_t4 mortonSignedLess_full_4; + uint32_t4 mortonSignedLess_emulated_4; + + // Left-shift + morton::code mortonLeftShift_small_2; + morton::code mortonLeftShift_medium_2; + morton::code mortonLeftShift_full_2; + morton::code mortonLeftShift_emulated_2; + + morton::code mortonLeftShift_small_3; + morton::code mortonLeftShift_medium_3; + morton::code mortonLeftShift_full_3; + morton::code mortonLeftShift_emulated_3; + + morton::code mortonLeftShift_small_4; + morton::code mortonLeftShift_medium_4; + morton::code mortonLeftShift_full_4; + morton::code mortonLeftShift_emulated_4; + + // Unsigned right-shift + morton::code mortonUnsignedRightShift_small_2; + morton::code mortonUnsignedRightShift_medium_2; 
+ morton::code mortonUnsignedRightShift_full_2; + morton::code mortonUnsignedRightShift_emulated_2; + + morton::code mortonUnsignedRightShift_small_3; + morton::code mortonUnsignedRightShift_medium_3; + morton::code mortonUnsignedRightShift_full_3; + morton::code mortonUnsignedRightShift_emulated_3; + + morton::code mortonUnsignedRightShift_small_4; + morton::code mortonUnsignedRightShift_medium_4; + morton::code mortonUnsignedRightShift_full_4; + morton::code mortonUnsignedRightShift_emulated_4; + + // Signed right-shift + morton::code mortonSignedRightShift_small_2; + morton::code mortonSignedRightShift_medium_2; + morton::code mortonSignedRightShift_full_2; + morton::code mortonSignedRightShift_emulated_2; + + morton::code mortonSignedRightShift_small_3; + morton::code mortonSignedRightShift_medium_3; + morton::code mortonSignedRightShift_full_3; + morton::code mortonSignedRightShift_emulated_3; + + morton::code mortonSignedRightShift_small_4; + morton::code mortonSignedRightShift_medium_4; + morton::code mortonSignedRightShift_full_4; + morton::code mortonSignedRightShift_emulated_4; + + +}; + +#endif diff --git a/14_Mortons/app_resources/test.comp.hlsl b/14_Mortons/app_resources/test.comp.hlsl new file mode 100644 index 000000000..60cdf94b1 --- /dev/null +++ b/14_Mortons/app_resources/test.comp.hlsl @@ -0,0 +1,17 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h + +#include "testCommon.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(1, 1, 1)] +[shader("compute")] +void main(uint3 invocationID : SV_DispatchThreadID) +{ + uint32_t testID = glsl::gl_GlobalInvocationID().x; + fillTestValues(inputTestValues[testID], outputTestValues[testID]); +} diff --git a/14_Mortons/app_resources/test2.comp.hlsl b/14_Mortons/app_resources/test2.comp.hlsl new file mode 100644 index 000000000..30b998f49 --- /dev/null +++ b/14_Mortons/app_resources/test2.comp.hlsl @@ -0,0 +1,17 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h + +#include "testCommon2.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(1, 1, 1)] +[shader("compute")] +void main(uint3 invocationID : SV_DispatchThreadID) +{ + uint32_t testID = glsl::gl_GlobalInvocationID().x; + fillTestValues2(inputTestValues[testID], outputTestValues[testID]); +} diff --git a/14_Mortons/app_resources/testCommon.hlsl b/14_Mortons/app_resources/testCommon.hlsl new file mode 100644 index 000000000..6144b6ce9 --- /dev/null +++ b/14_Mortons/app_resources/testCommon.hlsl @@ -0,0 +1,295 @@ +#include "common.hlsl" + + +void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +{ + emulated_uint64_t emulatedA = _static_cast(input.generatedA); + emulated_uint64_t emulatedB = _static_cast(input.generatedB); + emulated_int64_t signedEmulatedA = _static_cast(input.generatedA); + + // Emulated int tests + output.emulatedAnd = emulatedA & emulatedB; + output.emulatedOr = 
emulatedA | emulatedB; + output.emulatedXor = emulatedA ^ emulatedB; + output.emulatedNot = emulatedA.operator~(); + output.emulatedPlus = emulatedA + emulatedB; + output.emulatedMinus = emulatedA - emulatedB; + output.emulatedLess = uint32_t(emulatedA < emulatedB); + output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); + output.emulatedGreater = uint32_t(emulatedA > emulatedB); + output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); + + left_shift_operator leftShift; + output.emulatedLeftShifted = leftShift(emulatedA, input.shift); + + arithmetic_right_shift_operator unsignedRightShift; + output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); + + arithmetic_right_shift_operator signedRightShift; + output.emulatedSignedRightShifted = signedRightShift(signedEmulatedA, input.shift); + + output.emulatedUnaryMinus = signedEmulatedA.operator-(); + + // Morton tests + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 
Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + morton::code morton_small_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2A = createMortonFromU64Vec(Vec2A); + 
morton::code morton_emulated_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_small_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_medium_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_full_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_emulated_2B = createMortonFromU64Vec(Vec2B); + + morton::code morton_small_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_medium_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_small_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_medium_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_full_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_emulated_3B = createMortonFromU64Vec(Vec3B); + + morton::code morton_small_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_small_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_medium_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_full_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_emulated_4B = createMortonFromU64Vec(Vec4B); + + morton::code morton_small_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + + morton::code morton_small_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_medium_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + + morton::code morton_small_4_signed = 
createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + // Some test and operation is moved to testCommon2.hlsl due to dxc bug that cause compilation failure. Uncomment when the bug is fixed. + // Plus + output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; + output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; + output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; + output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; + + output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; + output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; + output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; + output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; + + output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; + output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; + output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; + output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; + + // Minus + output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; + output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; + output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; + output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; + + output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; + output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; + output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; + output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; + + output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; + output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; + output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; + output.mortonMinus_emulated_4 = 
morton_emulated_4A - morton_emulated_4B; + + // Coordinate-wise equality + output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(Vec2BSmall)); + output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(Vec2BMedium)); + output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(Vec2BFull)); + output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(Vec2BFull)); + + output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(Vec3BSmall)); + output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(Vec3BMedium)); + output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(Vec3BFull)); + output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(Vec3BFull)); + + output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(Vec4BSmall)); + output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(Vec4BMedium)); + output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(Vec4BFull)); + output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(Vec2BSmall)); + output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(Vec2BMedium)); + output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(Vec2BFull)); + output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(Vec2BFull)); + + output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(Vec3BSmall)); + output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(Vec3BMedium)); + output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(Vec3BFull)); + output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(Vec3BFull)); + + output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(Vec4BSmall)); + output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(Vec4BMedium)); + 
output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(Vec4BFull)); + // output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); + + // Coordinate-wise signed inequality + output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(Vec2BSignedSmall)); + output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(Vec2BSignedMedium)); + output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(Vec2BSignedFull)); + // output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + + output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(Vec3BSignedSmall)); + output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(Vec3BSignedMedium)); + output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(Vec3BSignedFull)); + // output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + + output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(Vec4BSignedSmall)); + output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(Vec4BSignedMedium)); + output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(Vec4BSignedFull)); + // output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); + + // Cast to uint16_t which is what left shift for Mortons expect + uint16_t castedShift = uint16_t(input.shift); + // Each left shift clamps to correct bits so the result kinda makes sense + // Left-shift + left_shift_operator > leftShiftSmall2; + output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); + left_shift_operator > leftShiftMedium2; + output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + left_shift_operator > leftShiftFull2; + output.mortonLeftShift_full_2 = 
leftShiftFull2(morton_full_2A, castedShift % fullBits_2); + left_shift_operator > leftShiftEmulated2; + output.mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + left_shift_operator > leftShiftSmall3; + output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); + left_shift_operator > leftShiftMedium3; + output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + left_shift_operator > leftShiftFull3; + output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); + left_shift_operator > leftShiftEmulated3; + output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + left_shift_operator > leftShiftSmall4; + output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); + left_shift_operator > leftShiftMedium4; + output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + left_shift_operator > leftShiftFull4; + output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); + left_shift_operator > leftShiftEmulated4; + output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Unsigned right-shift + arithmetic_right_shift_operator > rightShiftSmall2; + output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftMedium2; + output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftFull2; + output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftEmulated2; + output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % 
fullBits_2); + + arithmetic_right_shift_operator > rightShiftSmall3; + output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftMedium3; + output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftFull3; + output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftEmulated3; + output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSmall4; + output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftMedium4; + output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftFull4; + output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); + arithmetic_right_shift_operator > rightShiftEmulated4; + output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Signed right-shift + arithmetic_right_shift_operator > rightShiftSignedSmall2; + output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftSignedMedium2; + output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftSignedFull2; + output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); + // arithmetic_right_shift_operator > rightShiftSignedEmulated2; + // output.mortonSignedRightShift_emulated_2 = 
rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSignedSmall3; + output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftSignedMedium3; + output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftSignedFull3; + output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); + // arithmetic_right_shift_operator > rightShiftSignedEmulated3; + // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSignedSmall4; + output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftSignedMedium4; + output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftSignedFull4; + output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); + // arithmetic_right_shift_operator > rightShiftSignedEmulated4; + // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); + +} \ No newline at end of file diff --git a/14_Mortons/app_resources/testCommon2.hlsl b/14_Mortons/app_resources/testCommon2.hlsl new file mode 100644 index 000000000..365b82340 --- /dev/null +++ b/14_Mortons/app_resources/testCommon2.hlsl @@ -0,0 +1,39 @@ +#include "common.hlsl" + +void fillTestValues2(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +{ + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, 
input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + + output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); + + output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(input.shift); + + arithmetic_right_shift_operator > rightShiftSignedEmulated2; + output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftSignedEmulated3; + output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftSignedEmulated4; + output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); +} diff --git a/14_Mortons/config.json.template b/14_Mortons/config.json.template new file mode 100644 
index 000000000..717d05d53 --- /dev/null +++ b/14_Mortons/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", // should be none + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", // we also need to run in Debug and RWDI because foundational example + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/14_Mortons/main.cpp new file mode 100644 index 000000000..12f55805f --- /dev/null +++ b/14_Mortons/main.cpp @@ -0,0 +1,82 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include +#include + +#include "nbl/application_templates/MonoDeviceApplication.hpp" +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" + +#include "app_resources/common.hlsl" +#include "CTester.h" + +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::video; +using namespace nbl::examples; +using namespace nbl::application_templates; + +class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; +public: + MortonTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + } + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // 
Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + CTester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator + // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 + { + CTester mortonTester; + pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; + mortonTester.setupPipeline(pplnSetupData); + mortonTester.performTests(); + + CTester2 mortonTester2; + pplnSetupData.testShaderPath = "app_resources/test2.comp.hlsl"; + mortonTester2.setupPipeline(pplnSetupData); + mortonTester2.performTests(); + } + + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void workLoopBody() override + { + m_keepRunning = false; + } + + bool keepRunning() override + { + return m_keepRunning; + } + + +private: + bool m_keepRunning = true; +}; + +NBL_MAIN_FUNC(MortonTest) \ No newline at end of file diff --git a/14_Mortons/pipeline.groovy b/14_Mortons/pipeline.groovy new file mode 100644 index 000000000..1a7b043a4 --- /dev/null +++ b/14_Mortons/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CStreamingAndBufferDeviceAddressBuilder extends IBuilder +{ + public CStreamingAndBufferDeviceAddressBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + 
@Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return true + } + + @Override + public boolean test(Map axisMapping) + { + return true + } + + @Override + public boolean install(Map axisMapping) + { + return true + } +} + +def create(Agent _agent, _info) +{ + return new CStreamingAndBufferDeviceAddressBuilder(_agent, _info) +} + +return this \ No newline at end of file diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index d053977c0..f014bd1cb 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -86,6 +86,10 @@ class CIntrinsicsTester final : public ITester testInput.smoothStepEdge0 = realDistributionNeg(mt); testInput.smoothStepEdge1 = realDistributionPos(mt); testInput.smoothStepX = realDistribution(mt); + testInput.addCarryA = std::numeric_limits::max() - uintDistribution(mt); + testInput.addCarryB = uintDistribution(mt); + testInput.subBorrowA = uintDistribution(mt); + testInput.subBorrowB = uintDistribution(mt); testInput.bitCountVec = int32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); testInput.clampValVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); @@ -120,6 +124,10 @@ class CIntrinsicsTester final : public ITester testInput.refractI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); testInput.refractN = glm::normalize(float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt))); testInput.refractEta = realDistribution(mt); + testInput.addCarryAVec = 
uint32_t3(std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt)); + testInput.addCarryBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); + testInput.subBorrowAVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); + testInput.subBorrowBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values IntrinsicsTestValues expected; @@ -140,6 +148,9 @@ class CIntrinsicsTester final : public ITester expected.step = glm::step(testInput.stepEdge, testInput.stepX); expected.smoothStep = glm::smoothstep(testInput.smoothStepEdge0, testInput.smoothStepEdge1, testInput.smoothStepX); + expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); + expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); + expected.frac = testInput.frac - std::floor(testInput.frac); expected.bitReverse = glm::bitfieldReverse(testInput.bitReverse); @@ -182,6 +193,9 @@ class CIntrinsicsTester final : public ITester expected.reflect = glm::reflect(testInput.reflectI, testInput.reflectN); expected.refract = glm::refract(testInput.refractI, testInput.refractN, testInput.refractEta); + expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); + expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); + auto mulGlm = nbl::hlsl::mul(testInput.mulLhs, testInput.mulRhs); expected.mul = reinterpret_cast(mulGlm); auto transposeGlm = glm::transpose(reinterpret_cast(testInput.transpose)); @@ -201,6 +215,7 @@ class CIntrinsicsTester final : 
public ITester void performCpuTests(const IntrinsicsIntputTestValues& commonTestInputValues, const IntrinsicsTestValues& expectedTestValues) { IntrinsicsTestValues cpuTestValues; + cpuTestValues.fillTestValues(commonTestInputValues); verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); @@ -233,6 +248,10 @@ class CIntrinsicsTester final : public ITester verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testType); verifyTestValue("step", expectedTestValues.step, testValues.step, testType); verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testType); + verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testType); + verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testType); + verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testType); + verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testType); verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); verifyTestVector3dValue("cross", expectedTestValues.cross, testValues.cross, testType); @@ -255,6 +274,10 @@ class CIntrinsicsTester final : public ITester verifyTestVector3dValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testType); verifyTestVector3dValue("reflect", expectedTestValues.reflect, testValues.reflect, testType); verifyTestVector3dValue("refract", expectedTestValues.refract, testValues.refract, testType); + verifyTestVector3dValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testType); + verifyTestVector3dValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testType); + verifyTestVector3dValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, 
testValues.subBorrowVec.result, testType); + verifyTestVector3dValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, testValues.subBorrowVec.borrow, testType); verifyTestMatrix3x3Value("mul", expectedTestValues.mul, testValues.mul, testType); verifyTestMatrix3x3Value("transpose", expectedTestValues.transpose, testValues.transpose, testType); diff --git a/22_CppCompat/ITester.h b/22_CppCompat/ITester.h index 4ecd522b9..39ceb8141 100644 --- a/22_CppCompat/ITester.h +++ b/22_CppCompat/ITester.h @@ -218,6 +218,7 @@ class ITester { case TestType::CPU: ss << "CPU TEST ERROR:\n"; + break; case TestType::GPU: ss << "GPU TEST ERROR:\n"; } diff --git a/22_CppCompat/app_resources/common.hlsl b/22_CppCompat/app_resources/common.hlsl index e2303a2fc..dc3ff5fcd 100644 --- a/22_CppCompat/app_resources/common.hlsl +++ b/22_CppCompat/app_resources/common.hlsl @@ -1,74 +1,74 @@ -//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. -//// This file is part of the "Nabla Engine". -//// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ - -// because DXC doesn't properly support `_Static_assert` -// TODO: add a message, and move to macros.h or cpp_compat -#define STATIC_ASSERT(...) 
{ nbl::hlsl::conditional<__VA_ARGS__, int, void>::type a = 0; } - -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include - -#include - - -#include -#include -#include - -#include -#include - -// tgmath.hlsl and intrinsics.hlsl tests - -using namespace nbl::hlsl; -struct TgmathIntputTestValues -{ - float floor; - float isnan; - float isinf; - float powX; - float powY; - float exp; - float exp2; - float log; - float log2; - float absF; - int absI; - float sqrt; - float sin; - float cos; - float acos; - float modf; - float round; - float roundEven; - float trunc; - float ceil; - float fmaX; - float fmaY; - float fmaZ; - float ldexpArg; - int ldexpExp; - float modfStruct; - float frexpStruct; +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ + +// because DXC doesn't properly support `_Static_assert` +// TODO: add a message, and move to macros.h or cpp_compat +#define STATIC_ASSERT(...) 
{ nbl::hlsl::conditional<__VA_ARGS__, int, void>::type a = 0; } + +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include + +#include + + +#include +#include +#include + +#include +#include + +// tgmath.hlsl and intrinsics.hlsl tests + +using namespace nbl::hlsl; +struct TgmathIntputTestValues +{ + float floor; + float isnan; + float isinf; + float powX; + float powY; + float exp; + float exp2; + float log; + float log2; + float absF; + int absI; + float sqrt; + float sin; + float cos; + float acos; + float modf; + float round; + float roundEven; + float trunc; + float ceil; + float fmaX; + float fmaY; + float fmaZ; + float ldexpArg; + int ldexpExp; + float modfStruct; + float frexpStruct; float tan; float asin; float atan; @@ -78,38 +78,38 @@ struct TgmathIntputTestValues float asinh; float acosh; float atanh; - float atan2X; - float atan2Y; - float erf; - float erfInv; - - float32_t3 floorVec; - float32_t3 isnanVec; - float32_t3 isinfVec; - float32_t3 powXVec; - float32_t3 powYVec; - float32_t3 expVec; - float32_t3 exp2Vec; - float32_t3 logVec; - float32_t3 log2Vec; - float32_t3 absFVec; - int32_t3 absIVec; - float32_t3 sqrtVec; - float32_t3 sinVec; - float32_t3 cosVec; - float32_t3 acosVec; - float32_t3 modfVec; - float32_t3 roundVec; - float32_t3 roundEvenVec; - float32_t3 truncVec; - float32_t3 ceilVec; - float32_t3 fmaXVec; - float32_t3 fmaYVec; - float32_t3 fmaZVec; - float32_t3 ldexpArgVec; - int32_t3 ldexpExpVec; - float32_t3 modfStructVec; - float32_t3 frexpStructVec; + float atan2X; + float atan2Y; + float erf; + float erfInv; + + float32_t3 floorVec; + float32_t3 isnanVec; + float32_t3 isinfVec; + float32_t3 powXVec; + float32_t3 powYVec; + float32_t3 expVec; + float32_t3 exp2Vec; + float32_t3 logVec; + float32_t3 log2Vec; + float32_t3 absFVec; + int32_t3 absIVec; + float32_t3 sqrtVec; + float32_t3 sinVec; + float32_t3 cosVec; + float32_t3 acosVec; + float32_t3 modfVec; + 
float32_t3 roundVec; + float32_t3 roundEvenVec; + float32_t3 truncVec; + float32_t3 ceilVec; + float32_t3 fmaXVec; + float32_t3 fmaYVec; + float32_t3 fmaZVec; + float32_t3 ldexpArgVec; + int32_t3 ldexpExpVec; + float32_t3 modfStructVec; + float32_t3 frexpStructVec; float32_t3 tanVec; float32_t3 asinVec; float32_t3 atanVec; @@ -119,35 +119,35 @@ struct TgmathIntputTestValues float32_t3 asinhVec; float32_t3 acoshVec; float32_t3 atanhVec; - float32_t3 atan2XVec; - float32_t3 atan2YVec; - float32_t3 erfVec; - float32_t3 erfInvVec; -}; - -struct TgmathTestValues -{ - float floor; - int isnan; - int isinf; - float pow; - float exp; - float exp2; - float log; - float log2; - float absF; - int absI; - float sqrt; - float sin; - float cos; - float acos; - float modf; - float round; - float roundEven; - float trunc; - float ceil; - float fma; - float ldexp; + float32_t3 atan2XVec; + float32_t3 atan2YVec; + float32_t3 erfVec; + float32_t3 erfInvVec; +}; + +struct TgmathTestValues +{ + float floor; + int isnan; + int isinf; + float pow; + float exp; + float exp2; + float log; + float log2; + float absF; + int absI; + float sqrt; + float sin; + float cos; + float acos; + float modf; + float round; + float roundEven; + float trunc; + float ceil; + float fma; + float ldexp; float tan; float asin; float atan; @@ -157,40 +157,40 @@ struct TgmathTestValues float asinh; float acosh; float atanh; - float atan2; - float erf; - float erfInv; - - float32_t3 floorVec; - - // we can't fix this because using namespace nbl::hlsl would cause ambiguous math functions below - // and we can't add a nbl::hlsl alias for the builtin hLSL vector type because of https://github.com/microsoft/DirectXShaderCompiler/issues/7035 -#ifndef __HLSL_VERSION - nbl::hlsl::vector isnanVec; - nbl::hlsl::vector isinfVec; -#else - vector isnanVec; - vector isinfVec; -#endif - - float32_t3 powVec; - float32_t3 expVec; - float32_t3 exp2Vec; - float32_t3 logVec; - float32_t3 log2Vec; - float32_t3 absFVec; - int32_t3 
absIVec; - float32_t3 sqrtVec; - float32_t3 cosVec; - float32_t3 sinVec; - float32_t3 acosVec; - float32_t3 modfVec; - float32_t3 roundVec; - float32_t3 roundEvenVec; - float32_t3 truncVec; - float32_t3 ceilVec; - float32_t3 fmaVec; - float32_t3 ldexpVec; + float atan2; + float erf; + float erfInv; + + float32_t3 floorVec; + + // we can't fix this because using namespace nbl::hlsl would cause ambiguous math functions below + // and we can't add a nbl::hlsl alias for the builtin hLSL vector type because of https://github.com/microsoft/DirectXShaderCompiler/issues/7035 +#ifndef __HLSL_VERSION + nbl::hlsl::vector isnanVec; + nbl::hlsl::vector isinfVec; +#else + vector isnanVec; + vector isinfVec; +#endif + + float32_t3 powVec; + float32_t3 expVec; + float32_t3 exp2Vec; + float32_t3 logVec; + float32_t3 log2Vec; + float32_t3 absFVec; + int32_t3 absIVec; + float32_t3 sqrtVec; + float32_t3 cosVec; + float32_t3 sinVec; + float32_t3 acosVec; + float32_t3 modfVec; + float32_t3 roundVec; + float32_t3 roundEvenVec; + float32_t3 truncVec; + float32_t3 ceilVec; + float32_t3 fmaVec; + float32_t3 ldexpVec; float32_t3 tanVec; float32_t3 asinVec; float32_t3 atanVec; @@ -200,258 +200,275 @@ struct TgmathTestValues float32_t3 asinhVec; float32_t3 acoshVec; float32_t3 atanhVec; - float32_t3 atan2Vec; - float32_t3 erfVec; - float32_t3 erfInvVec; - - ModfOutput modfStruct; - ModfOutput modfStructVec; - FrexpOutput frexpStruct; - FrexpOutput frexpStructVec; - - void fillTestValues(NBL_CONST_REF_ARG(TgmathIntputTestValues) input) - { - floor = nbl::hlsl::floor(input.floor); - isnan = nbl::hlsl::isnan(input.isnan); - isinf = nbl::hlsl::isinf(input.isinf); - pow = nbl::hlsl::pow(input.powX, input.powY); - exp = nbl::hlsl::exp(input.exp); - exp2 = nbl::hlsl::exp2(input.exp2); - log = nbl::hlsl::log(input.log); - log2 = nbl::hlsl::log2(input.log2); - absF = nbl::hlsl::abs(input.absF); - absI = nbl::hlsl::abs(input.absI); - sqrt = nbl::hlsl::sqrt(input.sqrt); - sin = nbl::hlsl::sin(input.sin); 
- cos = nbl::hlsl::cos(input.cos); - tan = nbl::hlsl::tan(input.tan); - asin = nbl::hlsl::asin(input.asin); - atan = nbl::hlsl::atan(input.atan); - sinh = nbl::hlsl::sinh(input.sinh); - cosh = nbl::hlsl::cosh(input.cosh); - tanh = nbl::hlsl::tanh(input.tanh); - asinh = nbl::hlsl::asinh(input.asinh); - acosh = nbl::hlsl::acosh(input.acosh); - atanh = nbl::hlsl::atanh(input.atanh); - atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); - erf = nbl::hlsl::erf(input.erf); - erfInv = nbl::hlsl::erfInv(input.erfInv); - acos = nbl::hlsl::acos(input.acos); - modf = nbl::hlsl::modf(input.modf); - round = nbl::hlsl::round(input.round); - roundEven = nbl::hlsl::roundEven(input.roundEven); - trunc = nbl::hlsl::trunc(input.trunc); - ceil = nbl::hlsl::ceil(input.ceil); - fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); - ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); - - floorVec = nbl::hlsl::floor(input.floorVec); - isnanVec = nbl::hlsl::isnan(input.isnanVec); - isinfVec = nbl::hlsl::isinf(input.isinfVec); - powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); - expVec = nbl::hlsl::exp(input.expVec); - exp2Vec = nbl::hlsl::exp2(input.exp2Vec); - logVec = nbl::hlsl::log(input.logVec); - log2Vec = nbl::hlsl::log2(input.log2Vec); - absFVec = nbl::hlsl::abs(input.absFVec); - absIVec = nbl::hlsl::abs(input.absIVec); - sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); - sinVec = nbl::hlsl::sin(input.sinVec); - cosVec = nbl::hlsl::cos(input.cosVec); - tanVec = nbl::hlsl::tan(input.tanVec); - asinVec = nbl::hlsl::asin(input.asinVec); - atanVec = nbl::hlsl::atan(input.atanVec); - sinhVec = nbl::hlsl::sinh(input.sinhVec); - coshVec = nbl::hlsl::cosh(input.coshVec); - tanhVec = nbl::hlsl::tanh(input.tanhVec); - asinhVec = nbl::hlsl::asinh(input.asinhVec); - acoshVec = nbl::hlsl::acosh(input.acoshVec); - atanhVec = nbl::hlsl::atanh(input.atanhVec); - atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); - acosVec = nbl::hlsl::acos(input.acosVec); - modfVec = 
nbl::hlsl::modf(input.modfVec); - roundVec = nbl::hlsl::round(input.roundVec); - roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); - truncVec = nbl::hlsl::trunc(input.truncVec); - ceilVec = nbl::hlsl::ceil(input.ceilVec); - fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); - ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); - erfVec = nbl::hlsl::erf(input.erfVec); - erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); - - modfStruct = nbl::hlsl::modfStruct(input.modfStruct); - modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); - frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); - frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); - } -}; - -struct IntrinsicsIntputTestValues -{ - int bitCount; - float32_t3 crossLhs; - float32_t3 crossRhs; - float clampVal; - float clampMin; - float clampMax; - float32_t3 length; - float32_t3 normalize; - float32_t3 dotLhs; - float32_t3 dotRhs; - float32_t3x3 determinant; - uint32_t findMSB; - uint32_t findLSB; - float32_t3x3 inverse; - float32_t3x3 transpose; - float32_t3x3 mulLhs; - float32_t3x3 mulRhs; - float minA; - float minB; - float maxA; - float maxB; - float rsqrt; - uint32_t bitReverse; - float frac; - float mixX; - float mixY; - float mixA; - float sign; - float radians; - float degrees; - float stepEdge; - float stepX; - float smoothStepEdge0; - float smoothStepEdge1; - float smoothStepX; - - int32_t3 bitCountVec; - float32_t3 clampValVec; - float32_t3 clampMinVec; - float32_t3 clampMaxVec; - uint32_t3 findMSBVec; - uint32_t3 findLSBVec; - float32_t3 minAVec; - float32_t3 minBVec; - float32_t3 maxAVec; - float32_t3 maxBVec; - float32_t3 rsqrtVec; - uint32_t3 bitReverseVec; - float32_t3 fracVec; - float32_t3 mixXVec; - float32_t3 mixYVec; - float32_t3 mixAVec; - float32_t3 signVec; - float32_t3 radiansVec; - float32_t3 degreesVec; - float32_t3 stepEdgeVec; - float32_t3 stepXVec; - float32_t3 smoothStepEdge0Vec; - float32_t3 smoothStepEdge1Vec; - float32_t3 
smoothStepXVec; - float32_t3 faceForwardN; - float32_t3 faceForwardI; - float32_t3 faceForwardNref; - float32_t3 reflectI; - float32_t3 reflectN; - float32_t3 refractI; - float32_t3 refractN; - float refractEta; -}; - -struct IntrinsicsTestValues -{ - int bitCount; - float clamp; - float length; - float dot; - float determinant; - int findMSB; - int findLSB; - float min; - float max; - float rsqrt; - float frac; - uint32_t bitReverse; - float mix; - float sign; - float radians; - float degrees; - float step; - float smoothStep; - - float32_t3 normalize; - float32_t3 cross; - int32_t3 bitCountVec; - float32_t3 clampVec; - uint32_t3 findMSBVec; - uint32_t3 findLSBVec; - float32_t3 minVec; - float32_t3 maxVec; - float32_t3 rsqrtVec; - uint32_t3 bitReverseVec; - float32_t3 fracVec; - float32_t3 mixVec; - float32_t3 signVec; - float32_t3 radiansVec; - float32_t3 degreesVec; - float32_t3 stepVec; - float32_t3 smoothStepVec; - float32_t3 faceForward; - float32_t3 reflect; - float32_t3 refract; - - float32_t3x3 mul; - float32_t3x3 transpose; - float32_t3x3 inverse; - - void fillTestValues(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input) - { - bitCount = nbl::hlsl::bitCount(input.bitCount); - cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); - clamp = nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); - length = nbl::hlsl::length(input.length); - normalize = nbl::hlsl::normalize(input.normalize); - dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); - determinant = nbl::hlsl::determinant(input.determinant); - findMSB = nbl::hlsl::findMSB(input.findMSB); - findLSB = nbl::hlsl::findLSB(input.findLSB); - inverse = nbl::hlsl::inverse(input.inverse); - transpose = nbl::hlsl::transpose(input.transpose); - mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); - // TODO: fix min and max - min = nbl::hlsl::min(input.minA, input.minB); - max = nbl::hlsl::max(input.maxA, input.maxB); - rsqrt = nbl::hlsl::rsqrt(input.rsqrt); - bitReverse = 
nbl::hlsl::bitReverse(input.bitReverse); - frac = nbl::hlsl::fract(input.frac); - mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); - sign = nbl::hlsl::sign(input.sign); - radians = nbl::hlsl::radians(input.radians); - degrees = nbl::hlsl::degrees(input.degrees); - step = nbl::hlsl::step(input.stepEdge, input.stepX); - smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); - - bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); - clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); - findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); - findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); - // TODO: fix min and max - minVec = nbl::hlsl::min(input.minAVec, input.minBVec); - maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); - rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); - bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); - fracVec = nbl::hlsl::fract(input.fracVec); - mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); - - signVec = nbl::hlsl::sign(input.signVec); - radiansVec = nbl::hlsl::radians(input.radiansVec); - degreesVec = nbl::hlsl::degrees(input.degreesVec); - stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); - smoothStepVec = nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); - faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); - reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); - refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); - } -}; - -#endif + float32_t3 atan2Vec; + float32_t3 erfVec; + float32_t3 erfInvVec; + + ModfOutput modfStruct; + ModfOutput modfStructVec; + FrexpOutput frexpStruct; + FrexpOutput frexpStructVec; + + void fillTestValues(NBL_CONST_REF_ARG(TgmathIntputTestValues) input) + { + floor = nbl::hlsl::floor(input.floor); + isnan = nbl::hlsl::isnan(input.isnan); + isinf = 
nbl::hlsl::isinf(input.isinf); + pow = nbl::hlsl::pow(input.powX, input.powY); + exp = nbl::hlsl::exp(input.exp); + exp2 = nbl::hlsl::exp2(input.exp2); + log = nbl::hlsl::log(input.log); + log2 = nbl::hlsl::log2(input.log2); + absF = nbl::hlsl::abs(input.absF); + absI = nbl::hlsl::abs(input.absI); + sqrt = nbl::hlsl::sqrt(input.sqrt); + sin = nbl::hlsl::sin(input.sin); + cos = nbl::hlsl::cos(input.cos); + tan = nbl::hlsl::tan(input.tan); + asin = nbl::hlsl::asin(input.asin); + atan = nbl::hlsl::atan(input.atan); + sinh = nbl::hlsl::sinh(input.sinh); + cosh = nbl::hlsl::cosh(input.cosh); + tanh = nbl::hlsl::tanh(input.tanh); + asinh = nbl::hlsl::asinh(input.asinh); + acosh = nbl::hlsl::acosh(input.acosh); + atanh = nbl::hlsl::atanh(input.atanh); + atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); + erf = nbl::hlsl::erf(input.erf); + erfInv = nbl::hlsl::erfInv(input.erfInv); + acos = nbl::hlsl::acos(input.acos); + modf = nbl::hlsl::modf(input.modf); + round = nbl::hlsl::round(input.round); + roundEven = nbl::hlsl::roundEven(input.roundEven); + trunc = nbl::hlsl::trunc(input.trunc); + ceil = nbl::hlsl::ceil(input.ceil); + fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); + ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); + + floorVec = nbl::hlsl::floor(input.floorVec); + isnanVec = nbl::hlsl::isnan(input.isnanVec); + isinfVec = nbl::hlsl::isinf(input.isinfVec); + powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); + expVec = nbl::hlsl::exp(input.expVec); + exp2Vec = nbl::hlsl::exp2(input.exp2Vec); + logVec = nbl::hlsl::log(input.logVec); + log2Vec = nbl::hlsl::log2(input.log2Vec); + absFVec = nbl::hlsl::abs(input.absFVec); + absIVec = nbl::hlsl::abs(input.absIVec); + sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); + sinVec = nbl::hlsl::sin(input.sinVec); + cosVec = nbl::hlsl::cos(input.cosVec); + tanVec = nbl::hlsl::tan(input.tanVec); + asinVec = nbl::hlsl::asin(input.asinVec); + atanVec = nbl::hlsl::atan(input.atanVec); + sinhVec = 
nbl::hlsl::sinh(input.sinhVec); + coshVec = nbl::hlsl::cosh(input.coshVec); + tanhVec = nbl::hlsl::tanh(input.tanhVec); + asinhVec = nbl::hlsl::asinh(input.asinhVec); + acoshVec = nbl::hlsl::acosh(input.acoshVec); + atanhVec = nbl::hlsl::atanh(input.atanhVec); + atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); + acosVec = nbl::hlsl::acos(input.acosVec); + modfVec = nbl::hlsl::modf(input.modfVec); + roundVec = nbl::hlsl::round(input.roundVec); + roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); + truncVec = nbl::hlsl::trunc(input.truncVec); + ceilVec = nbl::hlsl::ceil(input.ceilVec); + fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); + ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); + erfVec = nbl::hlsl::erf(input.erfVec); + erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); + + modfStruct = nbl::hlsl::modfStruct(input.modfStruct); + modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); + frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); + frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); + } +}; + +struct IntrinsicsIntputTestValues +{ + int bitCount; + float32_t3 crossLhs; + float32_t3 crossRhs; + float clampVal; + float clampMin; + float clampMax; + float32_t3 length; + float32_t3 normalize; + float32_t3 dotLhs; + float32_t3 dotRhs; + float32_t3x3 determinant; + uint32_t findMSB; + uint32_t findLSB; + float32_t3x3 inverse; + float32_t3x3 transpose; + float32_t3x3 mulLhs; + float32_t3x3 mulRhs; + float minA; + float minB; + float maxA; + float maxB; + float rsqrt; + uint32_t bitReverse; + float frac; + float mixX; + float mixY; + float mixA; + float sign; + float radians; + float degrees; + float stepEdge; + float stepX; + float smoothStepEdge0; + float smoothStepEdge1; + float smoothStepX; + uint32_t addCarryA; + uint32_t addCarryB; + uint32_t subBorrowA; + uint32_t subBorrowB; + + int32_t3 bitCountVec; + float32_t3 clampValVec; + float32_t3 clampMinVec; + float32_t3 clampMaxVec; + 
uint32_t3 findMSBVec; + uint32_t3 findLSBVec; + float32_t3 minAVec; + float32_t3 minBVec; + float32_t3 maxAVec; + float32_t3 maxBVec; + float32_t3 rsqrtVec; + uint32_t3 bitReverseVec; + float32_t3 fracVec; + float32_t3 mixXVec; + float32_t3 mixYVec; + float32_t3 mixAVec; + float32_t3 signVec; + float32_t3 radiansVec; + float32_t3 degreesVec; + float32_t3 stepEdgeVec; + float32_t3 stepXVec; + float32_t3 smoothStepEdge0Vec; + float32_t3 smoothStepEdge1Vec; + float32_t3 smoothStepXVec; + float32_t3 faceForwardN; + float32_t3 faceForwardI; + float32_t3 faceForwardNref; + float32_t3 reflectI; + float32_t3 reflectN; + float32_t3 refractI; + float32_t3 refractN; + float refractEta; + uint32_t3 addCarryAVec; + uint32_t3 addCarryBVec; + uint32_t3 subBorrowAVec; + uint32_t3 subBorrowBVec; +}; + +struct IntrinsicsTestValues +{ + int bitCount; + float clamp; + float length; + float dot; + float determinant; + int findMSB; + int findLSB; + float min; + float max; + float rsqrt; + float frac; + uint32_t bitReverse; + float mix; + float sign; + float radians; + float degrees; + float step; + float smoothStep; + + float32_t3 normalize; + float32_t3 cross; + int32_t3 bitCountVec; + float32_t3 clampVec; + uint32_t3 findMSBVec; + uint32_t3 findLSBVec; + float32_t3 minVec; + float32_t3 maxVec; + float32_t3 rsqrtVec; + uint32_t3 bitReverseVec; + float32_t3 fracVec; + float32_t3 mixVec; + float32_t3 signVec; + float32_t3 radiansVec; + float32_t3 degreesVec; + float32_t3 stepVec; + float32_t3 smoothStepVec; + float32_t3 faceForward; + float32_t3 reflect; + float32_t3 refract; + + float32_t3x3 mul; + float32_t3x3 transpose; + float32_t3x3 inverse; + + spirv::AddCarryOutput addCarry; + spirv::SubBorrowOutput subBorrow; + spirv::AddCarryOutput addCarryVec; + spirv::SubBorrowOutput subBorrowVec; + + void fillTestValues(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input) + { + bitCount = nbl::hlsl::bitCount(input.bitCount); + cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); + clamp 
= nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); + length = nbl::hlsl::length(input.length); + normalize = nbl::hlsl::normalize(input.normalize); + dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); + determinant = nbl::hlsl::determinant(input.determinant); + findMSB = nbl::hlsl::findMSB(input.findMSB); + findLSB = nbl::hlsl::findLSB(input.findLSB); + inverse = nbl::hlsl::inverse(input.inverse); + transpose = nbl::hlsl::transpose(input.transpose); + mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); + // TODO: fix min and max + min = nbl::hlsl::min(input.minA, input.minB); + max = nbl::hlsl::max(input.maxA, input.maxB); + rsqrt = nbl::hlsl::rsqrt(input.rsqrt); + bitReverse = nbl::hlsl::bitReverse(input.bitReverse); + frac = nbl::hlsl::fract(input.frac); + mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); + sign = nbl::hlsl::sign(input.sign); + radians = nbl::hlsl::radians(input.radians); + degrees = nbl::hlsl::degrees(input.degrees); + step = nbl::hlsl::step(input.stepEdge, input.stepX); + smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); + + bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); + clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); + findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); + findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); + // TODO: fix min and max + minVec = nbl::hlsl::min(input.minAVec, input.minBVec); + maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); + rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); + bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); + fracVec = nbl::hlsl::fract(input.fracVec); + mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); + + signVec = nbl::hlsl::sign(input.signVec); + radiansVec = nbl::hlsl::radians(input.radiansVec); + degreesVec = nbl::hlsl::degrees(input.degreesVec); + stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); + smoothStepVec = 
nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); + faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); + reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); + refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); + addCarry = nbl::hlsl::addCarry(input.addCarryA, input.addCarryB); + subBorrow = nbl::hlsl::subBorrow(input.subBorrowA, input.subBorrowB); + addCarryVec = nbl::hlsl::addCarry(input.addCarryAVec, input.addCarryBVec); + subBorrowVec = nbl::hlsl::subBorrow(input.subBorrowAVec, input.subBorrowBVec); + } +}; + +#endif diff --git a/22_CppCompat/app_resources/test.comp.hlsl b/22_CppCompat/app_resources/test.comp.hlsl index 98be76c53..17c59f970 100644 --- a/22_CppCompat/app_resources/test.comp.hlsl +++ b/22_CppCompat/app_resources/test.comp.hlsl @@ -3,9 +3,6 @@ //// For conditions of distribution and use, see copyright notice in nabla.h #include "app_resources/common.hlsl" -template -const static bool is_same_v = nbl::hlsl::is_same_v; - struct PushConstants { @@ -88,6 +85,7 @@ struct device_capabilities2 }; [numthreads(8, 8, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { fill(invocationID, 1); @@ -157,9 +155,9 @@ void main(uint3 invocationID : SV_DispatchThreadID) { static const uint16_t TEST_VALUE_0 = 5; static const uint32_t TEST_VALUE_1 = 0x80000000u; - static const uint32_t TEST_VALUE_2 = 0x8000000000000000u; + static const uint32_t TEST_VALUE_2 = 0x8000000000000000u; // TODO: Przmek is this intended? it warns because its too big from uint32_t static const uint32_t TEST_VALUE_3 = 0x00000001u; - static const uint32_t TEST_VALUE_4 = 0x0000000000000001u; + static const uint32_t TEST_VALUE_4 = 0x0000000000000001u; // TODO: Przmek is this intended? 
it warns because its too big from uint32_t fill(invocationID, 5.01); diff --git a/24_ColorSpaceTest/CMakeLists.txt b/24_ColorSpaceTest/CMakeLists.txt index 026add505..a2feb2cb8 100644 --- a/24_ColorSpaceTest/CMakeLists.txt +++ b/24_ColorSpaceTest/CMakeLists.txt @@ -32,4 +32,49 @@ add_test(NAME NBL_IMAGE_HASH_RUN_TESTS COMMAND "$" --test hash WORKING_DIRECTORY "$" COMMAND_EXPAND_LISTS +) + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/present.frag.hlsl + app_resources/push_constants.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/present.frag.hlsl", + "KEY": "present", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} ) \ No newline at end of file diff --git a/24_ColorSpaceTest/main.cpp b/24_ColorSpaceTest/main.cpp index 84c55ef3a..750756321 100644 --- a/24_ColorSpaceTest/main.cpp +++ b/24_ColorSpaceTest/main.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" @@ -160,26 +161,24 @@ class ColorSpaceTestSampleApp final : public SimpleWindowedApplication, public B return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); // Load Custom Shader - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); - if (!source) - return nullptr; + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; - return m_device->compileShader({ source.get() }); - }; - auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); + auto shader = IAsset::castDown(assets[0]); + return shader; + }; + auto fragmentShader = loadPrecompiledShader.operator()<"present">(); // "app_resources/present.frag.hlsl" if (!fragmentShader) - return logFail("Failed to Load and Compile Fragment Shader!"); + return logFail("Failed to load precompiled fragment shader!"); // Now surface indep resources m_semaphore = m_device->createSemaphore(m_submitIx); diff --git a/27_MPMCScheduler/app_resources/common.hlsl 
b/27_MPMCScheduler/app_resources/common.hlsl index 2fb8971ad..2783f13a2 100644 --- a/27_MPMCScheduler/app_resources/common.hlsl +++ b/27_MPMCScheduler/app_resources/common.hlsl @@ -1,8 +1,8 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t WorkgroupSizeX = 8; -NBL_CONSTEXPR uint32_t WorkgroupSizeY = 8; -NBL_CONSTEXPR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeX = 8; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeY = 8; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; struct PushConstants { diff --git a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl index 07c2ec8cf..02ae4ff40 100644 --- a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl +++ b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl @@ -68,8 +68,6 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase // This one shows up a lot so we give it a name const bool oddThread = glsl::gl_SubgroupInvocationID() & 1u; - ternary_operator > ternaryOp; - // Since every two consecutive columns are stored as one packed column, we divide the index by 2 to get the index of that packed column const uint32_t firstIndex = workgroup::SubgroupContiguousIndex() / 2; int32_t paddedIndex = int32_t(firstIndex) - pushConstants.halfPadding; @@ -82,7 +80,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase { // If mirrored, we need to invert which thread is loading lo and which is loading hi // If using zero-padding, useful to find out if we're outside of [0,1) bounds - bool invert = paddedIndex < 0 || paddedIndex >= pushConstants.imageHalfRowLength; + bool inPadding = paddedIndex < 0 || paddedIndex >= pushConstants.imageHalfRowLength; int32_t wrappedIndex = paddedIndex < 0 ? 
~paddedIndex : paddedIndex; // ~x = - x - 1 in two's complement (except maybe at the borders of representable range) wrappedIndex = paddedIndex < pushConstants.imageHalfRowLength ? wrappedIndex : pushConstants.imageRowLength + ~paddedIndex; const complex_t loOrHi = colMajorAccessor.get(colMajorOffset(wrappedIndex, y)); @@ -93,17 +91,17 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase if (glsl::gl_WorkGroupID().x) { - complex_t lo = ternaryOp(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = ternaryOp(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = nbl::hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = nbl::hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); // --------------------------------------------------- MIRROR PADDING ------------------------------------------------------------------------------------------- #ifdef MIRROR_PADDING - preloaded[localElementIndex] = ternaryOp(oddThread ^ invert, hi, lo); + preloaded[localElementIndex] = nbl::hlsl::select(oddThread != inPadding, hi, lo); // ----------------------------------------------------- ZERO PADDING ------------------------------------------------------------------------------------------- #else const complex_t Zero = { scalar_t(0), scalar_t(0) }; - preloaded[localElementIndex] = ternaryOp(invert, Zero, ternaryOp(oddThread, hi, lo)); + preloaded[localElementIndex] = nbl::hlsl::select(inPadding, Zero, nbl::hlsl::select(oddThread, hi, lo)); #endif // ------------------------------------------------ END PADDING DIVERGENCE ---------------------------------------------------------------------------------------- } @@ -116,7 +114,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[localElementIndex] = 
ternaryOp(oddThread ^ invert, oddThreadHi, evenThreadLo); + preloaded[localElementIndex] = nbl::hlsl::select(oddThread != inPadding, oddThreadHi, evenThreadLo); } paddedIndex += WorkgroupSize / 2; } diff --git a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl index eaecb5d0f..eca81e859 100644 --- a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl +++ b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl @@ -46,8 +46,6 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas // This one shows up a lot so we give it a name const bool oddThread = glsl::gl_SubgroupInvocationID() & 1u; - ternary_operator > ternaryOp; - if (glsl::gl_WorkGroupID().x) { // Even thread must index a y corresponding to an even element of the previous FFT pass, and the odd thread must index its DFT Mirror @@ -72,10 +70,10 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const vector loOrHiVector = vector (loOrHi.real(), loOrHi.imag()); const vector otherThreadloOrHiVector = glsl::subgroupShuffleXor< vector >(loOrHiVector, 1u); const complex_t otherThreadLoOrHi = { otherThreadloOrHiVector.x, otherThreadloOrHiVector.y }; - complex_t lo = ternaryOp(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = ternaryOp(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = nbl::hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = nbl::hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); - preloaded[channel][localElementIndex] = ternaryOp(oddThread, hi, lo); + preloaded[channel][localElementIndex] = nbl::hlsl::select(oddThread, hi, lo); packedColumnIndex += WorkgroupSize / 2; } @@ -112,7 +110,7 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { 
otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[channel][localElementIndex] = ternaryOp(oddThread, oddThreadHi, evenThreadLo); + preloaded[channel][localElementIndex] = nbl::hlsl::select(oddThread, oddThreadHi, evenThreadLo); packedColumnIndex += WorkgroupSize / 2; } diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index c3a0fa47e..0928d3b61 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -61,4 +61,72 @@ else() foreach(NBL_TARGET IN LISTS NBL_MSDFGEN_TARGETS) target_include_directories(${EXECUTABLE_NAME} PUBLIC $) endforeach() -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + shaders/globals.hlsl + shaders/runtimeDeviceConfigCaps.hlsl + shaders/main_pipeline/common.hlsl + shaders/main_pipeline/dtm.hlsl + shaders/main_pipeline/fragment.hlsl + shaders/main_pipeline/fragment_shader.hlsl + shaders/main_pipeline/fragment_shader_debug.hlsl + shaders/main_pipeline/line_style.hlsl + shaders/main_pipeline/resolve_alphas.hlsl + shaders/main_pipeline/vertex_shader.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(REQUIRED_CAPS [=[ +{ + "kind": "features", + "name": "fragmentShaderPixelInterlock", + "type": "bool", + "values": [1] +} +]=]) + +set(JSON [=[ +[ + { + "INPUT": "shaders/main_pipeline/vertex_shader.hlsl", + "KEY": "main_pipeline_vertex_shader", + "CAPS": [${REQUIRED_CAPS}] + }, + { + "INPUT": "shaders/main_pipeline/fragment.hlsl", + "KEY": "main_pipeline_fragment_shader", + "CAPS": [${REQUIRED_CAPS}] + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS 
${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index f4a886791..905177f6b 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1,5 +1,5 @@ // TODO: Copyright notice - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" @@ -929,84 +929,29 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio smart_refctd_ptr mainPipelineVertexShader = {}; std::array, 2u> geoTexturePipelineShaders = {}; { - smart_refctd_ptr shaderReadCache = nullptr; - smart_refctd_ptr shaderWriteCache = core::make_smart_refctd_ptr(); - auto shaderCachePath = localOutputCWD / "main_pipeline_shader_cache.bin"; - + // Load Custom Shader + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr { - core::smart_refctd_ptr shaderReadCacheFile; + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) { - system::ISystem::future_t> future; - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_READ); - if (future.wait()) - { - future.acquire().move_into(shaderReadCacheFile); - if (shaderReadCacheFile) - { - const size_t size = shaderReadCacheFile->getSize(); - if (size > 0ull) - { - std::vector contents(size); - system::IFile::success_t succ; - shaderReadCacheFile->read(succ, contents.data(), 0, size); - if (succ) - shaderReadCache = 
IShaderCompiler::CCache::deserialize(contents); - } - } - } - else - m_logger->log("Failed Openning Shader Cache File.", ILogger::ELL_ERROR); + m_logger->log("Failed to load a precompiled ahsder.", ILogger::ELL_ERROR); + return nullptr; } + - } - - // Load Custom Shader - auto loadCompileShader = [&](const std::string& relPath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); - if (!source) - return nullptr; - - return m_device->compileShader( ILogicalDevice::SShaderCreationParameters { .source = source.get(), .readCache = shaderReadCache.get(), .writeCache = shaderWriteCache.get(), .stage = IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY }); - }; + auto shader = IAsset::castDown(assets[0]); + return shader; + }; - mainPipelineFragmentShaders = loadCompileShader("../shaders/main_pipeline/fragment.hlsl"); - mainPipelineVertexShader = loadCompileShader("../shaders/main_pipeline/vertex_shader.hlsl"); - - core::smart_refctd_ptr shaderWriteCacheFile; - { - system::ISystem::future_t> future; - m_system->deleteFile(shaderCachePath); // temp solution instead of trimming, to make sure we won't have corrupted json - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_WRITE); - if (future.wait()) - { - future.acquire().move_into(shaderWriteCacheFile); - if (shaderWriteCacheFile) - { - auto serializedCache = shaderWriteCache->serialize(); - if (shaderWriteCacheFile) - { - system::IFile::success_t succ; - shaderWriteCacheFile->write(succ, serializedCache->getPointer(), 0, serializedCache->getSize()); - if (!succ) - m_logger->log("Failed Writing To Shader Cache File.", ILogger::ELL_ERROR); - } - } - else - 
m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } + mainPipelineFragmentShaders = loadPrecompiledShader.operator()<"main_pipeline_fragment_shader">(); // "../shaders/main_pipeline/fragment.hlsl" + mainPipelineVertexShader = loadPrecompiledShader.operator() <"main_pipeline_vertex_shader">(); // "../shaders/main_pipeline/vertex_shader.hlsl" } // Shared Blend Params between pipelines diff --git a/62_CAD/shaders/geotexture/common.hlsl b/62_CAD/shaders/geotexture/common.hlsl index 691cd3d3b..f2053e003 100644 --- a/62_CAD/shaders/geotexture/common.hlsl +++ b/62_CAD/shaders/geotexture/common.hlsl @@ -4,7 +4,7 @@ #include "../globals.hlsl" // Handle multiple geo textures, separate set, array of texture? index allocator? or multiple sets? -NBL_CONSTEXPR uint32_t MaxGeoTextures = 256; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxGeoTextures = 256; // GeoTexture Oriented Bounding Box struct GeoTextureOBB diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 5c3681910..ead5a5fd9 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -1,12 +1,6 @@ #ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ #define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ -#ifdef __HLSL_VERSION -#ifndef NBL_USE_SPIRV_BUILTINS -#include "runtimeDeviceConfigCaps.hlsl" // defines DeviceConfigCaps, uses JIT device caps -#endif -#endif - // TODO[Erfan]: Turn off in the future, but keep enabled to test // #define NBL_FORCE_EMULATED_FLOAT_64 @@ -352,8 +346,8 @@ static_assert(offsetof(CurveBox, curveMax[0]) == 56u); static_assert(sizeof(CurveBox) == 80u); #endif -NBL_CONSTEXPR uint32_t InvalidRigidSegmentIndex = 0xffffffff; -NBL_CONSTEXPR float InvalidStyleStretchValue = nbl::hlsl::numeric_limits::infinity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidRigidSegmentIndex = 0xffffffff; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float InvalidStyleStretchValue = 
nbl::hlsl::numeric_limits::infinity; // TODO[Przemek]: we will need something similar to LineStyles but related to heigh shading settings which is user customizable (like stipple patterns) and requires upper_bound to figure out the color based on height value. @@ -547,27 +541,27 @@ inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) } #endif -NBL_CONSTEXPR uint32_t ImagesBindingArraySize = 128; -NBL_CONSTEXPR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture -NBL_CONSTEXPR uint32_t AlphaBits = 32u - MainObjectIdxBits; -NBL_CONSTEXPR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; -NBL_CONSTEXPR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; -NBL_CONSTEXPR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t ImagesBindingArraySize = 128; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t AlphaBits = 32u - MainObjectIdxBits; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidCustomClipRectIndex = 
nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; // Hatches -NBL_CONSTEXPR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; -NBL_CONSTEXPR MajorAxis SelectedMinorAxis = MajorAxis::MAJOR_X; //(MajorAxis) (1 - (uint32_t) SelectedMajorAxis); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR MajorAxis SelectedMinorAxis = MajorAxis::MAJOR_X; //(MajorAxis) (1 - (uint32_t) SelectedMajorAxis); // Text or MSDF Hatches -NBL_CONSTEXPR float MSDFPixelRange = 4.0f; -NBL_CONSTEXPR float MSDFPixelRangeHalf = MSDFPixelRange / 2.0f; -NBL_CONSTEXPR float MSDFSize = 64.0f; -NBL_CONSTEXPR uint32_t MSDFMips = 4; -NBL_CONSTEXPR float HatchFillMSDFSceenSpaceSize = 8.0; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float MSDFPixelRange = 4.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float MSDFPixelRangeHalf = MSDFPixelRange / 2.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float MSDFSize = 64.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MSDFMips = 4; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float HatchFillMSDFSceenSpaceSize = 8.0; inline bool isInvalidGridDtmHeightValue(float value) { diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 90394e935..df566f002 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -706,19 +706,19 @@ PSInput vtxMain(uint vertexID : SV_VertexID) if (corner.x == 0.0f && corner.y == 0.0f) { - dilationVector.x = ieee754::flipSign(dilationVector.x); + dilationVector.x = ieee754::flipSign(dilationVector.x, true); uvOffset.x = -uvOffset.x; uvOffset.y = -uvOffset.y; } else if (corner.x == 0.0f && corner.y == 1.0f) { - dilationVector.x = ieee754::flipSign(dilationVector.x); - dilationVector.y = ieee754::flipSign(dilationVector.y); + dilationVector.x = ieee754::flipSign(dilationVector.x, true); + 
dilationVector.y = ieee754::flipSign(dilationVector.y, true); uvOffset.x = -uvOffset.x; } else if (corner.x == 1.0f && corner.y == 1.0f) { - dilationVector.y = ieee754::flipSign(dilationVector.y); + dilationVector.y = ieee754::flipSign(dilationVector.y, true); } else if (corner.x == 1.0f && corner.y == 0.0f) { @@ -730,7 +730,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID) pfloat64_t2 worldSpaceExtentsYAxisFlipped; worldSpaceExtentsYAxisFlipped.x = worldSpaceExtents.x; - worldSpaceExtentsYAxisFlipped.y = ieee754::flipSign(worldSpaceExtents.y); + worldSpaceExtentsYAxisFlipped.y = ieee754::flipSign(worldSpaceExtents.y, true); const pfloat64_t2 vtxPos = topLeft + worldSpaceExtentsYAxisFlipped * _static_cast(corner); const pfloat64_t2 dilatedVtxPos = vtxPos + dilationVector; diff --git a/64_EmulatedFloatTest/CMakeLists.txt b/64_EmulatedFloatTest/CMakeLists.txt index aae93590d..af46da896 100644 --- a/64_EmulatedFloatTest/CMakeLists.txt +++ b/64_EmulatedFloatTest/CMakeLists.txt @@ -27,4 +27,55 @@ if(MSVC) target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") else() target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/test.comp.hlsl + app_resources/benchmark/benchmark.comp.hlsl + app_resources/benchmark/common.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/test.comp.hlsl", + "KEY": "test", + }, + { + "INPUT": "app_resources/benchmark/benchmark.comp.hlsl", + "KEY": "benchmark", + }, +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO 
${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl b/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl index b31da3737..a515f6bcb 100644 --- a/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl +++ b/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl @@ -66,6 +66,7 @@ uint64_t calcIntegral() } [numthreads(BENCHMARK_WORKGROUP_DIMENSION_SIZE_X, 1, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { static const uint32_t NativeToEmulatedRatio = 6; diff --git a/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl b/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl index 98875c42f..7f6d1dec1 100644 --- a/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl +++ b/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl @@ -4,10 +4,10 @@ #include -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 128u; -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_COUNT = 1024u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 128u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_COUNT = 
1024u; enum EF64_BENCHMARK_MODE { diff --git a/64_EmulatedFloatTest/app_resources/common.hlsl b/64_EmulatedFloatTest/app_resources/common.hlsl index aea1ce94d..0e8762c5a 100644 --- a/64_EmulatedFloatTest/app_resources/common.hlsl +++ b/64_EmulatedFloatTest/app_resources/common.hlsl @@ -8,7 +8,7 @@ #include #include -NBL_CONSTEXPR uint32_t WORKGROUP_SIZE = 1; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WORKGROUP_SIZE = 1; using namespace nbl; using namespace hlsl; diff --git a/64_EmulatedFloatTest/app_resources/test.comp.hlsl b/64_EmulatedFloatTest/app_resources/test.comp.hlsl index 7681e80a5..e95eadd49 100644 --- a/64_EmulatedFloatTest/app_resources/test.comp.hlsl +++ b/64_EmulatedFloatTest/app_resources/test.comp.hlsl @@ -12,6 +12,7 @@ PushConstants pc; [numthreads(WORKGROUP_SIZE, 1, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { const nbl::hlsl::emulated_float64_t a = nbl::hlsl::bit_cast >(pc.a); diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index 3fc635e87..a4f177f16 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" @@ -262,9 +262,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = base.m_logger.get(); - lp.workingDirectory = ""; // virtual root - // this time we load a shader directly from a file - auto assetBundle = base.m_assetMgr->getAsset("app_resources/test.comp.hlsl", lp); + lp.workingDirectory = "app_resources"; // virtual root + + auto key = nbl::this_example::builtin::build::get_spirv_key<"test">(base.m_device.get()); + auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) { @@ -274,26 +275,11 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr source = IAsset::castDown(assets[0]); - - auto* compilerSet = base.m_assetMgr->getCompilerSet(); - - nbl::asset::IShaderCompiler::SCompilerOptions options = {}; - options.stage = ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = base.m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - auto spirv = compilerSet->compileToSPIRV(source.get(), options); - - shader = base.m_device->compileShader({spirv.get()}); + shader = IAsset::castDown(assets[0]); } if (!shader) - base.logFail("Failed to create a GPU Shader, seems the Driver doesn't like 
the SPIR-V we're feeding it!\n"); + base.logFail("Failed to load precompiled \"test\" shader!\n"); nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { { @@ -928,9 +914,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = base.m_logger.get(); - lp.workingDirectory = ""; // virtual root + lp.workingDirectory = "app_resources"; // virtual root // this time we load a shader directly from a file - auto assetBundle = base.m_assetMgr->getAsset("app_resources/benchmark/benchmark.comp.hlsl", lp); + auto key = nbl::this_example::builtin::build::get_spirv_key<"benchmark">(m_device.get()); + auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) { @@ -940,26 +927,11 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr source = IAsset::castDown(assets[0]); - - auto* compilerSet = base.m_assetMgr->getCompilerSet(); - - IShaderCompiler::SCompilerOptions options = {}; - options.stage = ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = base.m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - auto spirv = compilerSet->compileToSPIRV(source.get(), options); - - shader = base.m_device->compileShader({spirv.get()}); + shader = IAsset::castDown(assets[0]); } if (!shader) - base.logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V 
we're feeding it!\n"); + base.logFail("Failed to load precompiled \"benchmark\" shader!\n"); nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { { diff --git a/66_HLSLBxDFTests/app_resources/test_compile.comp.hlsl b/66_HLSLBxDFTests/app_resources/test_compile.comp.hlsl index fcf510b21..2248784e9 100644 --- a/66_HLSLBxDFTests/app_resources/test_compile.comp.hlsl +++ b/66_HLSLBxDFTests/app_resources/test_compile.comp.hlsl @@ -8,14 +8,14 @@ using namespace nbl::hlsl; +using spectral_t = vector; using ray_dir_info_t = bxdf::ray_dir_info::SBasic; -using iso_interaction = bxdf::surface_interactions::SIsotropic; +using iso_interaction = bxdf::surface_interactions::SIsotropic; using aniso_interaction = bxdf::surface_interactions::SAnisotropic; using sample_t = bxdf::SLightSample; using iso_cache = bxdf::SIsotropicMicrofacetCache; using aniso_cache = bxdf::SAnisotropicMicrofacetCache; using quotient_pdf_t = sampling::quotient_and_pdf; -using spectral_t = vector; using iso_config_t = bxdf::SConfiguration; using aniso_config_t = bxdf::SConfiguration; @@ -32,6 +32,7 @@ void main(uint32_t3 ID : SV_DispatchThreadID) bxdf::reflection::SBeckmannAnisotropic beckmannAnisoBRDF; bxdf::reflection::SGGXIsotropic ggxIsoBRDF; bxdf::reflection::SGGXAnisotropic ggxAnisoBRDF; + bxdf::reflection::SIridescent iridBRDF; bxdf::transmission::SLambertian lambertianBSDF; bxdf::transmission::SOrenNayar orenNayarBSDF; @@ -42,6 +43,7 @@ void main(uint32_t3 ID : SV_DispatchThreadID) bxdf::transmission::SBeckmannDielectricAnisotropic beckmannAnisoBSDF; bxdf::transmission::SGGXDielectricIsotropic ggxIsoBSDF; bxdf::transmission::SGGXDielectricAnisotropic ggxAnisoBSDF; + bxdf::transmission::SIridescent iridBSDF; // do some nonsense calculations, but call all the relevant functions @@ -76,6 +78,9 @@ void main(uint32_t3 ID : SV_DispatchThreadID) s = ggxAnisoBRDF.generate(anisointer, u.xy, cache); L += s.L.direction; + qp = iridBRDF.quotient_and_pdf(s, anisointer, cache); + L -= qp.quotient; + qp = 
ggxAnisoBRDF.quotient_and_pdf(s, anisointer, cache); L -= qp.quotient; diff --git a/66_HLSLBxDFTests/app_resources/test_components.hlsl b/66_HLSLBxDFTests/app_resources/test_components.hlsl index 9631db05d..a2db7ef53 100644 --- a/66_HLSLBxDFTests/app_resources/test_components.hlsl +++ b/66_HLSLBxDFTests/app_resources/test_components.hlsl @@ -3,11 +3,6 @@ #include "tests_common.hlsl" -namespace nbl -{ -namespace hlsl -{ - template // only for cook torrance bxdfs struct TestNDF : TestBxDF { @@ -75,7 +70,7 @@ struct TestNDF : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -87,12 +82,13 @@ struct TestNDF : TestBxDF float reflectance; bool transmitted; + bool isNdfInfinity; NBL_IF_CONSTEXPR(aniso) { dg1_query_type dq = base_t::bxdf.ndf.template createDG1Query(base_t::anisointer, cache); - fresnel_type _f = bxdf::impl::getOrientedFresnel::__call(base_t::bxdf.fresnel, base_t::anisointer.getNdotV()); - quant_query_type qq = bxdf::impl::quant_query_helper::template __call(base_t::bxdf.ndf, _f, cache); - quant_type DG1 = base_t::bxdf.ndf.template DG1(dq, qq, s, base_t::anisointer); + fresnel_type _f = base_t::bxdf_t::__getOrientedFresnel(base_t::bxdf.fresnel, base_t::anisointer.getNdotV()); + quant_query_type qq = bxdf::impl::quant_query_helper::template __call(base_t::bxdf.ndf, _f, base_t::anisointer, cache); + quant_type DG1 = base_t::bxdf.ndf.template DG1(dq, qq, s, base_t::anisointer, isNdfInfinity); dg1 = DG1.microfacetMeasure * hlsl::abs(cache.getVdotH() / base_t::anisointer.getNdotV()); reflectance = _f(cache.getVdotH())[0]; NdotH = cache.getAbsNdotH(); @@ -101,15 +97,18 @@ struct TestNDF : TestBxDF else { dg1_query_type dq = base_t::bxdf.ndf.template createDG1Query(base_t::isointer, isocache); - fresnel_type _f = bxdf::impl::getOrientedFresnel::__call(base_t::bxdf.fresnel, base_t::isointer.getNdotV()); - 
quant_query_type qq = bxdf::impl::quant_query_helper::template __call(base_t::bxdf.ndf, _f, isocache); - quant_type DG1 = base_t::bxdf.ndf.template DG1(dq, qq, s, base_t::isointer); + fresnel_type _f = base_t::bxdf_t::__getOrientedFresnel(base_t::bxdf.fresnel, base_t::isointer.getNdotV()); + quant_query_type qq = bxdf::impl::quant_query_helper::template __call(base_t::bxdf.ndf, _f, base_t::isointer, isocache); + quant_type DG1 = base_t::bxdf.ndf.template DG1(dq, qq, s, base_t::isointer, isNdfInfinity); dg1 = DG1.microfacetMeasure * hlsl::abs(isocache.getVdotH() / base_t::isointer.getNdotV()); reflectance = _f(isocache.getVdotH())[0]; NdotH = isocache.getAbsNdotH(); transmitted = isocache.isTransmission(); } + if (isNdfInfinity) + return BET_INVALID; + if (transmitted) { float eta = base_t::rc.eta.x; @@ -148,7 +147,7 @@ struct TestNDF : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -181,13 +180,13 @@ struct TestNDF : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.verbose = initparams.verbose; t.initBxDF(t.rc); @@ -322,7 +321,7 @@ struct TestCTGenerateH : TestBxDF if (base_t::isointer.getNdotV() <= numeric_limits::min) return BET_INVALID; else if (traits_t::type == bxdf::BT_BSDF) - if (abs(base_t::isointer.getNdotV()) <= numeric_limits::min) + if (hlsl::abs(base_t::isointer.getNdotV()) <= numeric_limits::min) return BET_INVALID; ErrorType res = compute(); @@ -334,13 +333,13 @@ struct 
TestCTGenerateH : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.numSamples = initparams.samples; t.immediateFail = initparams.immediateFail; t.initBxDF(t.rc); @@ -376,7 +375,4 @@ struct TestCTGenerateH : TestBxDF }; #endif -} -} - #endif \ No newline at end of file diff --git a/66_HLSLBxDFTests/app_resources/tests.hlsl b/66_HLSLBxDFTests/app_resources/tests.hlsl index 9011aa2e5..8f26bc4ee 100644 --- a/66_HLSLBxDFTests/app_resources/tests.hlsl +++ b/66_HLSLBxDFTests/app_resources/tests.hlsl @@ -3,11 +3,6 @@ #include "tests_common.hlsl" -namespace nbl -{ -namespace hlsl -{ - template struct TestJacobian : TestBxDF { @@ -69,7 +64,6 @@ struct TestJacobian : TestBxDF if (!(s.isValid() && sx.isValid() && sy.isValid())) return BET_INVALID; - // TODO: add checks with need clamp trait if (traits_t::type == bxdf::BT_BRDF) { if (s.getNdotL() <= bit_cast(numeric_limits::min)) @@ -77,7 +71,7 @@ struct TestJacobian : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -115,7 +109,7 @@ struct TestJacobian : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -163,13 +157,13 @@ struct TestJacobian : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = 
random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.verbose = initparams.verbose; t.initBxDF(t.rc); @@ -245,7 +239,6 @@ struct TestReciprocity : TestBxDF if (!s.isValid()) return BET_INVALID; - // TODO: add checks with need clamp trait if (bxdf::traits::type == bxdf::BT_BRDF) { if (s.getNdotL() <= bit_cast(numeric_limits::min)) @@ -253,7 +246,7 @@ struct TestReciprocity : TestBxDF } else if (bxdf::traits::type == bxdf::BT_BSDF) { - if (abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(s.getNdotL()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -264,6 +257,7 @@ struct TestReciprocity : TestBxDF rec_s = sample_t::createFromTangentSpace(rec_localL, anisointer.getFromTangentSpace()); rec_isointer = iso_interaction_t::create(rec_V, base_t::rc.N); + rec_isointer.luminosityContributionHint = isointer.luminosityContributionHint; rec_anisointer = aniso_interaction_t::create(rec_isointer, base_t::rc.T, base_t::rc.B); rec_cache = cache; rec_cache.iso_cache.VdotH = cache.iso_cache.getLdotH(); @@ -330,7 +324,7 @@ struct TestReciprocity : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -363,13 +357,13 @@ struct TestReciprocity : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - 
t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.verbose = initparams.verbose; t.initBxDF(t.rc); @@ -517,7 +511,7 @@ struct TestBucket : TestBxDF } else if (traits_t::type == bxdf::BT_BSDF) { - if (abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) + if (hlsl::abs(base_t::isointer.getNdotV()) <= bit_cast(numeric_limits::min)) return BET_INVALID; } @@ -530,13 +524,13 @@ struct TestBucket : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.numSamples = initparams.samples; t.initBxDF(t.rc); @@ -735,7 +729,7 @@ struct TestChi2 : TestBxDF void writeToEXR() { - std::string filename = std::format("chi2test_{}_{}.exr", base_t::rc.state, base_t::name); + std::string filename = std::format("chi2test_{}_{}.exr", base_t::rc.halfSeed, base_t::name); int totalWidth = phiSplits; int totalHeight = 2 * thetaSplits + 1; @@ -869,7 +863,7 @@ struct TestChi2 : TestBxDF cache.iso_cache.absNdotH = hlsl::abs(hlsl::dot(N, H)); cache.iso_cache.NdotH2 = cache.iso_cache.absNdotH * cache.iso_cache.absNdotH; - if (!cache.isValid(bxdf::fresnel::OrientedEtas >::create(1.f, hlsl::promote >(eta)))) + if (!cache.isValid(bxdf::fresnel::OrientedEtas >::create(1.f, hlsl::promote >(eta)))) return 0.f; const float32_t3 T = base_t::anisointer.getT(); @@ -911,7 +905,7 @@ struct TestChi2 : TestBxDF if (base_t::isointer.getNdotV() <= numeric_limits::min) return BET_INVALID; else if (traits_t::type == bxdf::BT_BSDF) - if (abs(base_t::isointer.getNdotV()) <= numeric_limits::min) + if (hlsl::abs(base_t::isointer.getNdotV()) <= numeric_limits::min) return BET_INVALID; ErrorType 
res = compute(); @@ -994,13 +988,13 @@ struct TestChi2 : TestBxDF static void run(NBL_CONST_REF_ARG(STestInitParams) initparams, NBL_REF_ARG(FailureCallback) cb) { - random::PCG32 pcg = random::PCG32::construct(initparams.state); + random::PCG32 pcg = random::PCG32::construct(initparams.halfSeed); random::DimAdaptorRecursive rand2d = random::DimAdaptorRecursive::construct(pcg); uint32_t2 state = rand2d(); this_t t; t.init(state); - t.rc.state = initparams.state; + t.rc.halfSeed = initparams.halfSeed; t.numSamples = initparams.samples; t.thetaSplits = initparams.thetaSplits; t.phiSplits = initparams.phiSplits; @@ -1034,7 +1028,4 @@ struct TestChi2 : TestBxDF }; #endif -} -} - #endif \ No newline at end of file diff --git a/66_HLSLBxDFTests/app_resources/tests_common.hlsl b/66_HLSLBxDFTests/app_resources/tests_common.hlsl index c0a8d9614..a9a3ef2ec 100644 --- a/66_HLSLBxDFTests/app_resources/tests_common.hlsl +++ b/66_HLSLBxDFTests/app_resources/tests_common.hlsl @@ -41,31 +41,29 @@ using namespace IMATH; using json = nlohmann::json; #endif -namespace nbl -{ -namespace hlsl -{ +using namespace nbl; +using namespace hlsl; +using spectral_t = hlsl::vector; using ray_dir_info_t = bxdf::ray_dir_info::SBasic; -using iso_interaction = bxdf::surface_interactions::SIsotropic; +using iso_interaction = bxdf::surface_interactions::SIsotropic; using aniso_interaction = bxdf::surface_interactions::SAnisotropic; using sample_t = bxdf::SLightSample; using iso_cache = bxdf::SIsotropicMicrofacetCache; using aniso_cache = bxdf::SAnisotropicMicrofacetCache; using quotient_pdf_t = sampling::quotient_and_pdf; -using spectral_t = vector; using iso_config_t = bxdf::SConfiguration; using aniso_config_t = bxdf::SConfiguration; using iso_microfacet_config_t = bxdf::SMicrofacetConfiguration; using aniso_microfacet_config_t = bxdf::SMicrofacetConfiguration; -using bool32_t3 = vector; +using bool32_t3 = hlsl::vector; template struct ConvertToFloat01 { - using ret_t = conditional_t::Dimension==1, 
float, vector::Dimension> >; + using ret_t = conditional_t::Dimension==1, float, hlsl::vector::Dimension> >; static ret_t __call(T x) { @@ -78,19 +76,27 @@ bool checkEq(T a, T b, float32_t eps) { T _a = hlsl::abs(a); T _b = hlsl::abs(b); - return nbl::hlsl::all::Dimension> >(nbl::hlsl::max(_a / _b, _b / _a) <= hlsl::promote(1 + eps)); + return nbl::hlsl::all::Dimension> >(nbl::hlsl::max(_a / _b, _b / _a) <= hlsl::promote(1 + eps)); +} + +template<> +bool checkEq(float32_t a, float32_t b, float32_t eps) +{ + float32_t _a = hlsl::abs(a); + float32_t _b = hlsl::abs(b); + return nbl::hlsl::max(_a / _b, _b / _a) <= float32_t(1 + eps); } template bool checkLt(T a, T b) { - return nbl::hlsl::all::Dimension> >(a < b); + return nbl::hlsl::all::Dimension> >(a < b); } template bool checkZero(T a, float32_t eps) { - return nbl::hlsl::all::Dimension> >(nbl::hlsl::abs(a) < hlsl::promote(eps)); + return nbl::hlsl::all::Dimension> >(nbl::hlsl::abs(a) < hlsl::promote(eps)); } template<> @@ -110,12 +116,9 @@ struct SBxDFTestResources retval.u = ConvertToFloat01::__call(rng_vec3()); retval.u.x = hlsl::clamp(retval.u.x, retval.eps, 1.f-retval.eps); retval.u.y = hlsl::clamp(retval.u.y, retval.eps, 1.f-retval.eps); - // retval.u.z = 0.0; retval.V.direction = nbl::hlsl::normalize(sampling::UniformSphere::generate(ConvertToFloat01::__call(rng_vec2()))); retval.N = nbl::hlsl::normalize(sampling::UniformSphere::generate(ConvertToFloat01::__call(rng_vec2()))); - // if (hlsl::dot(retval.N, retval.V.direction) < 0) - // retval.V.direction = -retval.V.direction; float32_t3 tangent, bitangent; math::frisvad(retval.N, tangent, bitangent); @@ -131,11 +134,14 @@ struct SBxDFTestResources retval.alpha.y = ConvertToFloat01::__call(retval.rng()); retval.eta = ConvertToFloat01::__call(rng_vec2()) * hlsl::promote(1.5) + hlsl::promote(1.1); // range [1.1,2.6], also only do eta = eta/1.0 (air) retval.luma_coeff = float32_t3(0.2126, 0.7152, 0.0722); // luma coefficients for Rec. 
709 + + retval.Dinc = ConvertToFloat01::__call(retval.rng()) * 2400.0f + 100.0f; + retval.etaThinFilm = ConvertToFloat01::__call(retval.rng()) * 0.5 + 1.1f; // range [1.1,1.6] return retval; } float eps = 1e-3; // epsilon - uint32_t state; // init state seed, for debugging + uint32_t halfSeed; // init state seed, for debugging nbl::hlsl::Xoroshiro64Star rng; ray_dir_info_t V; @@ -147,12 +153,16 @@ struct SBxDFTestResources float32_t2 alpha; float32_t2 eta; // (eta, etak) float32_t3 luma_coeff; + + // thin film stuff; + float Dinc; // in nm [100, 2500] + float etaThinFilm; }; struct STestInitParams { bool logInfo; - uint32_t state; + uint32_t halfSeed; uint32_t samples; uint32_t thetaSplits; uint32_t phiSplits; @@ -184,6 +194,7 @@ struct TestBase rc = SBxDFTestResources::create(seed); isointer = iso_interaction::create(rc.V, rc.N); + isointer.luminosityContributionHint = rc.luma_coeff; anisointer = aniso_interaction::create(isointer, rc.T, rc.B); } @@ -315,6 +326,28 @@ struct TestBxDF> : } }; +template<> +struct TestBxDF> : TestBxDFBase> +{ + using base_t = TestBxDFBase>; + + void initBxDF(SBxDFTestResources _rc) + { + base_t::bxdf.ndf = base_t::bxdf_t::ndf_type::create(_rc.alpha.x); + using creation_params_t = base_t::bxdf_t::fresnel_type::creation_params_type; + creation_params_t params; + params.Dinc = _rc.Dinc; + params.ior1 = hlsl::promote(1.0); + params.ior2 = hlsl::promote(_rc.etaThinFilm); + params.ior3 = hlsl::promote(_rc.eta.x); + params.iork3 = hlsl::promote(_rc.eta.y); + base_t::bxdf.fresnel = base_t::bxdf_t::fresnel_type::create(params); +#ifndef __HLSL_VERSION + base_t::name = "Iridescent BRDF"; +#endif + } +}; + template<> struct TestBxDF> : TestBxDFBase> { @@ -354,7 +387,6 @@ struct TestBxDF> : TestB { using spectral_type = typename base_t::bxdf_t::spectral_type; base_t::bxdf.fresnel = bxdf::fresnel::Dielectric::create(bxdf::fresnel::OrientedEtas::create(base_t::isointer.getNdotV(bxdf::BxDFClampMode::BCM_ABS), hlsl::promote(_rc.eta.x))); - 
base_t::bxdf.luminosityContributionHint = _rc.luma_coeff; #ifndef __HLSL_VERSION base_t::name = "Thin smooth dielectric BSDF"; #endif @@ -438,37 +470,62 @@ struct TestBxDF> : TestBxD } }; +template +struct TestBxDF> : TestBxDFBase> +{ + using base_t = TestBxDFBase>; + + void initBxDF(SBxDFTestResources _rc) + { + base_t::bxdf.ndf = base_t::bxdf_t::ndf_type::create(_rc.alpha.x); + using creation_params_t = base_t::bxdf_t::fresnel_type::creation_params_type; + creation_params_t params; + params.Dinc = _rc.Dinc; + params.ior1 = hlsl::promote(1.0); + params.ior2 = hlsl::promote(_rc.etaThinFilm); + params.ior3 = hlsl::promote(_rc.eta.x); + base_t::bxdf.fresnel = base_t::bxdf_t::fresnel_type::create(params); +#ifndef __HLSL_VERSION + base_t::name = "Iridescent BSDF"; +#endif + } +}; + namespace reciprocity_test_impl { -template) +template && concepts::FloatingPointLikeVectorial) struct SIsotropic { + using this_t = SIsotropic; using ray_dir_info_type = RayDirInfo; using scalar_type = typename RayDirInfo::scalar_type; using vector3_type = typename RayDirInfo::vector3_type; + using spectral_type = Spectrum; // WARNING: Changed since GLSL, now arguments need to be normalized! 
- static SIsotropic create(NBL_CONST_REF_ARG(RayDirInfo) normalizedV, const vector3_type normalizedN) + static this_t create(NBL_CONST_REF_ARG(RayDirInfo) normalizedV, const vector3_type normalizedN) { - SIsotropic retval; + this_t retval; retval.V = normalizedV; retval.N = normalizedN; retval.NdotV = nbl::hlsl::dot(retval.N, retval.V.getDirection()); retval.NdotV2 = retval.NdotV * retval.NdotV; + retval.luminosityContributionHint = hlsl::promote(1.0); return retval; } template) - static SIsotropic copy(NBL_CONST_REF_ARG(I) other) + static this_t copy(NBL_CONST_REF_ARG(I) other) { - SIsotropic retval; + this_t retval; retval.V = other.getV(); retval.N = other.getN(); retval.NdotV = other.getNdotV(); retval.NdotV2 = other.getNdotV2(); retval.pathOrigin = bxdf::PathOrigin::PO_SENSOR; + retval.luminosityContributionHint = other.luminosityContributionHint; return retval; } @@ -481,12 +538,14 @@ struct SIsotropic scalar_type getNdotV2() NBL_CONST_MEMBER_FUNC { return NdotV2; } bxdf::PathOrigin getPathOrigin() NBL_CONST_MEMBER_FUNC { return pathOrigin; } + spectral_type getLuminosityContributionHint() NBL_CONST_MEMBER_FUNC { return luminosityContributionHint; } RayDirInfo V; vector3_type N; scalar_type NdotV; scalar_type NdotV2; bxdf::PathOrigin pathOrigin; + spectral_type luminosityContributionHint; }; template) @@ -497,7 +556,8 @@ struct SAnisotropic using ray_dir_info_type = typename isotropic_interaction_type::ray_dir_info_type; using scalar_type = typename ray_dir_info_type::scalar_type; using vector3_type = typename ray_dir_info_type::vector3_type; - using matrix3x3_type = matrix; + using matrix3x3_type = hlsl::matrix; + using spectral_type = typename isotropic_interaction_type::spectral_type; // WARNING: Changed since GLSL, now arguments need to be normalized! 
static this_t create( @@ -551,6 +611,7 @@ struct SAnisotropic scalar_type getNdotV(bxdf::BxDFClampMode _clamp = bxdf::BxDFClampMode::BCM_NONE) NBL_CONST_MEMBER_FUNC { return isotropic.getNdotV(_clamp); } scalar_type getNdotV2() NBL_CONST_MEMBER_FUNC { return isotropic.getNdotV2(); } bxdf::PathOrigin getPathOrigin() NBL_CONST_MEMBER_FUNC { return isotropic.getPathOrigin(); } + spectral_type getLuminosityContributionHint() NBL_CONST_MEMBER_FUNC { return isotropic.getLuminosityContributionHint(); } vector3_type getT() NBL_CONST_MEMBER_FUNC { return T; } vector3_type getB() NBL_CONST_MEMBER_FUNC { return B; } @@ -585,10 +646,10 @@ struct CustomIsoMicrofacetConfiguration; - using vector3_type = vector; - using monochrome_type = vector; - using matrix3x3_type = matrix; + using vector2_type = hlsl::vector; + using vector3_type = hlsl::vector; + using monochrome_type = hlsl::vector; + using matrix3x3_type = hlsl::matrix; using isotropic_interaction_type = Interaction; using anisotropic_interaction_type = reciprocity_test_impl::SAnisotropic; using sample_type = LS; @@ -599,12 +660,9 @@ struct CustomIsoMicrofacetConfiguration; +using rectest_iso_interaction = reciprocity_test_impl::SIsotropic; using rectest_aniso_interaction = reciprocity_test_impl::SAnisotropic; using rectest_iso_microfacet_config_t = reciprocity_test_impl::CustomIsoMicrofacetConfiguration; using rectest_aniso_microfacet_config_t = bxdf::SMicrofacetConfiguration; -} -} - #endif diff --git a/66_HLSLBxDFTests/main.cpp b/66_HLSLBxDFTests/main.cpp index a65b443c9..e4b43f4d7 100644 --- a/66_HLSLBxDFTests/main.cpp +++ b/66_HLSLBxDFTests/main.cpp @@ -33,34 +33,34 @@ struct PrintFailureCallback : FailureCallback { case BET_INVALID: if (logInfo) - fprintf(stderr, "[INFO] seed %u: %s skipping test due to invalid NdotV/NdotL config\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[INFO] seed %u: %s skipping test due to invalid NdotV/NdotL config\n", failedFor.rc.halfSeed, failedFor.name.c_str()); 
break; case BET_NEGATIVE_VAL: - fprintf(stderr, "[ERROR] seed %u: %s pdf/quotient/eval < 0\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s pdf/quotient/eval < 0\n", failedFor.rc.halfSeed, failedFor.name.c_str()); break; case BET_PDF_ZERO: - fprintf(stderr, "[ERROR] seed %u: %s pdf = 0\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s pdf = 0\n", failedFor.rc.halfSeed, failedFor.name.c_str()); break; case BET_QUOTIENT_INF: - fprintf(stderr, "[ERROR] seed %u: %s quotient -> inf\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s quotient -> inf\n", failedFor.rc.halfSeed, failedFor.name.c_str()); break; case BET_JACOBIAN: - fprintf(stderr, "[ERROR] seed %u: %s failed the jacobian * pdf test %s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s failed the jacobian * pdf test %s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; case BET_PDF_EVAL_DIFF: - fprintf(stderr, "[ERROR] seed %u: %s quotient * pdf != eval %s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s quotient * pdf != eval %s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; case BET_RECIPROCITY: - fprintf(stderr, "[ERROR] seed %u: %s failed the reciprocity test %s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s failed the reciprocity test %s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; case BET_PRINT_MSG: - fprintf(stderr, "[ERROR] seed %u: %s error message\n%s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s error message\n%s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; case BET_GENERATE_H: - 
fprintf(stderr, "[ERROR] seed %u: %s failed invalid H configuration generated %s\n", failedFor.rc.state, failedFor.name.c_str(), failedFor.errMsg.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s failed invalid H configuration generated %s\n", failedFor.rc.halfSeed, failedFor.name.c_str(), failedFor.errMsg.c_str()); break; default: - fprintf(stderr, "[ERROR] seed %u: %s unknown error\n", failedFor.rc.state, failedFor.name.c_str()); + fprintf(stderr, "[ERROR] seed %u: %s unknown error\n", failedFor.rc.halfSeed, failedFor.name.c_str()); } #ifdef _NBL_DEBUG @@ -183,7 +183,7 @@ int main(int argc, char** argv) auto rJacobian = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN(rJacobian) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.verbose = testconfigs["TestJacobian"]["verbose"]; TestJacobian>::run(initparams, cb); @@ -193,6 +193,7 @@ int main(int argc, char** argv) TestJacobian, true>::run(initparams, cb); TestJacobian, false>::run(initparams, cb); TestJacobian,true>::run(initparams, cb); + TestJacobian, false>::run(initparams, cb); TestJacobian>::run(initparams, cb); TestJacobian>::run(initparams, cb); @@ -203,6 +204,7 @@ int main(int argc, char** argv) TestJacobian, true>::run(initparams, cb); TestJacobian, false>::run(initparams, cb); TestJacobian,true>::run(initparams, cb); + TestJacobian, false>::run(initparams, cb); FOR_EACH_END @@ -211,7 +213,7 @@ int main(int argc, char** argv) auto rReciprocity = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN(rReciprocity) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = 3; + initparams.halfSeed = i; initparams.verbose = testconfigs["TestReciprocity"]["verbose"]; TestReciprocity>::run(initparams, cb); @@ -221,16 +223,18 @@ int main(int argc, char** argv) TestReciprocity, true>::run(initparams, cb); TestReciprocity, false>::run(initparams, cb); TestReciprocity, true>::run(initparams, cb); + TestReciprocity, false>::run(initparams, cb); 
TestReciprocity>::run(initparams, cb); TestReciprocity>::run(initparams, cb); - TestReciprocity>::run(initparams, cb); + TestReciprocity>::run(initparams, cb); TestReciprocity>::run(initparams, cb); TestReciprocity>::run(initparams, cb); TestReciprocity, false>::run(initparams, cb); TestReciprocity, true>::run(initparams, cb); TestReciprocity, false>::run(initparams, cb); TestReciprocity, true>::run(initparams, cb); + TestReciprocity, false>::run(initparams, cb); FOR_EACH_END @@ -240,7 +244,7 @@ int main(int argc, char** argv) auto rBucket = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN(rBucket) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.samples = testconfigs["TestBucket"]["samples"]; TestBucket>::run(initparams, cb); @@ -249,6 +253,7 @@ int main(int argc, char** argv) TestBucket, true>::run(initparams, cb); TestBucket, false>::run(initparams, cb); TestBucket, true>::run(initparams, cb); + TestBucket, false>::run(initparams, cb); TestBucket>::run(initparams, cb); TestBucket>::run(initparams, cb); @@ -256,6 +261,7 @@ int main(int argc, char** argv) TestBucket, true>::run(initparams, cb); TestBucket, false>::run(initparams, cb); TestBucket, true>::run(initparams, cb); + TestBucket, false>::run(initparams, cb); FOR_EACH_END @@ -264,7 +270,7 @@ int main(int argc, char** argv) auto rChi2 = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN_EX(rChi2, std::execution::par_unseq) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.samples = testconfigs["TestChi2"]["samples"]; initparams.thetaSplits = testconfigs["TestChi2"]["thetaSplits"]; initparams.phiSplits = testconfigs["TestChi2"]["phiSplits"]; @@ -276,6 +282,7 @@ int main(int argc, char** argv) TestChi2, true>::run(initparams, cb); TestChi2, false>::run(initparams, cb); TestChi2, true>::run(initparams, cb); + TestChi2, false>::run(initparams, cb); TestChi2>::run(initparams, cb); 
TestChi2>::run(initparams, cb); @@ -283,15 +290,15 @@ int main(int argc, char** argv) TestChi2, true>::run(initparams, cb); TestChi2, false>::run(initparams, cb); TestChi2, true>::run(initparams, cb); + TestChi2, false>::run(initparams, cb); FOR_EACH_END -#if 0 // testing ndf jacobian * dg1, ONLY for cook torrance bxdfs runs = testconfigs["TestNDF"]["runs"]; auto rNdf = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN(rNdf) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.verbose = testconfigs["TestNDF"]["verbose"]; TestNDF, false>::run(initparams, cb); @@ -304,14 +311,13 @@ int main(int argc, char** argv) TestNDF, false>::run(initparams, cb); TestNDF, true>::run(initparams, cb); FOR_EACH_END -#endif -#if 0 + // test generated H that NdotV*VdotH>=0.0, VdotL calculation runs = testconfigs["TestCTGenerateH"]["runs"]; auto rGenerateH = std::ranges::views::iota(0u, runs); FOR_EACH_BEGIN_EX(rGenerateH, std::execution::par_unseq) STestInitParams initparams{ .logInfo = logInfo }; - initparams.state = i; + initparams.halfSeed = i; initparams.samples = testconfigs["TestCTGenerateH"]["samples"]; initparams.immediateFail = testconfigs["TestCTGenerateH"]["immediateFail"]; @@ -325,27 +331,39 @@ int main(int argc, char** argv) TestCTGenerateH, false>::run(initparams, cb); TestCTGenerateH, true>::run(initparams, cb); FOR_EACH_END -#endif // test arccos angle sums { Xoroshiro64Star rng = Xoroshiro64Star::construct(uint32_t2(4, 2)); + math::sincos_accumulator angle_adder; + + auto Sin = [&](const float cosA) -> float + { + return nbl::hlsl::sqrt(1.f - cosA * cosA); + }; + for (uint32_t i = 0; i < 10; i++) { - const float a = rng() * numbers::pi; - const float b = rng() * numbers::pi; - const float c = rng() * numbers::pi; - const float d = rng() * numbers::pi; + const float a = ConvertToFloat01::__call(rng()) * 2.f - 1.f; + const float b = ConvertToFloat01::__call(rng()) * 2.f - 1.f; + const float c = 
ConvertToFloat01::__call(rng()) * 2.f - 1.f; + const float d = ConvertToFloat01::__call(rng()) * 2.f - 1.f; const float exAB = acos(a) + acos(b); - float res = math::getSumofArccosAB(a, b); - if (res != exAB) - fprintf(stderr, "[ERROR] math::getSumofArccosAB failed! expected %f, got %f\n", exAB, res); + angle_adder = math::sincos_accumulator::create(a, Sin(a)); + angle_adder.addAngle(b, Sin(b)); + float res = angle_adder.getSumofArccos(); + if (!checkEq(res, exAB, 1e-3)) + fprintf(stderr, "[ERROR] angle adding (2 angles) failed! expected %f, got %f\n", exAB, res); const float exABCD = exAB + acos(c) + acos(d); - res = math::getSumofArccosABCD(a, b, c, d); - if (res != exABCD) - fprintf(stderr, "[ERROR] math::getSumofArccosABCD failed! expected %f, got %f\n", exABCD, res); + angle_adder = math::sincos_accumulator::create(a, Sin(a)); + angle_adder.addAngle(b, Sin(b)); + angle_adder.addAngle(c, Sin(c)); + angle_adder.addAngle(d, Sin(d)); + res = angle_adder.getSumofArccos(); + if (!checkEq(res, exABCD, 1e-3)) + fprintf(stderr, "[ERROR] angle adding (4 angles) failed! 
expected %f, got %f\n", exABCD, res); } } diff --git a/67_RayQueryGeometry/CMakeLists.txt b/67_RayQueryGeometry/CMakeLists.txt index d26a90205..1fdfc03ce 100644 --- a/67_RayQueryGeometry/CMakeLists.txt +++ b/67_RayQueryGeometry/CMakeLists.txt @@ -25,4 +25,49 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/render.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/render.comp.hlsl", + "KEY": "render", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/67_RayQueryGeometry/app_resources/common.hlsl b/67_RayQueryGeometry/app_resources/common.hlsl index 68a353adc..ecac0f59d 100644 --- a/67_RayQueryGeometry/app_resources/common.hlsl +++ b/67_RayQueryGeometry/app_resources/common.hlsl @@ -3,7 +3,7 @@ #include 
"nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t WorkgroupSize = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = 16; enum NormalType : uint32_t { diff --git a/67_RayQueryGeometry/app_resources/render.comp.hlsl b/67_RayQueryGeometry/app_resources/render.comp.hlsl index 954598c9a..889e1f38b 100644 --- a/67_RayQueryGeometry/app_resources/render.comp.hlsl +++ b/67_RayQueryGeometry/app_resources/render.comp.hlsl @@ -1,7 +1,5 @@ #include "common.hlsl" -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" - #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" diff --git a/67_RayQueryGeometry/main.cpp b/67_RayQueryGeometry/main.cpp index 2783385f2..b35000485 100644 --- a/67_RayQueryGeometry/main.cpp +++ b/67_RayQueryGeometry/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" class RayQueryGeometryApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -150,8 +151,10 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const std::string shaderPath = "app_resources/render.comp.hlsl"; IAssetLoader::SAssetLoadParams lparams = {}; lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(shaderPath, lparams); + lparams.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key<"render">(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.data(), lparams); if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, shaderPath); @@ -160,10 +163,9 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const auto assets = 
bundle.getContents(); assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - auto shader = m_device->compileShader({shaderSrc.get()}); + smart_refctd_ptr shader = IAsset::castDown(assets[0]); if (!shader) - return logFail("Failed to create shader!"); + return logFail("Failed to load precompiled shader!"); SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0u, .size = sizeof(SPushConstants)}; auto pipelineLayout = m_device->createPipelineLayout({ &pcRange, 1 }, smart_refctd_ptr(renderDs->getLayout()), nullptr, nullptr, nullptr); diff --git a/70_FLIPFluids/CMakeLists.txt b/70_FLIPFluids/CMakeLists.txt index a434ff32a..842492167 100644 --- a/70_FLIPFluids/CMakeLists.txt +++ b/70_FLIPFluids/CMakeLists.txt @@ -21,4 +21,100 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/compute/advectParticles.comp.hlsl + app_resources/compute/applyBodyForces.comp.hlsl + app_resources/compute/diffusion.comp.hlsl + app_resources/compute/genParticleVertices.comp.hlsl + app_resources/compute/particlesInit.comp.hlsl + app_resources/compute/prepareCellUpdate.comp.hlsl + app_resources/compute/pressureSolver.comp.hlsl + app_resources/compute/updateFluidCells.comp.hlsl + app_resources/cellUtils.hlsl + app_resources/common.hlsl + app_resources/descriptor_bindings.hlsl + app_resources/fluidParticles.fragment.hlsl + app_resources/fluidParticles.vertex.hlsl + app_resources/gridSampling.hlsl + app_resources/gridUtils.hlsl + app_resources/render_common.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) 
+set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/compute/diffusion.comp.hlsl", + "KEY": "diffusion", + }, + { + "INPUT": "app_resources/fluidParticles.vertex.hlsl", + "KEY": "fluidParticles_vertex", + }, + { + "INPUT": "app_resources/fluidParticles.fragment.hlsl", + "KEY": "fluidParticles_fragment", + }, + { + "INPUT": "app_resources/compute/particlesInit.comp.hlsl", + "KEY": "particlesInit", + }, + { + "INPUT": "app_resources/compute/genParticleVertices.comp.hlsl", + "KEY": "genParticleVertices", + }, + { + "INPUT": "app_resources/compute/prepareCellUpdate.comp.hlsl", + "KEY": "prepareCellUpdate", + }, + { + "INPUT": "app_resources/compute/updateFluidCells.comp.hlsl", + "KEY": "updateFluidCells", + }, + { + "INPUT": "app_resources/compute/applyBodyForces.comp.hlsl", + "KEY": "applyBodyForces", + }, + { + "INPUT": "app_resources/compute/pressureSolver.comp.hlsl", + "KEY": "pressureSolver", + }, + { + "INPUT": "app_resources/compute/advectParticles.comp.hlsl", + "KEY": "advectParticles", + } + +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl b/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl index e53c91d2d..288b82764 100644 --- 
a/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl @@ -67,6 +67,7 @@ void setAxisCellMaterial(uint32_t3 ID : SV_DispatchThreadID) } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void setNeighborAxisCellMaterial(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; @@ -127,6 +128,7 @@ float3 calculateDiffusionVelStep(int3 idx, float3 sampledVelocity, uint cellMate } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void iterateDiffusion(uint32_t3 ID : SV_DispatchThreadID) { uint3 gid = nbl::hlsl::glsl::gl_WorkGroupID(); @@ -212,6 +214,7 @@ void iterateDiffusion(uint32_t3 ID : SV_DispatchThreadID) // TODO: same as the pressure solver, this kernel/dispatch should be fused onto `iterateDiffusion` guarded by `isLastIteration` push constant [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void applyDiffusion(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; diff --git a/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl b/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl index b5db995c5..e71f05912 100644 --- a/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl @@ -89,6 +89,7 @@ float calculatePressureStep(int3 idx) } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void iteratePressureSystem(uint32_t3 ID : SV_DispatchThreadID) { uint3 gid = nbl::hlsl::glsl::gl_WorkGroupID(); @@ -168,6 +169,7 @@ void iteratePressureSystem(uint32_t3 ID : SV_DispatchThreadID) // TODO: why doesn't the last invocation of `iteratePressureSystem` have this step fused into it!? 
It would be just a simple push constant `isLastIteration` that would decide whether to run this dispatch [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void updateVelocities(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; diff --git a/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl b/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl index 62ddfd822..ea37660c1 100644 --- a/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl @@ -23,6 +23,7 @@ cbuffer GridData // TODO: f 0 is AIR, and >=2 is SOLID, we can perform Atomic OR 0b01 to have a particle set the cell to FLUID, and this dispatch looping over all grid cells is not needed! [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void updateFluidCells(uint32_t3 ID : SV_DispatchThreadID) { int3 cIdx = ID; diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index 899d00ba4..a70064245 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" // TODO: why is it not in nabla.h ? 
@@ -344,11 +345,12 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso if (!initGraphicsPipeline()) return logFail("Failed to initialize render pipeline!\n"); - auto createComputePipeline = [&](smart_refctd_ptr& pipeline, smart_refctd_ptr& pool, - smart_refctd_ptr& set, const std::string& shaderPath, const std::string& entryPoint, + + auto createComputePipeline = [&](smart_refctd_ptr& pipeline, smart_refctd_ptr& pool, + smart_refctd_ptr& set, const std::string& entryPoint, const std::span bindings, const asset::SPushConstantRange& pcRange = {}) -> void { - auto shader = compileShader(shaderPath, entryPoint); + auto shader = loadPrecompiledShader(); auto descriptorSetLayout1 = m_device->createDescriptorSetLayout(bindings); @@ -378,8 +380,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { // init particles pipeline const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 2 * sizeof(uint64_t) }; - createComputePipeline(m_initParticlePipeline, m_initParticlePool, m_initParticleDs, - "app_resources/compute/particlesInit.comp.hlsl", "main", piParticlesInit_bs1, pcRange); + createComputePipeline.operator()<"particlesInit">(m_initParticlePipeline, m_initParticlePool, m_initParticleDs, + "main", piParticlesInit_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[1]; @@ -395,8 +397,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { // generate particle vertex pipeline const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 3 * sizeof(uint64_t) }; - createComputePipeline(m_genParticleVerticesPipeline, m_genVerticesPool, m_genVerticesDs, - "app_resources/compute/genParticleVertices.comp.hlsl", "main", gpvGenVertices_bs1, pcRange); + createComputePipeline.operator()<"genParticleVertices">(m_genParticleVerticesPipeline, m_genVerticesPool, 
m_genVerticesDs, + "main", gpvGenVertices_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -414,8 +416,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso // update fluid cells pipelines { const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 2 * sizeof(uint64_t) }; - createComputePipeline(m_accumulateWeightsPipeline, m_accumulateWeightsPool, m_accumulateWeightsDs, - "app_resources/compute/prepareCellUpdate.comp.hlsl", "main", ufcAccWeights_bs1, pcRange); + createComputePipeline.operator()<"prepareCellUpdate">(m_accumulateWeightsPipeline, m_accumulateWeightsPool, m_accumulateWeightsDs, + "main", ufcAccWeights_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -457,8 +459,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_updateFluidCellsPipeline, m_updateFluidCellsPool, m_updateFluidCellsDs, - "app_resources/compute/updateFluidCells.comp.hlsl", "updateFluidCells", ufcFluidCell_bs1); + createComputePipeline.operator()<"updateFluidCells">(m_updateFluidCellsPipeline, m_updateFluidCellsPool, m_updateFluidCellsDs, + "updateFluidCells", ufcFluidCell_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -479,8 +481,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_updateNeighborCellsPipeline, m_updateNeighborCellsPool, m_updateNeighborCellsDs, - "app_resources/compute/updateFluidCells.comp.hlsl", "updateNeighborFluidCells", ufcNeighborCell_bs1); + createComputePipeline.operator()<"updateFluidCells">(m_updateNeighborCellsPipeline, m_updateNeighborCellsPool, m_updateNeighborCellsDs, + "updateNeighborFluidCells", ufcNeighborCell_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -527,8 +529,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } { // apply forces pipeline - 
createComputePipeline(m_applyBodyForcesPipeline, m_applyForcesPool, m_applyForcesDs, - "app_resources/compute/applyBodyForces.comp.hlsl", "main", abfApplyForces_bs1); + createComputePipeline.operator()<"applyBodyForces">(m_applyBodyForcesPipeline, m_applyForcesPool, m_applyForcesDs, + "main", abfApplyForces_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -559,8 +561,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } // apply diffusion pipelines { - createComputePipeline(m_axisCellsPipeline, m_axisCellsPool, m_axisCellsDs, - "app_resources/compute/diffusion.comp.hlsl", "setAxisCellMaterial", dAxisCM_bs1); + createComputePipeline.operator()<"diffusion">(m_axisCellsPipeline, m_axisCellsPool, m_axisCellsDs, + "setAxisCellMaterial", dAxisCM_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -581,8 +583,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_neighborAxisCellsPipeline, m_neighborAxisCellsPool, m_neighborAxisCellsDs, - "app_resources/compute/diffusion.comp.hlsl", "setNeighborAxisCellMaterial", dNeighborAxisCM_bs1); + createComputePipeline.operator()<"diffusion">(m_neighborAxisCellsPipeline, m_neighborAxisCellsPool, m_neighborAxisCellsDs, + "setNeighborAxisCellMaterial", dNeighborAxisCM_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -603,10 +605,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - const std::string iterateKernel = "iterateDiffusion"; - const std::string applyKernel = "applyDiffusion"; - auto iterateShader = compileShader("app_resources/compute/diffusion.comp.hlsl", iterateKernel); - auto applyShader = compileShader("app_resources/compute/diffusion.comp.hlsl", applyKernel); + smart_refctd_ptr diffusion = loadPrecompiledShader<"diffusion">(); // "app_resources/compute/diffusion.comp.hlsl" auto descriptorSetLayout1 = m_device->createDescriptorSetLayout(dDiffuse_bs1); @@ -625,16 
+624,16 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { IGPUComputePipeline::SCreationParams params = {}; params.layout = pipelineLayout.get(); - params.shader.entryPoint = iterateKernel; - params.shader.shader = iterateShader.get(); + params.shader.entryPoint = "iterateDiffusion"; + params.shader.shader = diffusion.get(); m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_iterateDiffusionPipeline); } { IGPUComputePipeline::SCreationParams params = {}; params.layout = pipelineLayout.get(); - params.shader.entryPoint = applyKernel; - params.shader.shader = applyShader.get(); + params.shader.entryPoint = "applyDiffusion"; + params.shader.shader = diffusion.get(); m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_diffusionPipeline); } @@ -676,8 +675,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } // solve pressure system pipelines { - createComputePipeline(m_calcDivergencePipeline, m_calcDivergencePool, m_calcDivergenceDs, - "app_resources/compute/pressureSolver.comp.hlsl", "calculateNegativeDivergence", psDivergence_bs1); + createComputePipeline.operator()<"pressureSolver">(m_calcDivergencePipeline, m_calcDivergencePool, m_calcDivergenceDs, + "calculateNegativeDivergence", psDivergence_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -711,8 +710,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_iteratePressurePipeline, m_iteratePressurePool, m_iteratePressureDs, - "app_resources/compute/pressureSolver.comp.hlsl", "iteratePressureSystem", psIteratePressure_bs1); + createComputePipeline.operator()<"pressureSolver">(m_iteratePressurePipeline, m_iteratePressurePool, m_iteratePressureDs, + "iteratePressureSystem", psIteratePressure_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[5]; @@ -740,8 +739,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - 
createComputePipeline(m_updateVelPsPipeline, m_updateVelPsPool, m_updateVelPsDs, - "app_resources/compute/pressureSolver.comp.hlsl", "updateVelocities", psUpdateVelPs_bs1); + createComputePipeline.operator()<"pressureSolver">(m_updateVelPsPipeline, m_updateVelPsPool, m_updateVelPsDs, + "updateVelocities", psUpdateVelPs_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[4]; @@ -780,8 +779,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { // advect particles pipeline const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 2 * sizeof(uint64_t) }; - createComputePipeline(m_advectParticlesPipeline, m_advectParticlesPool, m_advectParticlesDs, - "app_resources/compute/advectParticles.comp.hlsl", "main", apAdvectParticles_bs1, pcRange); + createComputePipeline.operator()<"advectParticles">(m_advectParticlesPipeline, m_advectParticlesPool, m_advectParticlesDs, + "main", apAdvectParticles_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -1400,51 +1399,25 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso numParticles = m_gridData.particleInitSize.x * m_gridData.particleInitSize.y * m_gridData.particleInitSize.z * particlesPerCell; } - smart_refctd_ptr compileShader(const std::string& filePath, const std::string& entryPoint = "main") + template + smart_refctd_ptr loadPrecompiledShader() { IAssetLoader::SAssetLoadParams lparams = {}; lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(filePath, lparams); + lparams.workingDirectory = "app_resources"; + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.data(), lparams); if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { - m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + m_logger->log("Failed to find shader with key 
'%s'.", ILogger::ELL_ERROR, ShaderKey); exit(-1); } const auto assets = bundle.getContents(); assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - const auto hlslMetadata = static_cast(bundle.getMetadata()); - const auto shaderStage = hlslMetadata->shaderStages->front(); + smart_refctd_ptr shader = IAsset::castDown(assets[0]); - smart_refctd_ptr shader = shaderSrc; - if (entryPoint != "main") - { - auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - CHLSLCompiler::SOptions options = {}; - options.stage = shaderStage; - if (!(options.stage == IShader::E_SHADER_STAGE::ESS_COMPUTE || options.stage == IShader::E_SHADER_STAGE::ESS_FRAGMENT)) - options.stage = IShader::E_SHADER_STAGE::ESS_VERTEX; - options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - #ifndef _NBL_DEBUG - ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; - auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); - options.spirvOptimizer = opt.get(); - #endif - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = shaderSrc->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - - std::string dxcOptionStr[] = {"-E " + entryPoint}; - options.dxcOptions = std::span(dxcOptionStr); - - shader = compiler->compileToSPIRV((const char*)shaderSrc->getContent()->getPointer(), options); - } - - return m_device->compileShader({ shader.get() }); + return shader; } // TODO: there's a method in IUtilities for this @@ -1563,28 +1536,27 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso // init shaders and pipeline - auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr + auto loadPrecompiledShader = [&]() -> 
smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = m_logger.get(); + lparams.workingDirectory = "app_resources"; + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.data(), lparams); + if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { - IAssetLoader::SAssetLoadParams lparams = {}; - lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(filePath, lparams); - if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) - { - m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); - exit(-1); - } + m_logger->log("Failed to find shader with key '%s'.", ILogger::ELL_ERROR, ShaderKey); + exit(-1); + } - const auto assets = bundle.getContents(); - assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - if (!shaderSrc) - return nullptr; + const auto assets = bundle.getContents(); + assert(assets.size() == 1); + smart_refctd_ptr shader = IAsset::castDown(assets[0]); - return m_device->compileShader({ shaderSrc.get() }); - }; - auto vs = compileShader("app_resources/fluidParticles.vertex.hlsl"); - auto fs = compileShader("app_resources/fluidParticles.fragment.hlsl"); + return shader; + }; + auto vs = loadPrecompiledShader.operator()<"fluidParticles_vertex">(); // "app_resources/fluidParticles.vertex.hlsl" + auto fs = loadPrecompiledShader.operator()<"fluidParticles_fragment">(); // "app_resources/fluidParticles.fragment.hlsl" smart_refctd_ptr descriptorSetLayout1; { diff --git a/71_RayTracingPipeline/CMakeLists.txt b/71_RayTracingPipeline/CMakeLists.txt index 07b0fd396..d7bb13671 100644 --- a/71_RayTracingPipeline/CMakeLists.txt +++ b/71_RayTracingPipeline/CMakeLists.txt @@ -34,4 +34,104 @@ if(NBL_BUILD_IMGUI) endif() endif() +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + 
app_resources/light_directional.rcall.hlsl + app_resources/light_point.rcall.hlsl + app_resources/light_spot.rcall.hlsl + app_resources/present.frag.hlsl + app_resources/raytrace.rahit.hlsl + app_resources/raytrace.rchit.hlsl + app_resources/raytrace.rgen.hlsl + app_resources/raytrace.rint.hlsl + app_resources/raytrace.rmiss.hlsl + app_resources/raytrace_procedural.rchit.hlsl + app_resources/raytrace_shadow.rahit.hlsl + app_resources/raytrace_shadow.rmiss.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/raytrace.rgen.hlsl", + "KEY": "raytrace_rgen", + }, + { + "INPUT": "app_resources/raytrace.rchit.hlsl", + "KEY": "raytrace_rchit", + }, + { + "INPUT": "app_resources/raytrace_procedural.rchit.hlsl", + "KEY": "raytrace_procedural_rchit", + }, + { + "INPUT": "app_resources/raytrace.rint.hlsl", + "KEY": "raytrace_rint", + }, + { + "INPUT": "app_resources/raytrace.rahit.hlsl", + "KEY": "raytrace_rahit", + }, + { + "INPUT": "app_resources/raytrace_shadow.rahit.hlsl", + "KEY": "raytrace_shadow_rahit", + }, + { + "INPUT": "app_resources/raytrace.rmiss.hlsl", + "KEY": "raytrace_rmiss", + }, + { + "INPUT": "app_resources/raytrace_shadow.rmiss.hlsl", + "KEY": "raytrace_shadow_rmiss", + }, + { + "INPUT": "app_resources/light_directional.rcall.hlsl", + "KEY": "light_directional_rcall", + }, + { + "INPUT": "app_resources/light_point.rcall.hlsl", + "KEY": "light_point_rcall", + }, + { + "INPUT": "app_resources/light_spot.rcall.hlsl", + "KEY": "light_spot_rcall", + }, + { + "INPUT": "app_resources/present.frag.hlsl", + "KEY": "present_frag", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE 
NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) + diff --git a/71_RayTracingPipeline/app_resources/common.hlsl b/71_RayTracingPipeline/app_resources/common.hlsl index f9d67af78..502b53160 100644 --- a/71_RayTracingPipeline/app_resources/common.hlsl +++ b/71_RayTracingPipeline/app_resources/common.hlsl @@ -4,6 +4,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/cpp_compat/basic.h" #include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" NBL_CONSTEXPR uint32_t WorkgroupSize = 16; NBL_CONSTEXPR uint32_t MAX_UNORM_10 = 1023; @@ -78,6 +79,9 @@ struct MaterialPacked return (xi>>22) > alpha; } }; +#ifdef __HLSL_VERSION +NBL_REGISTER_OBJ_TYPE(MaterialPacked, 4) +#endif struct SProceduralGeomInfo { @@ -103,6 +107,9 @@ struct STriangleGeomInfo uint32_t indexType : 1; // 16 bit, 32 bit }; +#ifdef __HLSL_VERSION +NBL_REGISTER_OBJ_TYPE(STriangleGeomInfo, 8) +#endif enum E_GEOM_TYPE : uint16_t { diff --git a/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl index 956ad5fe6..da7cc1594 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl @@ -10,7 +10,8 @@ using namespace nbl::hlsl; void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes attribs) { const int instID = spirv::InstanceCustomIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)); + const static uint64_t STriangleGeomInfoAlignment = 
nbl::hlsl::alignment_of_v; + const STriangleGeomInfo geom = vk::BufferPointer(pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)).Get(); const uint32_t bitpattern = payload.pcg(); // Cannot use spirv::ignoreIntersectionKHR and spirv::terminateRayKHR due to https://github.com/microsoft/DirectXShaderCompiler/issues/7279 diff --git a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl index 0a8bc5ec8..e6ebcda78 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl @@ -38,9 +38,9 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) if (normalBufferAddress == 0) { - float3 v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * 12); - float3 v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * 12); - float3 v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * 12); + float3 v0 = (nbl::hlsl::bda::__ptr::create(vertexBufferAddress) + indices[0]).deref().load(); + float3 v1 = (nbl::hlsl::bda::__ptr::create(vertexBufferAddress) + indices[1]).deref().load(); + float3 v2 = (nbl::hlsl::bda::__ptr::create(vertexBufferAddress) + indices[2]).deref().load(); return normalize(cross(v2 - v0, v1 - v0)); } @@ -50,9 +50,9 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) { case NT_R8G8B8A8_SNORM: { - uint32_t v0 = vk::RawBufferLoad(normalBufferAddress + indices[0] * 4); - uint32_t v1 = vk::RawBufferLoad(normalBufferAddress + indices[1] * 4); - uint32_t v2 = vk::RawBufferLoad(normalBufferAddress + indices[2] * 4); + uint32_t v0 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[0]).deref().load(); + uint32_t v1 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[1]).deref().load(); + uint32_t v2 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[2]).deref().load(); n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); n1 = 
normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); @@ -61,9 +61,13 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) break; case NT_R32G32B32_SFLOAT: { - n0 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[0] * 12)); - n1 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[1] * 12)); - n2 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[2] * 12)); + float3 v0 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[0]).deref().load(); + float3 v1 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[1]).deref().load(); + float3 v2 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[2]).deref().load(); + + n0 = normalize(v0); + n1 = normalize(v1); + n2 = normalize(v2); } break; } @@ -81,7 +85,8 @@ void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes const int primID = spirv::PrimitiveId; const int instanceCustomIndex = spirv::InstanceCustomIndexKHR; const int geometryIndex = spirv::RayGeometryIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(STriangleGeomInfo)); + const static uint64_t STriangleGeomInfoAlignment = nbl::hlsl::alignment_of_v; + const STriangleGeomInfo geom = vk::BufferPointer(pc.triangleGeomInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(STriangleGeomInfo)).Get(); const float32_t3 vertexNormal = calculateNormals(primID, geom, attribs.barycentrics); const float32_t3 worldNormal = normalize(mul(vertexNormal, transpose(spirv::WorldToObjectKHR)).xyz); diff --git a/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl index efc99cad9..c42d5a7df 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl @@ -1,6 +1,5 @@ #include "common.hlsl" -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" 
#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" @@ -80,15 +79,16 @@ void main() Material material; MaterialId materialId = payload.materialId; + const static uint64_t MaterialPackedAlignment = nbl::hlsl::alignment_of_v; // we use negative index to indicate that this is a procedural geometry if (materialId.isHitProceduralGeom()) { - const MaterialPacked materialPacked = vk::RawBufferLoad(pc.proceduralGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(SProceduralGeomInfo)); + const MaterialPacked materialPacked = vk::BufferPointer(pc.proceduralGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(SProceduralGeomInfo)).Get(); material = nbl::hlsl::_static_cast(materialPacked); } else { - const MaterialPacked materialPacked = vk::RawBufferLoad(pc.triangleGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(STriangleGeomInfo)); + const MaterialPacked materialPacked = vk::BufferPointer(pc.triangleGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(STriangleGeomInfo)).Get(); material = nbl::hlsl::_static_cast(materialPacked); } diff --git a/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl index 72f9beffd..551be1c8a 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl @@ -36,8 +36,9 @@ void main() const int primID = spirv::PrimitiveId; + const static uint64_t SProceduralGeomInfoAlignment = nbl::hlsl::alignment_of_v; // Sphere data - SProceduralGeomInfo sphere = vk::RawBufferLoad(pc.proceduralGeomInfoBuffer + primID * sizeof(SProceduralGeomInfo)); + SProceduralGeomInfo sphere = vk::BufferPointer(pc.proceduralGeomInfoBuffer + primID * sizeof(SProceduralGeomInfo)).Get(); const float32_t tHit = hitSphere(sphere, ray); diff --git a/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl index 
e41551512..d87b8dd5d 100644 --- a/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl @@ -1,6 +1,7 @@ #include "common.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" using namespace nbl::hlsl; @@ -10,7 +11,8 @@ using namespace nbl::hlsl; void main(inout OcclusionPayload payload, in BuiltInTriangleIntersectionAttributes attribs) { const int instID = spirv::InstanceCustomIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)); + const static uint64_t STriangleGeomInfoAlignment = nbl::hlsl::alignment_of_v; + const STriangleGeomInfo geom = vk::BufferPointer(pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)).Get(); const Material material = nbl::hlsl::_static_cast(geom.material); const float attenuation = (1.f-material.alpha) * payload.attenuation; diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index 59b610f4b..ecaf53b7f 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -3,6 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/indirect_commands.hlsl" @@ -106,95 +108,42 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; - smart_refctd_ptr shaderReadCache = nullptr; - smart_refctd_ptr shaderWriteCache = core::make_smart_refctd_ptr(); - auto shaderCachePath = localOutputCWD / "main_pipeline_shader_cache.bin"; - - { - core::smart_refctd_ptr shaderReadCacheFile; - { - system::ISystem::future_t> future; - 
m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_READ); - if (future.wait()) - { - future.acquire().move_into(shaderReadCacheFile); - if (shaderReadCacheFile) - { - const size_t size = shaderReadCacheFile->getSize(); - if (size > 0ull) - { - std::vector contents(size); - system::IFile::success_t succ; - shaderReadCacheFile->read(succ, contents.data(), 0, size); - if (succ) - shaderReadCache = IShaderCompiler::CCache::deserialize(contents); - } - } - } - else - m_logger->log("Failed Openning Shader Cache File.", ILogger::ELL_ERROR); - } - - } - // Load Custom Shader - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return nullptr; // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto sourceRaw = IAsset::castDown(assets[0]); - if (!sourceRaw) + auto shader = IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); return nullptr; + } - return m_device->compileShader({ sourceRaw.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + return shader; }; // load shaders - const auto raygenShader = loadCompileAndCreateShader("app_resources/raytrace.rgen.hlsl"); - const auto closestHitShader = loadCompileAndCreateShader("app_resources/raytrace.rchit.hlsl"); - const auto proceduralClosestHitShader = loadCompileAndCreateShader("app_resources/raytrace_procedural.rchit.hlsl"); - const auto intersectionHitShader = 
loadCompileAndCreateShader("app_resources/raytrace.rint.hlsl"); - const auto anyHitShaderColorPayload = loadCompileAndCreateShader("app_resources/raytrace.rahit.hlsl"); - const auto anyHitShaderShadowPayload = loadCompileAndCreateShader("app_resources/raytrace_shadow.rahit.hlsl"); - const auto missShader = loadCompileAndCreateShader("app_resources/raytrace.rmiss.hlsl"); - const auto missShadowShader = loadCompileAndCreateShader("app_resources/raytrace_shadow.rmiss.hlsl"); - const auto directionalLightCallShader = loadCompileAndCreateShader("app_resources/light_directional.rcall.hlsl"); - const auto pointLightCallShader = loadCompileAndCreateShader("app_resources/light_point.rcall.hlsl"); - const auto spotLightCallShader = loadCompileAndCreateShader("app_resources/light_spot.rcall.hlsl"); - const auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); - - core::smart_refctd_ptr shaderWriteCacheFile; - { - system::ISystem::future_t> future; - m_system->deleteFile(shaderCachePath); // temp solution instead of trimming, to make sure we won't have corrupted json - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_WRITE); - if (future.wait()) - { - future.acquire().move_into(shaderWriteCacheFile); - if (shaderWriteCacheFile) - { - auto serializedCache = shaderWriteCache->serialize(); - if (shaderWriteCacheFile) - { - system::IFile::success_t succ; - shaderWriteCacheFile->write(succ, serializedCache->getPointer(), 0, serializedCache->getSize()); - if (!succ) - m_logger->log("Failed Writing To Shader Cache File.", ILogger::ELL_ERROR); - } - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } + const auto raygenShader = loadPrecompiledShader.operator()<"raytrace_rgen">(); // "app_resources/raytrace.rgen.hlsl" + const auto closestHitShader = loadPrecompiledShader.operator()<"raytrace_rchit">(); // 
"app_resources/raytrace.rchit.hlsl" + const auto proceduralClosestHitShader = loadPrecompiledShader.operator()<"raytrace_procedural_rchit">(); // "app_resources/raytrace_procedural.rchit.hlsl" + const auto intersectionHitShader = loadPrecompiledShader.operator()<"raytrace_rint">(); // "app_resources/raytrace.rint.hlsl" + const auto anyHitShaderColorPayload = loadPrecompiledShader.operator()<"raytrace_rahit">(); // "app_resources/raytrace.rahit.hlsl" + const auto anyHitShaderShadowPayload = loadPrecompiledShader.operator()<"raytrace_shadow_rahit">(); // "app_resources/raytrace_shadow.rahit.hlsl" + const auto missShader = loadPrecompiledShader.operator()<"raytrace_rmiss">(); // "app_resources/raytrace.rmiss.hlsl" + const auto missShadowShader = loadPrecompiledShader.operator()<"raytrace_shadow_rmiss">(); // "app_resources/raytrace_shadow.rmiss.hlsl" + const auto directionalLightCallShader = loadPrecompiledShader.operator()<"light_directional_rcall">(); // "app_resources/light_directional.rcall.hlsl" + const auto pointLightCallShader = loadPrecompiledShader.operator()<"light_point_rcall">(); // "app_resources/light_point.rcall.hlsl" + const auto spotLightCallShader = loadPrecompiledShader.operator()<"light_spot_rcall">(); // "app_resources/light_spot.rcall.hlsl" + const auto fragmentShader = loadPrecompiledShader.operator()<"present_frag">(); // "app_resources/present.frag.hlsl" m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) diff --git a/72_CooperativeBinarySearch/CMakeLists.txt b/72_CooperativeBinarySearch/CMakeLists.txt new file mode 100644 index 000000000..b7e52875d --- /dev/null +++ b/72_CooperativeBinarySearch/CMakeLists.txt @@ -0,0 +1,24 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl b/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl new file mode 100644 index 000000000..0834e8f91 --- /dev/null +++ b/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl @@ -0,0 +1,120 @@ +// Copyright (C) 2024-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#pragma wave shader_stage(compute) + +#include "common.h" + +#include "nbl/builtin/hlsl/glsl_compat/subgroup_ballot.hlsl" + +using namespace nbl::hlsl; + +[[vk::push_constant]] PushConstants Constants; +[[vk::binding(0)]] StructuredBuffer Histogram; +[[vk::binding(1)]] RWStructuredBuffer Output; + + +uint getNextPowerOfTwo(uint number) { + return 2 << firstbithigh(number - 1); +} + +uint getLaneWithFirstBitSet(bool condition) { + uint4 ballot = WaveActiveBallot(condition); + if (all(ballot == 0)) { + return WaveGetLaneCount(); + } + return nbl::hlsl::glsl::subgroupBallotFindLSB(ballot); +} + +// findValue must be the same across the entire wave +// Could use something like WaveReadFirstLane to be fully sure +uint binarySearchLowerBoundFindValue(uint findValue, StructuredBuffer searchBuffer, uint searchBufferSize) { + uint lane = WaveGetLaneIndex(); + + uint left = 0; + uint right = searchBufferSize - 1; + + uint32_t range = getNextPowerOfTwo(right - left); + // do pivots as long as we can't coalesced load + while (range > WaveGetLaneCount()) + { + // there must be at least 1 gap between subsequent pivots + const uint32_t step = range / WaveGetLaneCount(); + const uint32_t halfStep = step >> 1; + const uint32_t pivotOffset = lane * step+halfStep; + const uint32_t pivotIndex = left + pivotOffset; + + uint4 notGreaterPivots = WaveActiveBallot(pivotIndex < right && !(findValue < searchBuffer[pivotIndex])); + uint partition = nbl::hlsl::glsl::subgroupBallotBitCount(notGreaterPivots); + // only move left if needed + if (partition != 0) + left += partition * step - halfStep; + // if we go into final half partition, the range becomes less too + range = partition != WaveGetLaneCount() ? step : halfStep; + } + + uint threadSearchIndex = left + lane; + bool laneValid = threadSearchIndex < searchBufferSize; + uint histAtIndex = laneValid ? 
searchBuffer[threadSearchIndex] : -1; + uint firstLaneGreaterThan = getLaneWithFirstBitSet(histAtIndex > findValue); + + return left + firstLaneGreaterThan - 1; +} + +static const uint32_t GroupsharedSize = WorkgroupSize; +groupshared uint shared_groupSearchBufferMinIndex; +groupshared uint shared_groupSearchBufferMaxIndex; +groupshared uint shared_groupSearchValues[WorkgroupSize]; + +// Binary search using the entire workgroup, making it log32 or log64 (every iteration, the possible set of +// values is divided by the number of lanes in a wave) +uint binarySearchLowerBoundCooperative(uint groupIndex, uint groupThread, StructuredBuffer searchBuffer, uint searchBufferSize) { + uint minSearchValue = groupIndex.x * GroupsharedSize; + uint maxSearchValue = ((groupIndex.x + 1) * GroupsharedSize) - 1; + + // On each workgroup, two subgroups do the search + // - One searches for the minimum, the other searches for the maximum + // - Store the minimum and maximum on groupshared memory, then do a barrier + uint wave = groupThread / WaveGetLaneCount(); + if (wave < 2) { + uint search = wave == 0 ? minSearchValue : maxSearchValue; + uint searchResult = binarySearchLowerBoundFindValue(search, searchBuffer, searchBufferSize); + if (WaveIsFirstLane()) { + if (wave == 0) shared_groupSearchBufferMinIndex = searchResult; + else shared_groupSearchBufferMaxIndex = searchResult; + } + } + GroupMemoryBarrierWithGroupSync(); + + // Since every instance has at least one triangle, we know that having workgroup values + // for each value in the range of minimum to maximum will suffice. + + // Write every value in the range to groupshared memory and barrier. 
+ uint idx = shared_groupSearchBufferMinIndex + groupThread.x; + if (idx <= shared_groupSearchBufferMaxIndex) { + shared_groupSearchValues[groupThread.x] = searchBuffer[idx]; + } + GroupMemoryBarrierWithGroupSync(); + + uint maxValueIndex = shared_groupSearchBufferMaxIndex - shared_groupSearchBufferMinIndex; + + uint searchValue = minSearchValue + groupThread; + uint currentSearchValueIndex = 0; + uint laneValue = shared_groupSearchBufferMaxIndex; + while (currentSearchValueIndex <= maxValueIndex) { + uint curValue = shared_groupSearchValues[currentSearchValueIndex]; + if (curValue > searchValue) { + laneValue = shared_groupSearchBufferMinIndex + currentSearchValueIndex - 1; + break; + } + currentSearchValueIndex ++; + } + + return laneValue; +} + +[numthreads(WorkgroupSize,1,1)] +void main(const uint3 thread : SV_DispatchThreadID, const uint3 groupThread : SV_GroupThreadID, const uint3 group : SV_GroupID) +{ + Output[thread.x] = binarySearchLowerBoundCooperative(group.x, groupThread.x, Histogram, Constants.EntityCount); +} \ No newline at end of file diff --git a/72_CooperativeBinarySearch/app_resources/common.h b/72_CooperativeBinarySearch/app_resources/common.h new file mode 100644 index 000000000..65f606b08 --- /dev/null +++ b/72_CooperativeBinarySearch/app_resources/common.h @@ -0,0 +1,15 @@ +#ifndef _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ +#define _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ + +#include +#include + +// TODO: NBL_CONSTEXPR_NSPC_VAR +static const uint32_t WorkgroupSize = 256; + +struct PushConstants +{ + uint32_t EntityCount; +}; + +#endif // _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ diff --git a/72_CooperativeBinarySearch/config.json.template b/72_CooperativeBinarySearch/config.json.template new file mode 100644 index 000000000..24adf54fb --- /dev/null +++ b/72_CooperativeBinarySearch/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + 
"configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} diff --git a/72_CooperativeBinarySearch/include/nbl/this_example/common.hpp b/72_CooperativeBinarySearch/include/nbl/this_example/common.hpp new file mode 100644 index 000000000..3745ca512 --- /dev/null +++ b/72_CooperativeBinarySearch/include/nbl/this_example/common.hpp @@ -0,0 +1,11 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + +#include "nbl/examples/examples.hpp" + +// example's own headers +#include "nbl/ui/ICursorControl.h" // TODO: why not in nabla.h ? +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/72_CooperativeBinarySearch/main.cpp b/72_CooperativeBinarySearch/main.cpp new file mode 100644 index 000000000..81724c1b8 --- /dev/null +++ b/72_CooperativeBinarySearch/main.cpp @@ -0,0 +1,266 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nbl/system/IApplicationFramework.h" +#include "app_resources/common.h" + +#include +#include +#include + + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +// +constexpr uint32_t TestCaseIndices[] = { +#include "testCaseData.h" +}; +constexpr uint32_t numIndices = sizeof(TestCaseIndices) / sizeof(TestCaseIndices[0]); +constexpr uint32_t lastValue = TestCaseIndices[numIndices - 1]; +// just some extra stuff over the edge +constexpr uint32_t totalValues = lastValue + 100; + + +void cpu_tests(); + +class CooperativeBinarySearch final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = application_templates::MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; +public: + CooperativeBinarySearch(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! 
+ if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_queue = m_device->getQueue(0, 0); + m_commandPool = m_device->createCommandPool(m_queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + m_commandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &m_cmdbuf,1 }, smart_refctd_ptr(m_logger)); + + smart_refctd_ptr shader; + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset("app_resources/binarySearch.comp.hlsl", lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return logFail("Could not load shader!"); + + auto source = IAsset::castDown(assets[0]); + // The down-cast should not fail! + assert(source); + + // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple + shader = m_device->compileShader({ source.get() }); + if (!shader) + return logFail("Creation of a GPU Shader to from CPU Shader source failed!"); + } + + const uint32_t bindingCount = 2u; + IGPUDescriptorSetLayout::SBinding bindings[bindingCount] = {}; + bindings[0].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; // [[vk::binding(0)]] StructuredBuffer Histogram; + bindings[1].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; // [[vk::binding(1)]] RWStructuredBuffer Output; + + for(int i = 0; i < bindingCount; ++i) + { + bindings[i].stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; + bindings[i].count = 1; + bindings[i].binding = i; + } + m_descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + { + SPushConstantRange pcRange = {}; + pcRange.stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; + pcRange.offset = 0u; + pcRange.size = sizeof(PushConstants); + auto layout = m_device->createPipelineLayout({ &pcRange,1 }, 
smart_refctd_ptr(m_descriptorSetLayout)); + IGPUComputePipeline::SCreationParams params = {}; + params.layout = layout.get(); + params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + return logFail("Failed to create compute pipeline!\n"); + } + + const size_t sizes[2] = {sizeof(TestCaseIndices),sizeof(uint32_t)*totalValues}; + for (uint32_t i = 0; i < bindingCount; i++) + { + m_buffers[i] = m_device->createBuffer(IGPUBuffer::SCreationParams { + {.size = sizes[i], .usage = + IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | + IGPUBuffer::E_USAGE_FLAGS::EUF_STORAGE_BUFFER_BIT, + } + }); + + auto reqs = m_buffers[i]->getMemoryReqs(); + reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits(); + + m_allocations[i] = m_device->allocate(reqs, m_buffers[i].get()); + + auto allocationType = i == 0 ? IDeviceMemoryAllocation::EMCAF_WRITE : IDeviceMemoryAllocation::EMCAF_READ; + auto mapResult = m_allocations[i].memory->map({ 0ull,m_allocations[i].memory->getAllocationSize() }, allocationType); + assert(mapResult); + } + + smart_refctd_ptr descriptorPool = nullptr; + { + IDescriptorPool::SCreateInfo createInfo = {}; + createInfo.maxSets = 1; + createInfo.maxDescriptorCount[static_cast(IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = bindingCount; + descriptorPool = m_device->createDescriptorPool(std::move(createInfo)); + } + + m_descriptorSet = descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout)); + + IGPUDescriptorSet::SDescriptorInfo descriptorInfos[bindingCount] = {}; + IGPUDescriptorSet::SWriteDescriptorSet writeDescriptorSets[bindingCount] = {}; + + for(int i = 0; i < bindingCount; ++i) + { + writeDescriptorSets[i].info = &descriptorInfos[i]; + writeDescriptorSets[i].dstSet = m_descriptorSet.get(); + writeDescriptorSets[i].binding = i; + writeDescriptorSets[i].count = 
bindings[i].count; + + descriptorInfos[i].desc = m_buffers[i]; + descriptorInfos[i].info.buffer.size = ~0ull; + } + + m_device->updateDescriptorSets(bindingCount, writeDescriptorSets, 0u, nullptr); + + // Write test data to the m_buffers[0] + auto outPtr = m_allocations[0].memory->getMappedPointer(); + assert(outPtr); + memcpy( + reinterpret_cast(outPtr), + reinterpret_cast(&TestCaseIndices[0]), + sizeof(TestCaseIndices) + ); + + // In contrast to fences, we just need one semaphore to rule all dispatches + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void workLoopBody() override + { + cpu_tests(); + + constexpr auto StartedValue = 0; + + smart_refctd_ptr progress = m_device->createSemaphore(StartedValue); + + m_cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + m_cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t layoutBufferBarrier[1] = { { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::HOST_BIT, + .srcAccessMask = ACCESS_FLAGS::HOST_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + // whole buffer because we transferred the contents into it + .range = {.offset = 0,.size = m_buffers[1]->getCreationParams().size,.buffer = m_buffers[1]} + } }; + + const IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo = { .bufBarriers = layoutBufferBarrier }; + m_cmdbuf->pipelineBarrier(EDF_NONE, depInfo); + + + const IGPUDescriptorSet* set = m_descriptorSet.get(); + PushConstants coopBinarySearchPC = { + .EntityCount = numIndices, + }; + + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0u, 1u, &set); + m_cmdbuf->pushConstants(m_pipeline->getLayout(), nbl::hlsl::ShaderStage::ESS_COMPUTE, 0u, sizeof(PushConstants), &coopBinarySearchPC); + 
m_cmdbuf->dispatch((totalValues + 255u) / 256u, 1u, 1u); + + layoutBufferBarrier[0].barrier.dep = layoutBufferBarrier[0].barrier.dep.nextBarrier(PIPELINE_STAGE_FLAGS::COPY_BIT,ACCESS_FLAGS::TRANSFER_READ_BIT); + m_cmdbuf->pipelineBarrier(EDF_NONE,depInfo); + + m_cmdbuf->end(); + + { + constexpr auto FinishedValue = 69; + IQueue::SSubmitInfo submitInfos[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + submitInfos[0].commandBuffers = cmdbufs; + const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = progress.get(),.value = FinishedValue,.stageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + submitInfos[0].signalSemaphores = signals; + m_api->startCapture(); + m_queue->submit(submitInfos); + m_api->endCapture(); + const ISemaphore::SWaitInfo waitInfos[] = { { + .semaphore = progress.get(), + .value = FinishedValue + } }; + m_device->blockForSemaphores(waitInfos); + } + + auto ptr = m_allocations[1].memory->getMappedPointer(); + assert(ptr); + + uint32_t* valuesPtr = reinterpret_cast(ptr); + for (uint32_t i = 0; i < totalValues; i++) { + uint32_t value = valuesPtr[i]; + const uint32_t* binarySearchResult = std::upper_bound(TestCaseIndices, TestCaseIndices + numIndices, i); + uint32_t lowerBoundIndex = std::distance(TestCaseIndices, binarySearchResult) - 1; + assert(value == lowerBoundIndex); + } + + m_keepRunning = false; + } + + bool keepRunning() override + { + return m_keepRunning; + } + + +private: + smart_refctd_ptr m_pipeline = nullptr; + smart_refctd_ptr m_descriptorSetLayout; + smart_refctd_ptr m_descriptorSet; + + smart_refctd_ptr m_buffers[2]; + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocations[2] = {}; + smart_refctd_ptr m_cmdbuf = nullptr; + IQueue* m_queue; + smart_refctd_ptr m_commandPool; + uint64_t m_iteration = 0; + constexpr static inline uint64_t MaxIterations = 200; + + bool m_keepRunning = true; +}; + +NBL_MAIN_FUNC(CooperativeBinarySearch) + +void cpu_tests() +{ 
+} diff --git a/72_CooperativeBinarySearch/testCaseData.h b/72_CooperativeBinarySearch/testCaseData.h new file mode 100644 index 000000000..16153780e --- /dev/null +++ b/72_CooperativeBinarySearch/testCaseData.h @@ -0,0 +1,1192 @@ +0, +298, +554, +582, +912, +1074, +1076, +1078, +1170, +1188, +2140, +2414, +2736, +2738, +3980, +4800, +5898, +5900, +6936, +8106, +8152, +8650, +8844, +8930, +9504, +10244, +10826, +10828, +11126, +11430, +12206, +13764, +14010, +15302, +15624, +15656, +16414, +16494, +17368, +17432, +18312, +18948, +19376, +19818, +20146, +20604, +21240, +22446, +23482, +24914, +25042, +25538, +26764, +27564, +27566, +28472, +29450, +30202, +31474, +32160, +32676, +33792, +33794, +34704, +36540, +37456, +37950, +38364, +39274, +40442, +40518, +41412, +41590, +41950, +42022, +42714, +43464, +43790, +43792, +44876, +44878, +46188, +46572, +47352, +47650, +48242, +49856, +49858, +50506, +50968, +50970, +51152, +51154, +52870, +52884, +53332, +53334, +53904, +53964, +53966, +53968, +53970, +53972, +53974, +53976, +53978, +53980, +54514, +54516, +54518, +54520, +54762, +55866, +56462, +56478, +56480, +56482, +57510, +57568, +57570, +57572, +57846, +57848, +58760, +59408, +59438, +60198, +60200, +60202, +60204, +60284, +60938, +61274, +61720, +62296, +63116, +63378, +63380, +63382, +63384, +63386, +63388, +63904, +64572, +65142, +65144, +65146, +65554, +65738, +66052, +67016, +67424, +67566, +68270, +68272, +68610, +69240, +69870, +70988, +72622, +73258, +73260, +73580, +74524, +74880, +74958, +74960, +74962, +75114, +75116, +75622, +77144, +77798, +77800, +78314, +79566, +79568, +79570, +79572, +79850, +79852, +81576, +81684, +81686, +82492, +82494, +82496, +82498, +83990, +84860, +84988, +84990, +85138, +85772, +86120, +86122, +86564, +87402, +87404, +87602, +88676, +88714, +88780, +89560, +89732, +90786, +91128, +91130, +91272, +91522, +91804, +92588, +92590, +92834, +93268, +93736, +94448, +94704, +94706, +95074, +95076, +96706, +97040, +97770, +98000, 
+98676, +99968, +100074, +100318, +100602, +100914, +101020, +101872, +101878, +103078, +104246, +104266, +105436, +106332, +106954, +107856, +108954, +110320, +110780, +111588, +111882, +112502, +112676, +113496, +114070, +115204, +115422, +115424, +115858, +116420, +117426, +118504, +118870, +119296, +119618, +119650, +120408, +120488, +121362, +121426, +122306, +122942, +123370, +123812, +124140, +124598, +125234, +126440, +127476, +128908, +129036, +129532, +130758, +131558, +131560, +132466, +133444, +134196, +135468, +136154, +136670, +137786, +137788, +138698, +140534, +140832, +141608, +142422, +143220, +143468, +143714, +144504, +145078, +145670, +146224, +146874, +147726, +148692, +149536, +151032, +151126, +153382, +154128, +155190, +155212, +156324, +156484, +156526, +157026, +158242, +158446, +158448, +158594, +159256, +160350, +160444, +161040, +161624, +162418, +162524, +162768, +163052, +163364, +163470, +164322, +164328, +165528, +166696, +166716, +167886, +168782, +169404, +170306, +171404, +172770, +173230, +174038, +174332, +174952, +175126, +175946, +176520, +177654, +177872, +177874, +178308, +178870, +179876, +180954, +181320, +181746, +182160, +183070, +184238, +184314, +185208, +185386, +185746, +185818, +186510, +187260, +187586, +187588, +188672, +188674, +189984, +190368, +191148, +191446, +192038, +193652, +193654, +194302, +194764, +194766, +194948, +194950, +196666, +196680, +197128, +197130, +197700, +198048, +198824, +199638, +200436, +200684, +200930, +201720, +202294, +202886, +203440, +204090, +204942, +205908, +206752, +208248, +208342, +210598, +211344, +212406, +212428, +213540, +213700, +213742, +214242, +215458, +215662, +215664, +215810, +216472, +217566, +217660, +218256, +218316, +218318, +218320, +218322, +218324, +218326, +218328, +218330, +218332, +218866, +218868, +218870, +218872, +219114, +220218, +220814, +220830, +220832, +220834, +221862, +221920, +221922, +221924, +222198, +222200, +223112, +223760, +223790, 
+224550, +224552, +224554, +224556, +225140, +225794, +226130, +226576, +227152, +227972, +228234, +228236, +228238, +228240, +228242, +228244, +228760, +229428, +229998, +230000, +230002, +230410, +230594, +230908, +231872, +232280, +232422, +233126, +233128, +233466, +234096, +234726, +235844, +237478, +238114, +238116, +238512, +239256, +239812, +240660, +241950, +243244, +243366, +244346, +244412, +244710, +245202, +246504, +246728, +246988, +247592, +248630, +249562, +250962, +251964, +252562, +253140, +253412, +254672, +255276, +256084, +256160, +256378, +257104, +257602, +257776, +258240, +258556, +258614, +259208, +260496, +261202, +261398, +262284, +262610, +262976, +263578, +264622, +265558, +266692, +266756, +268110, +268994, +269158, +269718, +270388, +270768, +271098, +271786, +272398, +272996, +273140, +273612, +274226, +274660, +275070, +275416, +275634, +275680, +276088, +276408, +276410, +276852, +277690, +277692, +277890, +278964, +279002, +279068, +279848, +280020, +281074, +281416, +281418, +281560, +281810, +282092, +282876, +282878, +283122, +283556, +284024, +284736, +284992, +284994, +285362, +285364, +286994, +287328, +288058, +288288, +288964, +289708, +289746, +290266, +291136, +292152, +292740, +292834, +293708, +293768, +293936, +294846, +295028, +295040, +295130, +295372, +296154, +296736, +297250, +297606, +298068, +298310, +299420, +300362, +301176, +301502, +301878, +302702, +303576, +303896, +305170, +305928, +306070, +306150, +307094, +307450, +307528, +307530, +307532, +307684, +307686, +308192, +309714, +310368, +310370, +310884, +312136, +312138, +312140, +312142, +312420, +312422, +314146, +314254, +314256, +315062, +315064, +315066, +315068, +316560, +317430, +317558, +317560, +317708, +318342, +319182, +319992, +320612, +320956, +321068, +321076, +322784, +322914, +323106, +324036, +324708, +326092, +326994, +327332, +328080, +328444, +329022, +329256, +330454, +331304, +331610, +332432, +332440, +333298, +334300, +334478, 
+334622, +335370, +335818, +336456, +336618, +337930, +338932, +339158, +339258, +339746, +340226, +340254, +340256, +340988, +341638, +342674, +343168, +343440, +344024, +344026, +344106, +345118, +346124, +347350, +348560, +348878, +349066, +350192, +350840, +351388, +353610, +354562, +355208, +356084, +356966, +358222, +359304, +359470, +360054, +360710, +360920, +361896, +362930, +362962, +363128, +363234, +363272, +363284, +363456, +363732, +364418, +364926, +365096, +365170, +365920, +366796, +367838, +368232, +368940, +369508, +369530, +370886, +371156, +371348, +372384, +372680, +372690, +373252, +373676, +374168, +374424, +374452, +374782, +374944, +374946, +374948, +375040, +375058, +376010, +376284, +376606, +376608, +377850, +378670, +379768, +379770, +380806, +381976, +382022, +382520, +382714, +382800, +383374, +384114, +384696, +384698, +384996, +385300, +386076, +387634, +387880, +388796, +389290, +389302, +389314, +389338, +389406, +389434, +389470, +389840, +389952, +390908, +391076, +391188, +392118, +392458, +392472, +392622, +392766, +393448, +394586, +394816, +394824, +395486, +396218, +396880, +396910, +397066, +397076, +397124, +397678, +398050, +399160, +400080, +401696, +401762, +402400, +402500, +402512, +403152, +404038, +404444, +404648, +404740, +405322, +406252, +407076, +408252, +408634, +409354, +410112, +411138, +411672, +411880, +412232, +412926, +412956, +413864, +414624, +415770, +415978, +417234, +417256, +417264, +418562, +418812, +418824, +418836, +418860, +418928, +418956, +418992, +419362, +419474, +420430, +420598, +420710, +421640, +421980, +421994, +422144, +422288, +422970, +424108, +424338, +424346, +425008, +425740, +426402, +426432, +426588, +426598, +426646, +427200, +427572, +428682, +429602, +430346, +430412, +431050, +431150, +431162, +431802, +432688, +433094, +433298, +433390, +433972, +434902, +435726, +436902, +437284, +438004, +438762, +439788, +440322, +440530, +440882, +441576, +441606, +442514, +443274, 
+444420, +444628, +445884, +445906, +445914, +447212, +447462, +448464, +448690, +448790, +449278, +449758, +449786, +449788, +450520, +451170, +452206, +452700, +452972, +453556, +453558, +453638, +454650, +455656, +456882, +458092, +458410, +458598, +459724, +460372, +460920, +463142, +464094, +464740, +465616, +466498, +467754, +468836, +469002, +469586, +470180, +471468, +472174, +472370, +473256, +473582, +473948, +474550, +475594, +476530, +477664, +477728, +479082, +479966, +480130, +480690, +481360, +481740, +482070, +482758, +483370, +483968, +484112, +484584, +485198, +485632, +486042, +486388, +486606, +486652, +487060, +488676, +489420, +489976, +490824, +492114, +493408, +493530, +494510, +494576, +494874, +495366, +496668, +496892, +497152, +497756, +498794, +499726, +501126, +502128, +502726, +503304, +503576, +504836, +505440, +506248, +506324, +506542, +507268, +507766, +507940, +508404, +508720, +509514, +510170, +510380, +511356, +512390, +512422, +512588, +512694, +512732, +512744, +512916, +513192, +513878, +514386, +514556, +514630, +515380, +516256, +517298, +517692, +518400, +518968, +518990, +520346, +520616, +520808, +521844, +522140, +522150, +522712, +523136, +523628, +524468, +525278, +525898, +526242, +526354, +526362, +528070, +528200, +528392, +529322, +529994, +531378, +532280, +532618, +533366, +533730, +534308, +534542, +535740, +536590, +536896, +537718, +537726, +538584, +539586, +539764, +539908, +540656, +541104, +541742, +541904, +543216, +543612, +543650, +544170, +545040, +546056, +546644, +546738, +547612, +547672, +547840, +548750, +548932, +548944, +549034, +549276, +550058, +550640, +551154, +551510, +551972, +552214, +553324, +554266, +555080, +555406, +555782, +556606, +557480, +557800, +559074, +559832, +559974, +550468, +551276, +552568, +552866, +553798, +554120, +554294, +555554, +556448, +556874, +557328, +557680, +558532, +559844, +560774, +561050, +561458, +562684, +563910, +564026, +564542, +565294, +565434, 
+566278, +567580, +568006, +568328, +569626, +570350, +570998, +572812, +573008, +573500, +573828, +573840, +573842, +574798, +576066, +576774, +577182, +577184, +577522, +577524, +578734, +579854, +579856, +581128, +581278, +582296, +583496, +583944, +584160, +584844, +584954, +584968, +585486, +586592, +586594, +587158, +587320, +588006, +589012, +590302, +590366, +590444, +590944, +581786, +582234, +582920, +582922, +564780, +565486, +565684, +566570, +566896, +567262, +567864, +568958, +570268, +570844, +572014, +573368, +574252, +574416, +574976, +575646, +576026, +576356, +577044, +577046, +577644, +577788, +578260, +578874, +579308, +579718, +580288, +580942, +581534, +581536, +576350, +576352 \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f8ce94f93..cbe482aa4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(12_MeshLoaders) # add_subdirectory(13_MaterialCompilerTest) + add_subdirectory(14_Mortons EXCLUDE_FROM_ALL) # Waiting for a refactor #add_subdirectory(27_PLYSTLDemo) @@ -87,6 +88,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) + add_subdirectory(72_CooperativeBinarySearch) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS)