From f2ea51d0b3e3388c0f9bae03602ec3b1f658c124 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Sun, 23 Mar 2025 19:29:49 -0300 Subject: [PATCH 01/57] Morton code tests --- CMakeLists.txt | 3 +- XX_Mortons/CMakeLists.txt | 24 ++++++++++ XX_Mortons/app_resources/shader.hlsl | 7 +++ XX_Mortons/config.json.template | 28 +++++++++++ XX_Mortons/main.cpp | 69 ++++++++++++++++++++++++++++ XX_Mortons/pipeline.groovy | 50 ++++++++++++++++++++ 6 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 XX_Mortons/CMakeLists.txt create mode 100644 XX_Mortons/app_resources/shader.hlsl create mode 100644 XX_Mortons/config.json.template create mode 100644 XX_Mortons/main.cpp create mode 100644 XX_Mortons/pipeline.groovy diff --git a/CMakeLists.txt b/CMakeLists.txt index fb03f95a4..7fcddfc18 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,8 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(67_RayQueryGeometry EXCLUDE_FROM_ALL) add_subdirectory(68_JpegLoading EXCLUDE_FROM_ALL) - add_subdirectory(70_FLIPFluids EXCLUDE_FROM_ALL) + add_subdirectory(70_FLIPFluids EXCLUDE_FROM_ALL) + add_subdirectory(XX_Mortons EXCLUDE_FROM_ALL) NBL_HOOK_COMMON_API("${NBL_COMMON_API_TARGETS}") endif() diff --git a/XX_Mortons/CMakeLists.txt b/XX_Mortons/CMakeLists.txt new file mode 100644 index 000000000..a434ff32a --- /dev/null +++ b/XX_Mortons/CMakeLists.txt @@ -0,0 +1,24 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/XX_Mortons/app_resources/shader.hlsl b/XX_Mortons/app_resources/shader.hlsl new file mode 100644 index 000000000..a24a78191 --- /dev/null +++ b/XX_Mortons/app_resources/shader.hlsl @@ -0,0 +1,7 @@ +#include "nbl/builtin/hlsl/math/morton.hlsl" + +[numthreads(512, 1, 1)] +void main(uint32_t3 ID : SV_DispatchThreadID) +{ + printf("%d %d", nbl::hlsl::morton::impl::decode_masks_array::Masks[0], nbl::hlsl::morton::impl::decode_masks_array::Masks[1]); +} \ No newline at end of file diff --git a/XX_Mortons/config.json.template b/XX_Mortons/config.json.template new file mode 100644 index 000000000..717d05d53 --- /dev/null +++ b/XX_Mortons/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", 
"Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", // should be none + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", // we also need to run in Debug nad RWDI because foundational example + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/XX_Mortons/main.cpp b/XX_Mortons/main.cpp new file mode 100644 index 000000000..881c84417 --- /dev/null +++ b/XX_Mortons/main.cpp @@ -0,0 +1,69 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + + +// I've moved out a tiny part of this example into a shared header for reuse, please open and read it. +#include "nbl/application_templates/MonoDeviceApplication.hpp" +#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" + +#include "nbl/builtin/hlsl/math/morton.hlsl" +#include + +using namespace nbl; +using namespace core; +using namespace system; +using namespace asset; +using namespace video; + + +// this time instead of defining our own `int main()` we derive from `nbl::system::IApplicationFramework` to play "nice" wil all platforms +class MortonTestApp final : public application_templates::MonoDeviceApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +{ + using device_base_t = application_templates::MonoDeviceApplication; + using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + + inline core::smart_refctd_ptr createShader( + const char* includeMainName) + { + std::string prelude = "#include \""; + auto CPUShader = core::make_smart_refctd_ptr((prelude + includeMainName + "\"\n").c_str(), IShader::E_SHADER_STAGE::ESS_COMPUTE, IShader::E_CONTENT_TYPE::ECT_HLSL, 
includeMainName); + assert(CPUShader); + return m_device->createShader(CPUShader.get()); + } + public: + MortonTestApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + // we stuff all our work here because its a "single shot" app + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + createShader("app_resources/shader.hlsl"); + + const auto masksArray = hlsl::morton::impl::decode_masks_array::Masks; + for (auto i = 0u; i < 3; i++) + { + std::cout << std::bitset<32>(masksArray[i]) << std::endl; + } + + return true; + } + + // Platforms like WASM expect the main entry point to periodically return control, hence if you want a crossplatform app, you have to let the framework deal with your "game loop" + void workLoopBody() override {} + + // Whether to keep invoking the above. In this example because its headless GPU compute, we do all the work in the app initialization. 
+ bool keepRunning() override {return false;} + + private: + smart_refctd_ptr m_api; +}; + + +NBL_MAIN_FUNC(MortonTestApp) \ No newline at end of file diff --git a/XX_Mortons/pipeline.groovy b/XX_Mortons/pipeline.groovy new file mode 100644 index 000000000..1a7b043a4 --- /dev/null +++ b/XX_Mortons/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CStreamingAndBufferDeviceAddressBuilder extends IBuilder +{ + public CStreamingAndBufferDeviceAddressBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + @Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return true + } + + @Override + public boolean test(Map axisMapping) + { + return true + } + + @Override + public boolean install(Map axisMapping) + { + return true + } +} + +def create(Agent _agent, _info) +{ + return new CStreamingAndBufferDeviceAddressBuilder(_agent, _info) +} + +return this \ No newline at end of file From 8f4e4529ca6f31ace6498cf9ac4284c14dbdf652 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 25 Mar 2025 10:44:31 -0300 Subject: [PATCH 02/57] Morton codes creating properly --- XX_Mortons/app_resources/common.hlsl | 10 ++ XX_Mortons/app_resources/shader.hlsl | 15 +- XX_Mortons/main.cpp | 241 ++++++++++++++++++++++++++- 3 files changed, 259 insertions(+), 7 deletions(-) create mode 100644 XX_Mortons/app_resources/common.hlsl diff --git 
a/XX_Mortons/app_resources/common.hlsl b/XX_Mortons/app_resources/common.hlsl new file mode 100644 index 000000000..3a9fca3fa --- /dev/null +++ b/XX_Mortons/app_resources/common.hlsl @@ -0,0 +1,10 @@ +#include "nbl/builtin/hlsl/math/morton.hlsl" + +NBL_CONSTEXPR uint32_t bufferSize = 256; +using scalar_t = int32_t; +using unsigned_scalar_t = nbl::hlsl::make_unsigned_t; + +struct PushConstantData +{ + uint64_t deviceBufferAddress; +}; \ No newline at end of file diff --git a/XX_Mortons/app_resources/shader.hlsl b/XX_Mortons/app_resources/shader.hlsl index a24a78191..d1f7c967e 100644 --- a/XX_Mortons/app_resources/shader.hlsl +++ b/XX_Mortons/app_resources/shader.hlsl @@ -1,7 +1,16 @@ -#include "nbl/builtin/hlsl/math/morton.hlsl" +#include "app_resources/common.hlsl" +#include "nbl/builtin/hlsl/bda/legacy_bda_accessor.hlsl" -[numthreads(512, 1, 1)] +[[vk::push_constant]] PushConstantData pushConstants; + +using namespace nbl::hlsl; + +[numthreads(bufferSize, 1, 1)] void main(uint32_t3 ID : SV_DispatchThreadID) { - printf("%d %d", nbl::hlsl::morton::impl::decode_masks_array::Masks[0], nbl::hlsl::morton::impl::decode_masks_array::Masks[1]); + LegacyBdaAccessor accessor = LegacyBdaAccessor::create(pushConstants.deviceBufferAddress); + + morton::code foo = morton::code::create(vector(-32768, -1)); + + accessor.set(0, foo.value); } \ No newline at end of file diff --git a/XX_Mortons/main.cpp b/XX_Mortons/main.cpp index 881c84417..860b581d2 100644 --- a/XX_Mortons/main.cpp +++ b/XX_Mortons/main.cpp @@ -7,7 +7,7 @@ #include "nbl/application_templates/MonoDeviceApplication.hpp" #include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "nbl/builtin/hlsl/math/morton.hlsl" +#include "app_resources/common.hlsl" #include using namespace nbl; @@ -16,7 +16,6 @@ using namespace system; using namespace asset; using namespace video; - // this time instead of defining our own `int main()` we derive from `nbl::system::IApplicationFramework` to play 
"nice" wil all platforms class MortonTestApp final : public application_templates::MonoDeviceApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication { @@ -44,14 +43,221 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, if (!asset_base_t::onAppInitialized(std::move(system))) return false; - createShader("app_resources/shader.hlsl"); + auto shader = createShader("app_resources/shader.hlsl"); + + // Create massive upload/download buffers + constexpr uint32_t DownstreamBufferSize = sizeof(unsigned_scalar_t) << 23; + constexpr uint32_t UpstreamBufferSize = sizeof(unsigned_scalar_t) << 23; + + m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize, UpstreamBufferSize); + if (!m_utils) + return logFail("Failed to create Utilities!"); + m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer(); + m_downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer(); + m_upStreamingBufferAddress = m_upStreamingBuffer->getBuffer()->getDeviceAddress(); + m_downStreamingBufferAddress = m_downStreamingBuffer->getBuffer()->getDeviceAddress(); + + // Create device-local buffer + { + IGPUBuffer::SCreationParams deviceLocalBufferParams = {}; + + IQueue* const queue = getComputeQueue(); + uint32_t queueFamilyIndex = queue->getFamilyIndex(); + + deviceLocalBufferParams.queueFamilyIndexCount = 1; + deviceLocalBufferParams.queueFamilyIndices = &queueFamilyIndex; + deviceLocalBufferParams.size = sizeof(unsigned_scalar_t) * bufferSize; + deviceLocalBufferParams.usage = nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; + + m_deviceLocalBuffer = m_device->createBuffer(std::move(deviceLocalBufferParams)); + auto mreqs = m_deviceLocalBuffer->getMemoryReqs(); + mreqs.memoryTypeBits &= 
m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + auto gpubufMem = m_device->allocate(mreqs, m_deviceLocalBuffer.get(), IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT); + + m_deviceLocalBufferAddress = m_deviceLocalBuffer.get()->getDeviceAddress(); + } + + const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE,.offset = 0,.size = sizeof(PushConstantData) }; + + { + auto layout = m_device->createPipelineLayout({ &pcRange,1 }); + IGPUComputePipeline::SCreationParams params = {}; + params.layout = layout.get(); + params.shader.shader = shader.get(); + params.shader.requiredSubgroupSize = static_cast(hlsl::findMSB(m_physicalDevice->getLimits().maxSubgroupSize)); + params.shader.requireFullSubgroups = true; + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + return logFail("Failed to create compute pipeline!\n"); + } + + const auto& deviceLimits = m_device->getPhysicalDevice()->getLimits(); + // The ranges of non-coherent mapped memory you flush or invalidate need to be aligned. You'll often see a value of 64 reported by devices + // which just happens to coincide with a CPU cache line size. So we ask our streaming buffers during allocation to give us properly aligned offsets. + // Sidenote: For SSBOs, UBOs, BufferViews, Vertex Buffer Bindings, Acceleration Structure BDAs, Shader Binding Tables, Descriptor Buffers, etc. + // there is also a requirement to bind buffers at offsets which have a certain alignment. Memory binding to Buffers and Images also has those. + // We'll align to max of coherent atom size even if the memory is coherent, + // and we also need to take into account BDA shader loads need to be aligned to the type being loaded. 
+ m_alignment = core::max(deviceLimits.nonCoherentAtomSize, alignof(float)); + + // Semaphor used here to know the FFT is done before download + m_timeline = m_device->createSemaphore(semaphorValue); + + IQueue* const queue = getComputeQueue(); + + const uint32_t inputSize = sizeof(unsigned_scalar_t) * bufferSize; + + // Just need a single suballocation in this example + const uint32_t AllocationCount = 1; + + // It comes with a certain drawback that you need to remember to initialize your "yet unallocated" offsets to the Invalid value + // this is to allow a set of allocations to fail, and you to re-try after doing something to free up space without repacking args. + auto inputOffset = m_upStreamingBuffer->invalid_value; + + // We always just wait till an allocation becomes possible (during allocation previous "latched" frees get their latch conditions polled) + // Freeing of Streaming Buffer Allocations can and should be deferred until an associated polled event signals done (more on that later). + std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); + // note that the API takes a time-point not a duration, because there are multiple waits and preemptions possible, so the durations wouldn't add up properly + m_upStreamingBuffer->multi_allocate(waitTill, AllocationCount, &inputOffset, &inputSize, &m_alignment); + + // Generate our data in-place on the allocated staging buffer. Packing is interleaved in this example! + { + auto* const inputPtr = reinterpret_cast(reinterpret_cast(m_upStreamingBuffer->getBufferPointer()) + inputOffset); + for (auto j = 0; j < bufferSize; j++) + { + unsigned_scalar_t x = j > 0 ? 0.f : 2.f; + unsigned_scalar_t y = 0; + + /* + unsigned_scalar_t x = 1.f; + unsigned_scalar_t y = 0.f; + */ + + inputPtr[2 * j] = x; + inputPtr[2 * j + 1] = y; + } + // Always remember to flush! 
+ if (m_upStreamingBuffer->needsManualFlushOrInvalidate()) + { + const auto bound = m_upStreamingBuffer->getBuffer()->getBoundMemory(); + const ILogicalDevice::MappedMemoryRange range(bound.memory, bound.offset + inputOffset, inputSize); + m_device->flushMappedMemoryRanges(1, &range); + } + } + + // finally allocate our output range + const uint32_t outputSize = inputSize; + auto outputOffset = m_downStreamingBuffer->invalid_value; + m_downStreamingBuffer->multi_allocate(waitTill, AllocationCount, &outputOffset, &outputSize, &m_alignment); + + smart_refctd_ptr cmdbuf; + { + smart_refctd_ptr cmdpool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &cmdbuf)) { + return logFail("Failed to create Command Buffers!\n"); + } + cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &cmdbuf,1 }, core::smart_refctd_ptr(m_logger)); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->bindComputePipeline(m_pipeline.get()); + // This is the new fun part, pushing constants + const PushConstantData pc = { .deviceBufferAddress = m_deviceLocalBufferAddress }; + IGPUCommandBuffer::SBufferCopy copyInfo = {}; + copyInfo.srcOffset = 0; + copyInfo.dstOffset = 0; + copyInfo.size = m_deviceLocalBuffer->getSize(); + cmdbuf->copyBuffer(m_upStreamingBuffer->getBuffer(), m_deviceLocalBuffer.get(), 1, ©Info); + cmdbuf->pushConstants(m_pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0u, sizeof(pc), &pc); + // Remember we do a single workgroup per 1D array in these parts + cmdbuf->dispatch(1, 1, 1); + + // Pipeline barrier: wait for FFT shader to be done before copying to downstream buffer + IGPUCommandBuffer::SPipelineBarrierDependencyInfo pipelineBarrierInfo = {}; + + decltype(pipelineBarrierInfo)::buffer_barrier_t barrier = {}; + pipelineBarrierInfo.bufBarriers = { &barrier, 1u }; + + barrier.range.buffer = 
m_deviceLocalBuffer; + + barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; + barrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS; + barrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + barrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS; + + cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS(0), pipelineBarrierInfo); + cmdbuf->copyBuffer(m_deviceLocalBuffer.get(), m_downStreamingBuffer->getBuffer(), 1, ©Info); + cmdbuf->end(); + } + + semaphorValue++; + { + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = + { + .cmdbuf = cmdbuf.get() + }; + const IQueue::SSubmitInfo::SSemaphoreInfo signalInfo = + { + .semaphore = m_timeline.get(), + .value = semaphorValue, + .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT + }; + + const IQueue::SSubmitInfo submitInfo = { + .waitSemaphores = {}, + .commandBuffers = {&cmdbufInfo,1}, + .signalSemaphores = {&signalInfo,1} + }; + + m_api->startCapture(); + queue->submit({ &submitInfo,1 }); + m_api->endCapture(); + } + + // We let all latches know what semaphore and counter value has to be passed for the functors to execute + const ISemaphore::SWaitInfo futureWait = { m_timeline.get(),semaphorValue }; + + // As promised, we can defer an upstreaming buffer deallocation until a fence is signalled + // You can also attach an additional optional IReferenceCounted derived object to hold onto until deallocation. + m_upStreamingBuffer->multi_deallocate(AllocationCount, &inputOffset, &inputSize, futureWait); + + // Now a new and even more advanced usage of the latched events, we make our own refcounted object with a custom destructor and latch that like we did the commandbuffer. + // Instead of making our own and duplicating logic, we'll use one from IUtilities meant for down-staging memory. + // Its nice because it will also remember to invalidate our memory mapping if its not coherent. 
+ auto latchedConsumer = make_smart_refctd_ptr( + IDeviceMemoryAllocation::MemoryRange(outputOffset, outputSize), + // Note the use of capture by-value [=] and not by-reference [&] because this lambda will be called asynchronously whenever the event signals + [=](const size_t dstOffset, const void* bufSrc, const size_t size)->void + { + // The unused variable is used for letting the consumer know the subsection of the output we've managed to download + // But here we're sure we can get the whole thing in one go because we allocated the whole range ourselves. + assert(dstOffset == 0 && size == outputSize); + + std::cout << "Begin array GPU\n"; + unsigned_scalar_t* const data = reinterpret_cast(const_cast(bufSrc)); + std::cout << std::bitset<32>(data[0]) << "\n"; + /* + for (auto i = 0u; i < bufferSize; i++) { + std::cout << std::bitset<32>(data[i]) << "\n"; + } + */ + std::cout << "\nEnd array GPU\n"; + }, + // Its also necessary to hold onto the commandbuffer, even though we take care to not reset the parent pool, because if it + // hits its destructor, our automated reference counting will drop all references to objects used in the recorded commands. + // It could also be latched in the upstreaming deallocate, because its the same fence. 
+ std::move(cmdbuf), m_downStreamingBuffer + ); + // We put a function we want to execute + m_downStreamingBuffer->multi_deallocate(AllocationCount, &outputOffset, &outputSize, futureWait, &latchedConsumer.get()); + + // ------------------------------------------- CPP ------------------------------------------------------------------------------------------------------ const auto masksArray = hlsl::morton::impl::decode_masks_array::Masks; for (auto i = 0u; i < 3; i++) { std::cout << std::bitset<32>(masksArray[i]) << std::endl; } + const auto someCode = hlsl::morton::code::create(hlsl::vector(1, 1, 1, 1)); + return true; } @@ -61,8 +267,35 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, // Whether to keep invoking the above. In this example because its headless GPU compute, we do all the work in the app initialization. bool keepRunning() override {return false;} + // Cleanup + bool onAppTerminated() override + { + // Need to make sure that there are no events outstanding if we want all lambdas to eventually execute before `onAppTerminated` + // (the destructors of the Command Pool Cache and Streaming buffers will still wait for all lambda events to drain) + while (m_downStreamingBuffer->cull_frees()) {} + return device_base_t::onAppTerminated(); + } + private: - smart_refctd_ptr m_api; + smart_refctd_ptr m_pipeline; + + smart_refctd_ptr m_utils; + + nbl::video::StreamingTransientDataBufferMT<>* m_upStreamingBuffer; + StreamingTransientDataBufferMT<>* m_downStreamingBuffer; + smart_refctd_ptr m_deviceLocalBuffer; + + // These are Buffer Device Addresses + uint64_t m_upStreamingBufferAddress; + uint64_t m_downStreamingBufferAddress; + uint64_t m_deviceLocalBufferAddress; + + // You can ask the `nbl::core::GeneralpurposeAddressAllocator` used internally by the Streaming Buffers give out offsets aligned to a certain multiple (not only Power of Two!) 
+ uint32_t m_alignment; + + // This example really lets the advantages of a timeline semaphore shine through! + smart_refctd_ptr m_timeline; + uint64_t semaphorValue = 0; }; From 0aedfd929a505657ef761c84be15cfaf8d4ddb7b Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 28 Mar 2025 20:16:45 -0300 Subject: [PATCH 03/57] All tests passing, HLSL compiles fine! --- XX_Mortons/main.cpp | 235 +++++++++++++++++++++++++++++++++----------- 1 file changed, 177 insertions(+), 58 deletions(-) diff --git a/XX_Mortons/main.cpp b/XX_Mortons/main.cpp index 860b581d2..b20662904 100644 --- a/XX_Mortons/main.cpp +++ b/XX_Mortons/main.cpp @@ -10,6 +10,9 @@ #include "app_resources/common.hlsl" #include +// Right now the test only checks that HLSL compiles the file +constexpr bool TestHLSL = true; + using namespace nbl; using namespace core; using namespace system; @@ -22,6 +25,12 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, using device_base_t = application_templates::MonoDeviceApplication; using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using morton_t = nbl::hlsl::morton::code; + using vector_t = nbl::hlsl::vector; + using unsigned_morton_t = nbl::hlsl::morton::code; + using unsigned_vector_t = nbl::hlsl::vector; + using bool_vector_t = nbl::hlsl::vector; + inline core::smart_refctd_ptr createShader( const char* includeMainName) { @@ -43,18 +52,173 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, if (!asset_base_t::onAppInitialized(std::move(system))) return false; + // ----------------------------------------------- CPP TESTS ---------------------------------------------------------------------- + + // Coordinate extraction and whole vector decode tests + { + morton_t morton(vector_t(-1011, 765, 248)); + unsigned_morton_t unsignedMorton(unsigned_vector_t(154, 789, 1011)); + + assert(morton.getCoordinate(0) == -1011 && morton.getCoordinate(1) == 765 && 
morton.getCoordinate(2) == 248); + assert(unsignedMorton.getCoordinate(0) == 154u && unsignedMorton.getCoordinate(1) == 789u && unsignedMorton.getCoordinate(2) == 1011u); + + assert(static_cast(morton) == vector_t(-1011, 765, 248) && static_cast(unsignedMorton) == unsigned_vector_t(154, 789, 1011)); + } + + // *********************************************************************************************************************************** + // ************************************************* Arithmetic operator tests ******************************************************* + // *********************************************************************************************************************************** + + // ---------------------------------------------------------------------------------------------------- + // --------------------------------------- ADDITION --------------------------------------------------- + // ---------------------------------------------------------------------------------------------------- + + // ---------------------------------------- Signed ----------------------------------------------------- + + // No overflow + assert(static_cast(morton_t(vector_t(-1011, 765, 248)) + morton_t(vector_t(1000, -985, 200))) == vector_t(-11, -220, 448)); + + // Type 1 overflow: Addition of representable coordinates goes out of range + assert(static_cast(morton_t(vector_t(-900, 70, 500)) + morton_t(vector_t(-578, -50, 20))) == vector_t(570, 20, -504)); + + // Type 2 overflow: Addition of irrepresentable range gives correct result + assert(static_cast(morton_t(vector_t(54, 900, -475)) + morton_t(vector_t(46, -1437, 699))) == vector_t(100, -537, 224)); + + // ---------------------------------------- Unsigned ----------------------------------------------------- + + // No overflow + assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) + unsigned_morton_t(unsigned_vector_t(1563, 754, 220))) == unsigned_vector_t(1945, 1664, 763)); + + 
// Type 1 overflow: Addition of representable coordinates goes out of range + assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) + unsigned_morton_t(unsigned_vector_t(2000, 2000, 1000))) == unsigned_vector_t(334, 862, 519)); + + // Type 2 overflow: Addition of irrepresentable range gives correct result + assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) + unsigned_morton_t(unsigned_vector_t(-143, -345, -233))) == unsigned_vector_t(239, 565, 310)); + + // ---------------------------------------------------------------------------------------------------- + // -------------------------------------- SUBTRACTION ------------------------------------------------- + // ---------------------------------------------------------------------------------------------------- + + // ---------------------------------------- Signed ----------------------------------------------------- + + // No overflow + assert(static_cast(morton_t(vector_t(1000, 764, -365)) - morton_t(vector_t(834, -243, 100))) == vector_t(166, 1007, -465)); + + // Type 1 overflow: Subtraction of representable coordinates goes out of range + assert(static_cast(morton_t(vector_t(-900, 70, 500)) - morton_t(vector_t(578, -50, -20))) == vector_t(570, 120, -504)); + + // Type 2 overflow: Subtraction of irrepresentable range gives correct result + assert(static_cast(morton_t(vector_t(54, 900, -475)) - morton_t(vector_t(-46, 1437, -699))) == vector_t(100, -537, 224)); + + // ---------------------------------------- Unsigned ----------------------------------------------------- + + // No overflow + assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) - unsigned_morton_t(unsigned_vector_t(322, 564, 299))) == unsigned_vector_t(60, 346, 244)); + + // Type 1 overflow: Subtraction of representable coordinates goes out of range + assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) - unsigned_morton_t(unsigned_vector_t(2000, 2000, 1000))) == 
unsigned_vector_t(430, 958, 567)); + + // Type 2 overflow: Subtraction of irrepresentable range gives correct result + assert(static_cast(unsigned_morton_t(unsigned_vector_t(54, 900, 475)) - unsigned_morton_t(unsigned_vector_t(-865, -100, -10))) == unsigned_vector_t(919, 1000, 485)); + + + // ---------------------------------------------------------------------------------------------------- + // -------------------------------------- UNARY NEGATION ---------------------------------------------- + // ---------------------------------------------------------------------------------------------------- + + // Only makes sense for signed + assert(static_cast(- morton_t(vector_t(-1024, 543, -475))) == vector_t(-1024, -543, 475)); + + // *********************************************************************************************************************************** + // ************************************************* Comparison operator tests ******************************************************* + // *********************************************************************************************************************************** + + // ---------------------------------------------------------------------------------------------------- + // -------------------------------------- OPERATOR< --------------------------------------------------- + // ---------------------------------------------------------------------------------------------------- + + // Signed + + // Same sign, negative + assert(morton_t(vector_t(-954, -455, -333)) < morton_t(vector_t(-433, -455, -433)) == bool_vector_t(true, false, false)); + // Same sign, positive + assert(morton_t(vector_t(954, 455, 333)) < morton_t(vector_t(433, 455, 433)) == bool_vector_t(false, false, true)); + // Differing signs + assert(morton_t(vector_t(954, -32, 0)) < morton_t(vector_t(-44, 0, -1)) == bool_vector_t(false, true, false)); + + // Unsigned + assert(unsigned_morton_t(unsigned_vector_t(239, 435, 66)) < 
unsigned_morton_t(unsigned_vector_t(240, 435, 50)) == bool_vector_t(true, false, false)); + + // ---------------------------------------------------------------------------------------------------- + // -------------------------------------- OPERATOR<= -------------------------------------------------- + // ---------------------------------------------------------------------------------------------------- + + // Signed + + // Same sign, negative + assert(morton_t(vector_t(-954, -455, -333)) <= morton_t(vector_t(-433, -455, -433)) == bool_vector_t(true, true, false)); + // Same sign, positive + assert(morton_t(vector_t(954, 455, 333)) <= morton_t(vector_t(433, 455, 433)) == bool_vector_t(false, true, true)); + // Differing signs + assert(morton_t(vector_t(954, -32, 0)) <= morton_t(vector_t(-44, 0, -1)) == bool_vector_t(false, true, false)); + + // Unsigned + assert(unsigned_morton_t(unsigned_vector_t(239, 435, 66)) <= unsigned_morton_t(unsigned_vector_t(240, 435, 50)) == bool_vector_t(true, true, false)); + + // ---------------------------------------------------------------------------------------------------- + // -------------------------------------- OPERATOR> --------------------------------------------------- + // ---------------------------------------------------------------------------------------------------- + + // Signed + + // Same sign, negative + assert(morton_t(vector_t(-954, -455, -333)) > morton_t(vector_t(-433, -455, -433)) == bool_vector_t(false, false, true)); + // Same sign, positive + assert(morton_t(vector_t(954, 455, 333)) > morton_t(vector_t(433, 455, 433)) == bool_vector_t(true, false, false)); + // Differing signs + assert(morton_t(vector_t(954, -32, 0)) > morton_t(vector_t(-44, 0, -1)) == bool_vector_t(true, false, true)); + + // Unsigned + assert(unsigned_morton_t(unsigned_vector_t(239, 435, 66)) > unsigned_morton_t(unsigned_vector_t(240, 435, 50)) == bool_vector_t(false, false, true)); + + // 
---------------------------------------------------------------------------------------------------- + // -------------------------------------- OPERATOR>= -------------------------------------------------- + // ---------------------------------------------------------------------------------------------------- + + // Signed + + // Same sign, negative + assert(morton_t(vector_t(-954, -455, -333)) >= morton_t(vector_t(-433, -455, -433)) == bool_vector_t(false, true, true)); + // Same sign, positive + assert(morton_t(vector_t(954, 455, 333)) >= morton_t(vector_t(433, 455, 433)) == bool_vector_t(true, true, false)); + // Differing signs + assert(morton_t(vector_t(954, -32, 0)) >= morton_t(vector_t(-44, 0, -1)) == bool_vector_t(true, false, true)); + + // Unsigned + assert(unsigned_morton_t(unsigned_vector_t(239, 435, 66)) >= unsigned_morton_t(unsigned_vector_t(240, 435, 50)) == bool_vector_t(false, true, true)); + + + if(!TestHLSL) + return true; + + + + + + + + + + // ----------------------------------------------- HLSL COMPILATION + OPTIONAL TESTS ---------------------------------------------- auto shader = createShader("app_resources/shader.hlsl"); // Create massive upload/download buffers constexpr uint32_t DownstreamBufferSize = sizeof(unsigned_scalar_t) << 23; - constexpr uint32_t UpstreamBufferSize = sizeof(unsigned_scalar_t) << 23; - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize, UpstreamBufferSize); + m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize); if (!m_utils) return logFail("Failed to create Utilities!"); - m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer(); m_downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer(); - m_upStreamingBufferAddress = m_upStreamingBuffer->getBuffer()->getDeviceAddress(); m_downStreamingBufferAddress = m_downStreamingBuffer->getBuffer()->getDeviceAddress(); // Create device-local buffer 
@@ -109,40 +273,9 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, // Just need a single suballocation in this example const uint32_t AllocationCount = 1; - // It comes with a certain drawback that you need to remember to initialize your "yet unallocated" offsets to the Invalid value - // this is to allow a set of allocations to fail, and you to re-try after doing something to free up space without repacking args. - auto inputOffset = m_upStreamingBuffer->invalid_value; - // We always just wait till an allocation becomes possible (during allocation previous "latched" frees get their latch conditions polled) // Freeing of Streaming Buffer Allocations can and should be deferred until an associated polled event signals done (more on that later). std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); - // note that the API takes a time-point not a duration, because there are multiple waits and preemptions possible, so the durations wouldn't add up properly - m_upStreamingBuffer->multi_allocate(waitTill, AllocationCount, &inputOffset, &inputSize, &m_alignment); - - // Generate our data in-place on the allocated staging buffer. Packing is interleaved in this example! - { - auto* const inputPtr = reinterpret_cast(reinterpret_cast(m_upStreamingBuffer->getBufferPointer()) + inputOffset); - for (auto j = 0; j < bufferSize; j++) - { - unsigned_scalar_t x = j > 0 ? 0.f : 2.f; - unsigned_scalar_t y = 0; - - /* - unsigned_scalar_t x = 1.f; - unsigned_scalar_t y = 0.f; - */ - - inputPtr[2 * j] = x; - inputPtr[2 * j + 1] = y; - } - // Always remember to flush! 
- if (m_upStreamingBuffer->needsManualFlushOrInvalidate()) - { - const auto bound = m_upStreamingBuffer->getBuffer()->getBoundMemory(); - const ILogicalDevice::MappedMemoryRange range(bound.memory, bound.offset + inputOffset, inputSize); - m_device->flushMappedMemoryRanges(1, &range); - } - } // finally allocate our output range const uint32_t outputSize = inputSize; @@ -161,11 +294,6 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, cmdbuf->bindComputePipeline(m_pipeline.get()); // This is the new fun part, pushing constants const PushConstantData pc = { .deviceBufferAddress = m_deviceLocalBufferAddress }; - IGPUCommandBuffer::SBufferCopy copyInfo = {}; - copyInfo.srcOffset = 0; - copyInfo.dstOffset = 0; - copyInfo.size = m_deviceLocalBuffer->getSize(); - cmdbuf->copyBuffer(m_upStreamingBuffer->getBuffer(), m_deviceLocalBuffer.get(), 1, ©Info); cmdbuf->pushConstants(m_pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0u, sizeof(pc), &pc); // Remember we do a single workgroup per 1D array in these parts cmdbuf->dispatch(1, 1, 1); @@ -184,6 +312,11 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, barrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS; cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS(0), pipelineBarrierInfo); + + IGPUCommandBuffer::SBufferCopy copyInfo = {}; + copyInfo.srcOffset = 0; + copyInfo.dstOffset = 0; + copyInfo.size = m_deviceLocalBuffer->getSize(); cmdbuf->copyBuffer(m_deviceLocalBuffer.get(), m_downStreamingBuffer->getBuffer(), 1, ©Info); cmdbuf->end(); } @@ -215,10 +348,6 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, // We let all latches know what semaphore and counter value has to be passed for the functors to execute const ISemaphore::SWaitInfo futureWait = { m_timeline.get(),semaphorValue }; - // As promised, we can defer an upstreaming buffer deallocation until a fence is signalled - // You can also 
attach an additional optional IReferenceCounted derived object to hold onto until deallocation. - m_upStreamingBuffer->multi_deallocate(AllocationCount, &inputOffset, &inputSize, futureWait); - // Now a new and even more advanced usage of the latched events, we make our own refcounted object with a custom destructor and latch that like we did the commandbuffer. // Instead of making our own and duplicating logic, we'll use one from IUtilities meant for down-staging memory. // Its nice because it will also remember to invalidate our memory mapping if its not coherent. @@ -249,15 +378,6 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, // We put a function we want to execute m_downStreamingBuffer->multi_deallocate(AllocationCount, &outputOffset, &outputSize, futureWait, &latchedConsumer.get()); - // ------------------------------------------- CPP ------------------------------------------------------------------------------------------------------ - const auto masksArray = hlsl::morton::impl::decode_masks_array::Masks; - for (auto i = 0u; i < 3; i++) - { - std::cout << std::bitset<32>(masksArray[i]) << std::endl; - } - - const auto someCode = hlsl::morton::code::create(hlsl::vector(1, 1, 1, 1)); - return true; } @@ -272,7 +392,10 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, { // Need to make sure that there are no events outstanding if we want all lambdas to eventually execute before `onAppTerminated` // (the destructors of the Command Pool Cache and Streaming buffers will still wait for all lambda events to drain) - while (m_downStreamingBuffer->cull_frees()) {} + if (TestHLSL) + { + while (m_downStreamingBuffer->cull_frees()) {} + } return device_base_t::onAppTerminated(); } @@ -281,19 +404,15 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, smart_refctd_ptr m_utils; - nbl::video::StreamingTransientDataBufferMT<>* m_upStreamingBuffer; 
StreamingTransientDataBufferMT<>* m_downStreamingBuffer; smart_refctd_ptr m_deviceLocalBuffer; // These are Buffer Device Addresses - uint64_t m_upStreamingBufferAddress; uint64_t m_downStreamingBufferAddress; uint64_t m_deviceLocalBufferAddress; - // You can ask the `nbl::core::GeneralpurposeAddressAllocator` used internally by the Streaming Buffers give out offsets aligned to a certain multiple (not only Power of Two!) uint32_t m_alignment; - // This example really lets the advantages of a timeline semaphore shine through! smart_refctd_ptr m_timeline; uint64_t semaphorValue = 0; }; From ea42d5bf287cbff376809be65f64c71567e0134f Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 1 Apr 2025 15:44:55 -0300 Subject: [PATCH 04/57] Rename example --- {XX_Mortons => 12_Mortons}/CMakeLists.txt | 0 12_Mortons/app_resources/common.hlsl | 13 ++++++++++++ .../app_resources/shader.hlsl | 8 ++++--- .../config.json.template | 0 {XX_Mortons => 12_Mortons}/main.cpp | 21 ++++++++----------- {XX_Mortons => 12_Mortons}/pipeline.groovy | 0 CMakeLists.txt | 2 +- XX_Mortons/app_resources/common.hlsl | 10 --------- 8 files changed, 28 insertions(+), 26 deletions(-) rename {XX_Mortons => 12_Mortons}/CMakeLists.txt (100%) create mode 100644 12_Mortons/app_resources/common.hlsl rename {XX_Mortons => 12_Mortons}/app_resources/shader.hlsl (79%) rename {XX_Mortons => 12_Mortons}/config.json.template (100%) rename {XX_Mortons => 12_Mortons}/main.cpp (97%) rename {XX_Mortons => 12_Mortons}/pipeline.groovy (100%) delete mode 100644 XX_Mortons/app_resources/common.hlsl diff --git a/XX_Mortons/CMakeLists.txt b/12_Mortons/CMakeLists.txt similarity index 100% rename from XX_Mortons/CMakeLists.txt rename to 12_Mortons/CMakeLists.txt diff --git a/12_Mortons/app_resources/common.hlsl b/12_Mortons/app_resources/common.hlsl new file mode 100644 index 000000000..bd5184f80 --- /dev/null +++ b/12_Mortons/app_resources/common.hlsl @@ -0,0 +1,13 @@ +//#include "nbl/builtin/hlsl/morton.hlsl" +#include 
"nbl/builtin/hlsl/cpp_compat.hlsl" + +NBL_CONSTEXPR uint32_t bufferSize = 256; + +// Proper coverage would require writing tests for ALL possible sign, dimensions and width configurations +//using morton_t2 = nbl::hlsl::morton::code; // Fits in an int16_t +using vector_t2 = nbl::hlsl::vector; + +struct PushConstantData +{ + uint64_t deviceBufferAddress; +}; \ No newline at end of file diff --git a/XX_Mortons/app_resources/shader.hlsl b/12_Mortons/app_resources/shader.hlsl similarity index 79% rename from XX_Mortons/app_resources/shader.hlsl rename to 12_Mortons/app_resources/shader.hlsl index d1f7c967e..e7f570eee 100644 --- a/XX_Mortons/app_resources/shader.hlsl +++ b/12_Mortons/app_resources/shader.hlsl @@ -3,14 +3,16 @@ [[vk::push_constant]] PushConstantData pushConstants; -using namespace nbl::hlsl; - [numthreads(bufferSize, 1, 1)] void main(uint32_t3 ID : SV_DispatchThreadID) { + /* LegacyBdaAccessor accessor = LegacyBdaAccessor::create(pushConstants.deviceBufferAddress); morton::code foo = morton::code::create(vector(-32768, -1)); - accessor.set(0, foo.value); + //accessor.set(0, foo.value); + */ + uint32_t bar = _static_cast(0xCAFEDEADDEADBEEF); + accessor.set(0, bar); } \ No newline at end of file diff --git a/XX_Mortons/config.json.template b/12_Mortons/config.json.template similarity index 100% rename from XX_Mortons/config.json.template rename to 12_Mortons/config.json.template diff --git a/XX_Mortons/main.cpp b/12_Mortons/main.cpp similarity index 97% rename from XX_Mortons/main.cpp rename to 12_Mortons/main.cpp index b20662904..d1fddba7a 100644 --- a/XX_Mortons/main.cpp +++ b/12_Mortons/main.cpp @@ -25,12 +25,6 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, using device_base_t = application_templates::MonoDeviceApplication; using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; - using morton_t = nbl::hlsl::morton::code; - using vector_t = nbl::hlsl::vector; - using 
unsigned_morton_t = nbl::hlsl::morton::code; - using unsigned_vector_t = nbl::hlsl::vector; - using bool_vector_t = nbl::hlsl::vector; - inline core::smart_refctd_ptr createShader( const char* includeMainName) { @@ -52,6 +46,8 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, if (!asset_base_t::onAppInitialized(std::move(system))) return false; + /* + // ----------------------------------------------- CPP TESTS ---------------------------------------------------------------------- // Coordinate extraction and whole vector decode tests @@ -201,7 +197,7 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, if(!TestHLSL) return true; - + */ @@ -213,7 +209,7 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, auto shader = createShader("app_resources/shader.hlsl"); // Create massive upload/download buffers - constexpr uint32_t DownstreamBufferSize = sizeof(unsigned_scalar_t) << 23; + constexpr uint32_t DownstreamBufferSize = sizeof(uint32_t) << 23; m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize); if (!m_utils) @@ -230,7 +226,7 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, deviceLocalBufferParams.queueFamilyIndexCount = 1; deviceLocalBufferParams.queueFamilyIndices = &queueFamilyIndex; - deviceLocalBufferParams.size = sizeof(unsigned_scalar_t) * bufferSize; + deviceLocalBufferParams.size = sizeof(uint32_t) * bufferSize; deviceLocalBufferParams.usage = nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; m_deviceLocalBuffer = m_device->createBuffer(std::move(deviceLocalBufferParams)); @@ -268,7 +264,7 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, IQueue* const queue = getComputeQueue(); - const uint32_t 
inputSize = sizeof(unsigned_scalar_t) * bufferSize; + const uint32_t inputSize = sizeof(uint32_t) * bufferSize; // Just need a single suballocation in this example const uint32_t AllocationCount = 1; @@ -361,8 +357,9 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, assert(dstOffset == 0 && size == outputSize); std::cout << "Begin array GPU\n"; - unsigned_scalar_t* const data = reinterpret_cast(const_cast(bufSrc)); - std::cout << std::bitset<32>(data[0]) << "\n"; + uint32_t* const data = reinterpret_cast(const_cast(bufSrc)); + //std::cout << std::bitset<32>(data[0]) << "\n"; + std::cout << data[0] << "\n"; /* for (auto i = 0u; i < bufferSize; i++) { std::cout << std::bitset<32>(data[i]) << "\n"; diff --git a/XX_Mortons/pipeline.groovy b/12_Mortons/pipeline.groovy similarity index 100% rename from XX_Mortons/pipeline.groovy rename to 12_Mortons/pipeline.groovy diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fcddfc18..5d0c148cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,6 +48,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(10_CountingSort EXCLUDE_FROM_ALL) # showcase use of FFT for post-FX Bloom effect add_subdirectory(11_FFT EXCLUDE_FROM_ALL) + add_subdirectory(12_Mortons EXCLUDE_FROM_ALL) # Waiting for a refactor @@ -96,7 +97,6 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(68_JpegLoading EXCLUDE_FROM_ALL) add_subdirectory(70_FLIPFluids EXCLUDE_FROM_ALL) - add_subdirectory(XX_Mortons EXCLUDE_FROM_ALL) NBL_HOOK_COMMON_API("${NBL_COMMON_API_TARGETS}") endif() diff --git a/XX_Mortons/app_resources/common.hlsl b/XX_Mortons/app_resources/common.hlsl deleted file mode 100644 index 3a9fca3fa..000000000 --- a/XX_Mortons/app_resources/common.hlsl +++ /dev/null @@ -1,10 +0,0 @@ -#include "nbl/builtin/hlsl/math/morton.hlsl" - -NBL_CONSTEXPR uint32_t bufferSize = 256; -using scalar_t = int32_t; -using unsigned_scalar_t = nbl::hlsl::make_unsigned_t; - -struct PushConstantData -{ - uint64_t deviceBufferAddress; -}; \ No newline at end 
of file From 2ba08a4a39bf15b3c689666012b263794b8371f2 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 1 Apr 2025 17:43:20 -0300 Subject: [PATCH 05/57] Add tests for AddCarry and SubBorrow intrinsics --- 22_CppCompat/CIntrinsicsTester.h | 13 + 22_CppCompat/app_resources/common.hlsl | 859 +++++++++++++------------ 2 files changed, 451 insertions(+), 421 deletions(-) diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index 77aa2c1ca..5fe7bc08e 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -85,6 +85,10 @@ class CIntrinsicsTester final : public ITester testInput.smoothStepEdge0 = realDistributionNeg(mt); testInput.smoothStepEdge1 = realDistributionPos(mt); testInput.smoothStepX = realDistribution(mt); + testInput.addCarryA = std::numeric_limits::max() - uintDistribution(mt); + testInput.addCarryB = uintDistribution(mt); + testInput.subBorrowA = uintDistribution(mt); + testInput.subBorrowB = uintDistribution(mt); testInput.bitCountVec = int32_t3(intDistribution(mt), intDistribution(mt), intDistribution(mt)); testInput.clampValVec = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); @@ -119,6 +123,10 @@ class CIntrinsicsTester final : public ITester testInput.refractI = float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt)); testInput.refractN = glm::normalize(float32_t3(realDistribution(mt), realDistribution(mt), realDistribution(mt))); testInput.refractEta = realDistribution(mt); + testInput.addCarryAVec = uint32_t3(std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt), std::numeric_limits::max() - uintDistribution(mt)); + testInput.addCarryBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); + testInput.subBorrowAVec = uint32_t3(uintDistribution(mt), uintDistribution(mt), uintDistribution(mt)); + testInput.subBorrowBVec = uint32_t3(uintDistribution(mt), uintDistribution(mt),
uintDistribution(mt), uintDistribution(mt)); // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values IntrinsicsTestValues expected; @@ -188,6 +196,11 @@ class CIntrinsicsTester final : public ITester auto inverseGlm = glm::inverse(reinterpret_cast(testInput.inverse)); expected.inverse = reinterpret_cast(inverseGlm); + expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); + expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); + expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); + expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); + performCpuTests(testInput, expected); performGpuTests(testInput, expected); } diff --git a/22_CppCompat/app_resources/common.hlsl b/22_CppCompat/app_resources/common.hlsl index e2303a2fc..dc3ff5fcd 100644 --- a/22_CppCompat/app_resources/common.hlsl +++ b/22_CppCompat/app_resources/common.hlsl @@ -1,74 +1,74 @@ -//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. -//// This file is part of the "Nabla Engine". -//// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ - -// because DXC doesn't properly support `_Static_assert` -// TODO: add a message, and move to macros.h or cpp_compat -#define STATIC_ASSERT(...) 
{ nbl::hlsl::conditional<__VA_ARGS__, int, void>::type a = 0; } - -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include - -#include - - -#include -#include -#include - -#include -#include - -// tgmath.hlsl and intrinsics.hlsl tests - -using namespace nbl::hlsl; -struct TgmathIntputTestValues -{ - float floor; - float isnan; - float isinf; - float powX; - float powY; - float exp; - float exp2; - float log; - float log2; - float absF; - int absI; - float sqrt; - float sin; - float cos; - float acos; - float modf; - float round; - float roundEven; - float trunc; - float ceil; - float fmaX; - float fmaY; - float fmaZ; - float ldexpArg; - int ldexpExp; - float modfStruct; - float frexpStruct; +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_COMMON_INCLUDED_ + +// because DXC doesn't properly support `_Static_assert` +// TODO: add a message, and move to macros.h or cpp_compat +#define STATIC_ASSERT(...) 
{ nbl::hlsl::conditional<__VA_ARGS__, int, void>::type a = 0; } + +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include + +#include + + +#include +#include +#include + +#include +#include + +// tgmath.hlsl and intrinsics.hlsl tests + +using namespace nbl::hlsl; +struct TgmathIntputTestValues +{ + float floor; + float isnan; + float isinf; + float powX; + float powY; + float exp; + float exp2; + float log; + float log2; + float absF; + int absI; + float sqrt; + float sin; + float cos; + float acos; + float modf; + float round; + float roundEven; + float trunc; + float ceil; + float fmaX; + float fmaY; + float fmaZ; + float ldexpArg; + int ldexpExp; + float modfStruct; + float frexpStruct; float tan; float asin; float atan; @@ -78,38 +78,38 @@ struct TgmathIntputTestValues float asinh; float acosh; float atanh; - float atan2X; - float atan2Y; - float erf; - float erfInv; - - float32_t3 floorVec; - float32_t3 isnanVec; - float32_t3 isinfVec; - float32_t3 powXVec; - float32_t3 powYVec; - float32_t3 expVec; - float32_t3 exp2Vec; - float32_t3 logVec; - float32_t3 log2Vec; - float32_t3 absFVec; - int32_t3 absIVec; - float32_t3 sqrtVec; - float32_t3 sinVec; - float32_t3 cosVec; - float32_t3 acosVec; - float32_t3 modfVec; - float32_t3 roundVec; - float32_t3 roundEvenVec; - float32_t3 truncVec; - float32_t3 ceilVec; - float32_t3 fmaXVec; - float32_t3 fmaYVec; - float32_t3 fmaZVec; - float32_t3 ldexpArgVec; - int32_t3 ldexpExpVec; - float32_t3 modfStructVec; - float32_t3 frexpStructVec; + float atan2X; + float atan2Y; + float erf; + float erfInv; + + float32_t3 floorVec; + float32_t3 isnanVec; + float32_t3 isinfVec; + float32_t3 powXVec; + float32_t3 powYVec; + float32_t3 expVec; + float32_t3 exp2Vec; + float32_t3 logVec; + float32_t3 log2Vec; + float32_t3 absFVec; + int32_t3 absIVec; + float32_t3 sqrtVec; + float32_t3 sinVec; + float32_t3 cosVec; + float32_t3 acosVec; + float32_t3 modfVec; + 
float32_t3 roundVec; + float32_t3 roundEvenVec; + float32_t3 truncVec; + float32_t3 ceilVec; + float32_t3 fmaXVec; + float32_t3 fmaYVec; + float32_t3 fmaZVec; + float32_t3 ldexpArgVec; + int32_t3 ldexpExpVec; + float32_t3 modfStructVec; + float32_t3 frexpStructVec; float32_t3 tanVec; float32_t3 asinVec; float32_t3 atanVec; @@ -119,35 +119,35 @@ struct TgmathIntputTestValues float32_t3 asinhVec; float32_t3 acoshVec; float32_t3 atanhVec; - float32_t3 atan2XVec; - float32_t3 atan2YVec; - float32_t3 erfVec; - float32_t3 erfInvVec; -}; - -struct TgmathTestValues -{ - float floor; - int isnan; - int isinf; - float pow; - float exp; - float exp2; - float log; - float log2; - float absF; - int absI; - float sqrt; - float sin; - float cos; - float acos; - float modf; - float round; - float roundEven; - float trunc; - float ceil; - float fma; - float ldexp; + float32_t3 atan2XVec; + float32_t3 atan2YVec; + float32_t3 erfVec; + float32_t3 erfInvVec; +}; + +struct TgmathTestValues +{ + float floor; + int isnan; + int isinf; + float pow; + float exp; + float exp2; + float log; + float log2; + float absF; + int absI; + float sqrt; + float sin; + float cos; + float acos; + float modf; + float round; + float roundEven; + float trunc; + float ceil; + float fma; + float ldexp; float tan; float asin; float atan; @@ -157,40 +157,40 @@ struct TgmathTestValues float asinh; float acosh; float atanh; - float atan2; - float erf; - float erfInv; - - float32_t3 floorVec; - - // we can't fix this because using namespace nbl::hlsl would cause ambiguous math functions below - // and we can't add a nbl::hlsl alias for the builtin hLSL vector type because of https://github.com/microsoft/DirectXShaderCompiler/issues/7035 -#ifndef __HLSL_VERSION - nbl::hlsl::vector isnanVec; - nbl::hlsl::vector isinfVec; -#else - vector isnanVec; - vector isinfVec; -#endif - - float32_t3 powVec; - float32_t3 expVec; - float32_t3 exp2Vec; - float32_t3 logVec; - float32_t3 log2Vec; - float32_t3 absFVec; - int32_t3 
absIVec; - float32_t3 sqrtVec; - float32_t3 cosVec; - float32_t3 sinVec; - float32_t3 acosVec; - float32_t3 modfVec; - float32_t3 roundVec; - float32_t3 roundEvenVec; - float32_t3 truncVec; - float32_t3 ceilVec; - float32_t3 fmaVec; - float32_t3 ldexpVec; + float atan2; + float erf; + float erfInv; + + float32_t3 floorVec; + + // we can't fix this because using namespace nbl::hlsl would cause ambiguous math functions below + // and we can't add a nbl::hlsl alias for the builtin hLSL vector type because of https://github.com/microsoft/DirectXShaderCompiler/issues/7035 +#ifndef __HLSL_VERSION + nbl::hlsl::vector isnanVec; + nbl::hlsl::vector isinfVec; +#else + vector isnanVec; + vector isinfVec; +#endif + + float32_t3 powVec; + float32_t3 expVec; + float32_t3 exp2Vec; + float32_t3 logVec; + float32_t3 log2Vec; + float32_t3 absFVec; + int32_t3 absIVec; + float32_t3 sqrtVec; + float32_t3 cosVec; + float32_t3 sinVec; + float32_t3 acosVec; + float32_t3 modfVec; + float32_t3 roundVec; + float32_t3 roundEvenVec; + float32_t3 truncVec; + float32_t3 ceilVec; + float32_t3 fmaVec; + float32_t3 ldexpVec; float32_t3 tanVec; float32_t3 asinVec; float32_t3 atanVec; @@ -200,258 +200,275 @@ struct TgmathTestValues float32_t3 asinhVec; float32_t3 acoshVec; float32_t3 atanhVec; - float32_t3 atan2Vec; - float32_t3 erfVec; - float32_t3 erfInvVec; - - ModfOutput modfStruct; - ModfOutput modfStructVec; - FrexpOutput frexpStruct; - FrexpOutput frexpStructVec; - - void fillTestValues(NBL_CONST_REF_ARG(TgmathIntputTestValues) input) - { - floor = nbl::hlsl::floor(input.floor); - isnan = nbl::hlsl::isnan(input.isnan); - isinf = nbl::hlsl::isinf(input.isinf); - pow = nbl::hlsl::pow(input.powX, input.powY); - exp = nbl::hlsl::exp(input.exp); - exp2 = nbl::hlsl::exp2(input.exp2); - log = nbl::hlsl::log(input.log); - log2 = nbl::hlsl::log2(input.log2); - absF = nbl::hlsl::abs(input.absF); - absI = nbl::hlsl::abs(input.absI); - sqrt = nbl::hlsl::sqrt(input.sqrt); - sin = nbl::hlsl::sin(input.sin); 
- cos = nbl::hlsl::cos(input.cos); - tan = nbl::hlsl::tan(input.tan); - asin = nbl::hlsl::asin(input.asin); - atan = nbl::hlsl::atan(input.atan); - sinh = nbl::hlsl::sinh(input.sinh); - cosh = nbl::hlsl::cosh(input.cosh); - tanh = nbl::hlsl::tanh(input.tanh); - asinh = nbl::hlsl::asinh(input.asinh); - acosh = nbl::hlsl::acosh(input.acosh); - atanh = nbl::hlsl::atanh(input.atanh); - atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); - erf = nbl::hlsl::erf(input.erf); - erfInv = nbl::hlsl::erfInv(input.erfInv); - acos = nbl::hlsl::acos(input.acos); - modf = nbl::hlsl::modf(input.modf); - round = nbl::hlsl::round(input.round); - roundEven = nbl::hlsl::roundEven(input.roundEven); - trunc = nbl::hlsl::trunc(input.trunc); - ceil = nbl::hlsl::ceil(input.ceil); - fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); - ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); - - floorVec = nbl::hlsl::floor(input.floorVec); - isnanVec = nbl::hlsl::isnan(input.isnanVec); - isinfVec = nbl::hlsl::isinf(input.isinfVec); - powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); - expVec = nbl::hlsl::exp(input.expVec); - exp2Vec = nbl::hlsl::exp2(input.exp2Vec); - logVec = nbl::hlsl::log(input.logVec); - log2Vec = nbl::hlsl::log2(input.log2Vec); - absFVec = nbl::hlsl::abs(input.absFVec); - absIVec = nbl::hlsl::abs(input.absIVec); - sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); - sinVec = nbl::hlsl::sin(input.sinVec); - cosVec = nbl::hlsl::cos(input.cosVec); - tanVec = nbl::hlsl::tan(input.tanVec); - asinVec = nbl::hlsl::asin(input.asinVec); - atanVec = nbl::hlsl::atan(input.atanVec); - sinhVec = nbl::hlsl::sinh(input.sinhVec); - coshVec = nbl::hlsl::cosh(input.coshVec); - tanhVec = nbl::hlsl::tanh(input.tanhVec); - asinhVec = nbl::hlsl::asinh(input.asinhVec); - acoshVec = nbl::hlsl::acosh(input.acoshVec); - atanhVec = nbl::hlsl::atanh(input.atanhVec); - atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); - acosVec = nbl::hlsl::acos(input.acosVec); - modfVec = 
nbl::hlsl::modf(input.modfVec); - roundVec = nbl::hlsl::round(input.roundVec); - roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); - truncVec = nbl::hlsl::trunc(input.truncVec); - ceilVec = nbl::hlsl::ceil(input.ceilVec); - fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); - ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); - erfVec = nbl::hlsl::erf(input.erfVec); - erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); - - modfStruct = nbl::hlsl::modfStruct(input.modfStruct); - modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); - frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); - frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); - } -}; - -struct IntrinsicsIntputTestValues -{ - int bitCount; - float32_t3 crossLhs; - float32_t3 crossRhs; - float clampVal; - float clampMin; - float clampMax; - float32_t3 length; - float32_t3 normalize; - float32_t3 dotLhs; - float32_t3 dotRhs; - float32_t3x3 determinant; - uint32_t findMSB; - uint32_t findLSB; - float32_t3x3 inverse; - float32_t3x3 transpose; - float32_t3x3 mulLhs; - float32_t3x3 mulRhs; - float minA; - float minB; - float maxA; - float maxB; - float rsqrt; - uint32_t bitReverse; - float frac; - float mixX; - float mixY; - float mixA; - float sign; - float radians; - float degrees; - float stepEdge; - float stepX; - float smoothStepEdge0; - float smoothStepEdge1; - float smoothStepX; - - int32_t3 bitCountVec; - float32_t3 clampValVec; - float32_t3 clampMinVec; - float32_t3 clampMaxVec; - uint32_t3 findMSBVec; - uint32_t3 findLSBVec; - float32_t3 minAVec; - float32_t3 minBVec; - float32_t3 maxAVec; - float32_t3 maxBVec; - float32_t3 rsqrtVec; - uint32_t3 bitReverseVec; - float32_t3 fracVec; - float32_t3 mixXVec; - float32_t3 mixYVec; - float32_t3 mixAVec; - float32_t3 signVec; - float32_t3 radiansVec; - float32_t3 degreesVec; - float32_t3 stepEdgeVec; - float32_t3 stepXVec; - float32_t3 smoothStepEdge0Vec; - float32_t3 smoothStepEdge1Vec; - float32_t3 
smoothStepXVec; - float32_t3 faceForwardN; - float32_t3 faceForwardI; - float32_t3 faceForwardNref; - float32_t3 reflectI; - float32_t3 reflectN; - float32_t3 refractI; - float32_t3 refractN; - float refractEta; -}; - -struct IntrinsicsTestValues -{ - int bitCount; - float clamp; - float length; - float dot; - float determinant; - int findMSB; - int findLSB; - float min; - float max; - float rsqrt; - float frac; - uint32_t bitReverse; - float mix; - float sign; - float radians; - float degrees; - float step; - float smoothStep; - - float32_t3 normalize; - float32_t3 cross; - int32_t3 bitCountVec; - float32_t3 clampVec; - uint32_t3 findMSBVec; - uint32_t3 findLSBVec; - float32_t3 minVec; - float32_t3 maxVec; - float32_t3 rsqrtVec; - uint32_t3 bitReverseVec; - float32_t3 fracVec; - float32_t3 mixVec; - float32_t3 signVec; - float32_t3 radiansVec; - float32_t3 degreesVec; - float32_t3 stepVec; - float32_t3 smoothStepVec; - float32_t3 faceForward; - float32_t3 reflect; - float32_t3 refract; - - float32_t3x3 mul; - float32_t3x3 transpose; - float32_t3x3 inverse; - - void fillTestValues(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input) - { - bitCount = nbl::hlsl::bitCount(input.bitCount); - cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); - clamp = nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); - length = nbl::hlsl::length(input.length); - normalize = nbl::hlsl::normalize(input.normalize); - dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); - determinant = nbl::hlsl::determinant(input.determinant); - findMSB = nbl::hlsl::findMSB(input.findMSB); - findLSB = nbl::hlsl::findLSB(input.findLSB); - inverse = nbl::hlsl::inverse(input.inverse); - transpose = nbl::hlsl::transpose(input.transpose); - mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); - // TODO: fix min and max - min = nbl::hlsl::min(input.minA, input.minB); - max = nbl::hlsl::max(input.maxA, input.maxB); - rsqrt = nbl::hlsl::rsqrt(input.rsqrt); - bitReverse = 
nbl::hlsl::bitReverse(input.bitReverse); - frac = nbl::hlsl::fract(input.frac); - mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); - sign = nbl::hlsl::sign(input.sign); - radians = nbl::hlsl::radians(input.radians); - degrees = nbl::hlsl::degrees(input.degrees); - step = nbl::hlsl::step(input.stepEdge, input.stepX); - smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); - - bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); - clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); - findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); - findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); - // TODO: fix min and max - minVec = nbl::hlsl::min(input.minAVec, input.minBVec); - maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); - rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); - bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); - fracVec = nbl::hlsl::fract(input.fracVec); - mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); - - signVec = nbl::hlsl::sign(input.signVec); - radiansVec = nbl::hlsl::radians(input.radiansVec); - degreesVec = nbl::hlsl::degrees(input.degreesVec); - stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); - smoothStepVec = nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); - faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); - reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); - refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); - } -}; - -#endif + float32_t3 atan2Vec; + float32_t3 erfVec; + float32_t3 erfInvVec; + + ModfOutput modfStruct; + ModfOutput modfStructVec; + FrexpOutput frexpStruct; + FrexpOutput frexpStructVec; + + void fillTestValues(NBL_CONST_REF_ARG(TgmathIntputTestValues) input) + { + floor = nbl::hlsl::floor(input.floor); + isnan = nbl::hlsl::isnan(input.isnan); + isinf = 
nbl::hlsl::isinf(input.isinf); + pow = nbl::hlsl::pow(input.powX, input.powY); + exp = nbl::hlsl::exp(input.exp); + exp2 = nbl::hlsl::exp2(input.exp2); + log = nbl::hlsl::log(input.log); + log2 = nbl::hlsl::log2(input.log2); + absF = nbl::hlsl::abs(input.absF); + absI = nbl::hlsl::abs(input.absI); + sqrt = nbl::hlsl::sqrt(input.sqrt); + sin = nbl::hlsl::sin(input.sin); + cos = nbl::hlsl::cos(input.cos); + tan = nbl::hlsl::tan(input.tan); + asin = nbl::hlsl::asin(input.asin); + atan = nbl::hlsl::atan(input.atan); + sinh = nbl::hlsl::sinh(input.sinh); + cosh = nbl::hlsl::cosh(input.cosh); + tanh = nbl::hlsl::tanh(input.tanh); + asinh = nbl::hlsl::asinh(input.asinh); + acosh = nbl::hlsl::acosh(input.acosh); + atanh = nbl::hlsl::atanh(input.atanh); + atan2 = nbl::hlsl::atan2(input.atan2Y, input.atan2X); + erf = nbl::hlsl::erf(input.erf); + erfInv = nbl::hlsl::erfInv(input.erfInv); + acos = nbl::hlsl::acos(input.acos); + modf = nbl::hlsl::modf(input.modf); + round = nbl::hlsl::round(input.round); + roundEven = nbl::hlsl::roundEven(input.roundEven); + trunc = nbl::hlsl::trunc(input.trunc); + ceil = nbl::hlsl::ceil(input.ceil); + fma = nbl::hlsl::fma(input.fmaX, input.fmaY, input.fmaZ); + ldexp = nbl::hlsl::ldexp(input.ldexpArg, input.ldexpExp); + + floorVec = nbl::hlsl::floor(input.floorVec); + isnanVec = nbl::hlsl::isnan(input.isnanVec); + isinfVec = nbl::hlsl::isinf(input.isinfVec); + powVec = nbl::hlsl::pow(input.powXVec, input.powYVec); + expVec = nbl::hlsl::exp(input.expVec); + exp2Vec = nbl::hlsl::exp2(input.exp2Vec); + logVec = nbl::hlsl::log(input.logVec); + log2Vec = nbl::hlsl::log2(input.log2Vec); + absFVec = nbl::hlsl::abs(input.absFVec); + absIVec = nbl::hlsl::abs(input.absIVec); + sqrtVec = nbl::hlsl::sqrt(input.sqrtVec); + sinVec = nbl::hlsl::sin(input.sinVec); + cosVec = nbl::hlsl::cos(input.cosVec); + tanVec = nbl::hlsl::tan(input.tanVec); + asinVec = nbl::hlsl::asin(input.asinVec); + atanVec = nbl::hlsl::atan(input.atanVec); + sinhVec = 
nbl::hlsl::sinh(input.sinhVec); + coshVec = nbl::hlsl::cosh(input.coshVec); + tanhVec = nbl::hlsl::tanh(input.tanhVec); + asinhVec = nbl::hlsl::asinh(input.asinhVec); + acoshVec = nbl::hlsl::acosh(input.acoshVec); + atanhVec = nbl::hlsl::atanh(input.atanhVec); + atan2Vec = nbl::hlsl::atan2(input.atan2YVec, input.atan2XVec); + acosVec = nbl::hlsl::acos(input.acosVec); + modfVec = nbl::hlsl::modf(input.modfVec); + roundVec = nbl::hlsl::round(input.roundVec); + roundEvenVec = nbl::hlsl::roundEven(input.roundEvenVec); + truncVec = nbl::hlsl::trunc(input.truncVec); + ceilVec = nbl::hlsl::ceil(input.ceilVec); + fmaVec = nbl::hlsl::fma(input.fmaXVec, input.fmaYVec, input.fmaZVec); + ldexpVec = nbl::hlsl::ldexp(input.ldexpArgVec, input.ldexpExpVec); + erfVec = nbl::hlsl::erf(input.erfVec); + erfInvVec = nbl::hlsl::erfInv(input.erfInvVec); + + modfStruct = nbl::hlsl::modfStruct(input.modfStruct); + modfStructVec = nbl::hlsl::modfStruct(input.modfStructVec); + frexpStruct = nbl::hlsl::frexpStruct(input.frexpStruct); + frexpStructVec = nbl::hlsl::frexpStruct(input.frexpStructVec); + } +}; + +struct IntrinsicsIntputTestValues +{ + int bitCount; + float32_t3 crossLhs; + float32_t3 crossRhs; + float clampVal; + float clampMin; + float clampMax; + float32_t3 length; + float32_t3 normalize; + float32_t3 dotLhs; + float32_t3 dotRhs; + float32_t3x3 determinant; + uint32_t findMSB; + uint32_t findLSB; + float32_t3x3 inverse; + float32_t3x3 transpose; + float32_t3x3 mulLhs; + float32_t3x3 mulRhs; + float minA; + float minB; + float maxA; + float maxB; + float rsqrt; + uint32_t bitReverse; + float frac; + float mixX; + float mixY; + float mixA; + float sign; + float radians; + float degrees; + float stepEdge; + float stepX; + float smoothStepEdge0; + float smoothStepEdge1; + float smoothStepX; + uint32_t addCarryA; + uint32_t addCarryB; + uint32_t subBorrowA; + uint32_t subBorrowB; + + int32_t3 bitCountVec; + float32_t3 clampValVec; + float32_t3 clampMinVec; + float32_t3 clampMaxVec; + 
uint32_t3 findMSBVec; + uint32_t3 findLSBVec; + float32_t3 minAVec; + float32_t3 minBVec; + float32_t3 maxAVec; + float32_t3 maxBVec; + float32_t3 rsqrtVec; + uint32_t3 bitReverseVec; + float32_t3 fracVec; + float32_t3 mixXVec; + float32_t3 mixYVec; + float32_t3 mixAVec; + float32_t3 signVec; + float32_t3 radiansVec; + float32_t3 degreesVec; + float32_t3 stepEdgeVec; + float32_t3 stepXVec; + float32_t3 smoothStepEdge0Vec; + float32_t3 smoothStepEdge1Vec; + float32_t3 smoothStepXVec; + float32_t3 faceForwardN; + float32_t3 faceForwardI; + float32_t3 faceForwardNref; + float32_t3 reflectI; + float32_t3 reflectN; + float32_t3 refractI; + float32_t3 refractN; + float refractEta; + uint32_t3 addCarryAVec; + uint32_t3 addCarryBVec; + uint32_t3 subBorrowAVec; + uint32_t3 subBorrowBVec; +}; + +struct IntrinsicsTestValues +{ + int bitCount; + float clamp; + float length; + float dot; + float determinant; + int findMSB; + int findLSB; + float min; + float max; + float rsqrt; + float frac; + uint32_t bitReverse; + float mix; + float sign; + float radians; + float degrees; + float step; + float smoothStep; + + float32_t3 normalize; + float32_t3 cross; + int32_t3 bitCountVec; + float32_t3 clampVec; + uint32_t3 findMSBVec; + uint32_t3 findLSBVec; + float32_t3 minVec; + float32_t3 maxVec; + float32_t3 rsqrtVec; + uint32_t3 bitReverseVec; + float32_t3 fracVec; + float32_t3 mixVec; + float32_t3 signVec; + float32_t3 radiansVec; + float32_t3 degreesVec; + float32_t3 stepVec; + float32_t3 smoothStepVec; + float32_t3 faceForward; + float32_t3 reflect; + float32_t3 refract; + + float32_t3x3 mul; + float32_t3x3 transpose; + float32_t3x3 inverse; + + spirv::AddCarryOutput addCarry; + spirv::SubBorrowOutput subBorrow; + spirv::AddCarryOutput addCarryVec; + spirv::SubBorrowOutput subBorrowVec; + + void fillTestValues(NBL_CONST_REF_ARG(IntrinsicsIntputTestValues) input) + { + bitCount = nbl::hlsl::bitCount(input.bitCount); + cross = nbl::hlsl::cross(input.crossLhs, input.crossRhs); + clamp 
= nbl::hlsl::clamp(input.clampVal, input.clampMin, input.clampMax); + length = nbl::hlsl::length(input.length); + normalize = nbl::hlsl::normalize(input.normalize); + dot = nbl::hlsl::dot(input.dotLhs, input.dotRhs); + determinant = nbl::hlsl::determinant(input.determinant); + findMSB = nbl::hlsl::findMSB(input.findMSB); + findLSB = nbl::hlsl::findLSB(input.findLSB); + inverse = nbl::hlsl::inverse(input.inverse); + transpose = nbl::hlsl::transpose(input.transpose); + mul = nbl::hlsl::mul(input.mulLhs, input.mulRhs); + // TODO: fix min and max + min = nbl::hlsl::min(input.minA, input.minB); + max = nbl::hlsl::max(input.maxA, input.maxB); + rsqrt = nbl::hlsl::rsqrt(input.rsqrt); + bitReverse = nbl::hlsl::bitReverse(input.bitReverse); + frac = nbl::hlsl::fract(input.frac); + mix = nbl::hlsl::mix(input.mixX, input.mixY, input.mixA); + sign = nbl::hlsl::sign(input.sign); + radians = nbl::hlsl::radians(input.radians); + degrees = nbl::hlsl::degrees(input.degrees); + step = nbl::hlsl::step(input.stepEdge, input.stepX); + smoothStep = nbl::hlsl::smoothStep(input.smoothStepEdge0, input.smoothStepEdge1, input.smoothStepX); + + bitCountVec = nbl::hlsl::bitCount(input.bitCountVec); + clampVec = nbl::hlsl::clamp(input.clampValVec, input.clampMinVec, input.clampMaxVec); + findMSBVec = nbl::hlsl::findMSB(input.findMSBVec); + findLSBVec = nbl::hlsl::findLSB(input.findLSBVec); + // TODO: fix min and max + minVec = nbl::hlsl::min(input.minAVec, input.minBVec); + maxVec = nbl::hlsl::max(input.maxAVec, input.maxBVec); + rsqrtVec = nbl::hlsl::rsqrt(input.rsqrtVec); + bitReverseVec = nbl::hlsl::bitReverse(input.bitReverseVec); + fracVec = nbl::hlsl::fract(input.fracVec); + mixVec = nbl::hlsl::mix(input.mixXVec, input.mixYVec, input.mixAVec); + + signVec = nbl::hlsl::sign(input.signVec); + radiansVec = nbl::hlsl::radians(input.radiansVec); + degreesVec = nbl::hlsl::degrees(input.degreesVec); + stepVec = nbl::hlsl::step(input.stepEdgeVec, input.stepXVec); + smoothStepVec = 
nbl::hlsl::smoothStep(input.smoothStepEdge0Vec, input.smoothStepEdge1Vec, input.smoothStepXVec); + faceForward = nbl::hlsl::faceForward(input.faceForwardN, input.faceForwardI, input.faceForwardNref); + reflect = nbl::hlsl::reflect(input.reflectI, input.reflectN); + refract = nbl::hlsl::refract(input.refractI, input.refractN, input.refractEta); + addCarry = nbl::hlsl::addCarry(input.addCarryA, input.addCarryB); + subBorrow = nbl::hlsl::subBorrow(input.subBorrowA, input.subBorrowB); + addCarryVec = nbl::hlsl::addCarry(input.addCarryAVec, input.addCarryBVec); + subBorrowVec = nbl::hlsl::subBorrow(input.subBorrowAVec, input.subBorrowBVec); + } +}; + +#endif From f00bbf6fa914ec230df8a000deee75aee69cdce9 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 7 Apr 2025 19:48:46 -0300 Subject: [PATCH 06/57] Disable intrinsic tests for uSUbBorrow for the time being, start copying 22_CppCOmpat to run tests --- 12_Mortons/Tester.h | 417 +++++++++++++++++++++++++++ 12_Mortons/app_resources/common.hlsl | 38 ++- 12_Mortons/app_resources/shader.hlsl | 18 -- 12_Mortons/main.cpp | 198 +------------ 22_CppCompat/CIntrinsicsTester.h | 22 +- 5 files changed, 474 insertions(+), 219 deletions(-) create mode 100644 12_Mortons/Tester.h delete mode 100644 12_Mortons/app_resources/shader.hlsl diff --git a/12_Mortons/Tester.h b/12_Mortons/Tester.h new file mode 100644 index 000000000..5c4773111 --- /dev/null +++ b/12_Mortons/Tester.h @@ -0,0 +1,417 @@ +#ifndef _NBL_EXAMPLES_TESTS_12_MORTONS_I_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_12_MORTONS_I_TESTER_INCLUDED_ + +#include +#include "app_resources/common.hlsl" +#include "nbl/application_templates/MonoDeviceApplication.hpp" +#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" + +using namespace nbl; + +class Tester +{ +public: + virtual ~Tester() + { + m_outputBufferAllocation.memory->unmap(); + }; + + struct PipelineSetupData + { + std::string testShaderPath; + + core::smart_refctd_ptr device; + 
core::smart_refctd_ptr api; + core::smart_refctd_ptr assetMgr; + core::smart_refctd_ptr logger; + video::IPhysicalDevice* physicalDevice; + uint32_t computeFamilyIndex; + }; + + template + void setupPipeline(const PipelineSetupData& pipleineSetupData) + { + // setting up pipeline in the constructor + m_device = core::smart_refctd_ptr(pipleineSetupData.device); + m_physicalDevice = pipleineSetupData.physicalDevice; + m_api = core::smart_refctd_ptr(pipleineSetupData.api); + m_assetMgr = core::smart_refctd_ptr(pipleineSetupData.assetMgr); + m_logger = core::smart_refctd_ptr(pipleineSetupData.logger); + m_queueFamily = pipleineSetupData.computeFamilyIndex; + m_semaphoreCounter = 0; + m_semaphore = m_device->createSemaphore(0); + m_cmdpool = m_device->createCommandPool(m_queueFamily, video::IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdpool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &m_cmdbuf)) + logFail("Failed to create Command Buffers!\n"); + + // Load shaders, set up pipeline + core::smart_refctd_ptr shader; + { + asset::IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + logFail("Could not load shader!"); + assert(0); + } + + // It would be super weird if loading a shader from a file produced more than 1 asset + assert(assets.size() == 1); + core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); + + auto* compilerSet = m_assetMgr->getCompilerSet(); + + asset::IShaderCompiler::SCompilerOptions options = {}; + options.stage = source->getStage(); + options.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; + options.debugInfoFlags |= asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; + 
options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); + + auto spirv = compilerSet->compileToSPIRV(source.get(), options); + + video::ILogicalDevice::SShaderCreationParameters params{}; + params.cpushader = spirv.get(); + shader = m_device->createShader(params); + } + + if (!shader) + logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); + + video::IGPUDescriptorSetLayout::SBinding bindings[2] = { + { + .binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1 + }, + { + .binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_COMPUTE, + .count = 1 + } + }; + + core::smart_refctd_ptr dsLayout = m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + logFail("Failed to create a Descriptor Layout!\n"); + + m_pplnLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(dsLayout)); + if (!m_pplnLayout) + logFail("Failed to create a Pipeline Layout!\n"); + + { + video::IGPUComputePipeline::SCreationParams params = {}; + params.layout = m_pplnLayout.get(); + params.shader.entryPoint = "main"; + params.shader.shader = shader.get(); + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + logFail("Failed to create pipelines (compile & link shaders)!\n"); + } + + // Allocate memory of the input buffer + { + constexpr size_t BufferSize = sizeof(InputStruct); + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + 
core::smart_refctd_ptr inputBuff = m_device->createBuffer(std::move(params)); + if (!inputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + inputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_inputBufferAllocation = m_device->allocate(reqs, inputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_inputBufferAllocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(inputBuff->getBoundMemory().memory == m_inputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + m_ds = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(inputBuff); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + } + + // Allocate memory of the output buffer + { + constexpr size_t BufferSize = sizeof(OutputStruct); + + video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = video::IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + core::smart_refctd_ptr outputBuff = m_device->createBuffer(std::move(params)); + if (!outputBuff) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + outputBuff->setObjectDebugName("emulated_float64_t output buffer"); + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_outputBufferAllocation = 
m_device->allocate(reqs, outputBuff.get(), video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_outputBufferAllocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(outputBuff->getBoundMemory().memory == m_outputBufferAllocation.memory.get()); + core::smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + { + video::IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = core::smart_refctd_ptr(outputBuff); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + video::IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 1,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + } + + if (!m_outputBufferAllocation.memory->map({ 0ull,m_outputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_outputBufferAllocation.memory.get(), 0ull, m_outputBufferAllocation.memory->getAllocationSize()); + if (!m_outputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + assert(memoryRange.valid() && memoryRange.length >= sizeof(OutputStruct)); + + m_queue = m_device->getQueue(m_queueFamily, 0); + } + + enum class TestType + { + CPU, + GPU + }; + + template + void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) + { + static constexpr float MaxAllowedError = 0.1f; + if (std::abs(double(expectedVal) - double(testVal)) <= MaxAllowedError) + return; + + std::stringstream ss; + switch (testType) + { + case TestType::CPU: + 
ss << "CPU TEST ERROR:\n"; + case TestType::GPU: + ss << "GPU TEST ERROR:\n"; + } + + ss << "nbl::hlsl::" << memberName << " produced incorrect output! test value: " << testVal << " expected value: " << expectedVal << '\n'; + + m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); + } + + template + void verifyTestVector3dValue(const std::string& memberName, const nbl::hlsl::vector& expectedVal, const nbl::hlsl::vector& testVal, const TestType testType) + { + static constexpr float MaxAllowedError = 0.1f; + if (std::abs(double(expectedVal.x) - double(testVal.x)) <= MaxAllowedError && + std::abs(double(expectedVal.y) - double(testVal.y)) <= MaxAllowedError && + std::abs(double(expectedVal.z) - double(testVal.z)) <= MaxAllowedError) + return; + + std::stringstream ss; + switch (testType) + { + case TestType::CPU: + ss << "CPU TEST ERROR:\n"; + case TestType::GPU: + ss << "GPU TEST ERROR:\n"; + } + + ss << "nbl::hlsl::" << memberName << " produced incorrect output! test value: " << + testVal.x << ' ' << testVal.y << ' ' << testVal.z << + " expected value: " << expectedVal.x << ' ' << expectedVal.y << ' ' << expectedVal.z << '\n'; + + m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); + } + + template + void verifyTestMatrix3x3Value(const std::string& memberName, const nbl::hlsl::matrix& expectedVal, const nbl::hlsl::matrix& testVal, const TestType testType) + { + for (int i = 0; i < 3; ++i) + { + auto expectedValRow = expectedVal[i]; + auto testValRow = testVal[i]; + verifyTestVector3dValue(memberName, expectedValRow, testValRow, testType); + } + } + + void performTests() + { + m_logger->log("intrinsics.hlsl TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < Iterations; ++i) + { + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; + + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + 
TestValues expected; + + performCpuTests(testInput, expected); + performGpuTests(testInput, expected); + } + m_logger->log("intrinsics.hlsl TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + } + +protected: + uint32_t m_queueFamily; + core::smart_refctd_ptr m_device; + core::smart_refctd_ptr m_api; + video::IPhysicalDevice* m_physicalDevice; + core::smart_refctd_ptr m_assetMgr; + core::smart_refctd_ptr m_logger; + video::IDeviceMemoryAllocator::SAllocation m_inputBufferAllocation = {}; + video::IDeviceMemoryAllocator::SAllocation m_outputBufferAllocation = {}; + core::smart_refctd_ptr m_cmdbuf = nullptr; + core::smart_refctd_ptr m_cmdpool = nullptr; + core::smart_refctd_ptr m_ds = nullptr; + core::smart_refctd_ptr m_pplnLayout = nullptr; + core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_semaphore; + video::IQueue* m_queue; + uint64_t m_semaphoreCounter; + + template + OutputStruct dispatch(const InputStruct& input) + { + // Update input buffer + if (!m_inputBufferAllocation.memory->map({ 0ull,m_inputBufferAllocation.memory->getAllocationSize() }, video::IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + const video::ILogicalDevice::MappedMemoryRange memoryRange(m_inputBufferAllocation.memory.get(), 0ull, m_inputBufferAllocation.memory->getAllocationSize()); + if (!m_inputBufferAllocation.memory->getMemoryPropertyFlags().hasFlags(video::IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); + + std::memcpy(static_cast(m_inputBufferAllocation.memory->getMappedPointer()), &input, sizeof(InputStruct)); + + m_inputBufferAllocation.memory->unmap(); + + // record command buffer + m_cmdbuf->reset(video::IGPUCommandBuffer::RESET_FLAGS::NONE); + m_cmdbuf->begin(video::IGPUCommandBuffer::USAGE::NONE); + m_cmdbuf->beginDebugMarker("test", core::vector4df_SIMD(0, 1, 0, 1)); + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + 
m_cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_pplnLayout.get(), 0, 1, &m_ds.get()); + m_cmdbuf->dispatch(1, 1, 1); + m_cmdbuf->endDebugMarker(); + m_cmdbuf->end(); + + video::IQueue::SSubmitInfo submitInfos[1] = {}; + const video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + submitInfos[0].commandBuffers = cmdbufs; + const video::IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = m_semaphore.get(), .value = ++m_semaphoreCounter, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + submitInfos[0].signalSemaphores = signals; + + m_api->startCapture(); + m_queue->submit(submitInfos); + m_api->endCapture(); + + m_device->waitIdle(); + OutputStruct output; + std::memcpy(&output, static_cast(m_outputBufferAllocation.memory->getMappedPointer()), sizeof(OutputStruct)); + m_device->waitIdle(); + + return output; + } + +private: + template + inline void logFail(const char* msg, Args&&... args) + { + m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); + exit(-1); + } + + inline static constexpr int Iterations = 100u; + + void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues cpuTestValues; + cpuTestValues.fillTestValues(commonTestInputValues); + verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + + } + + void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues gpuTestValues; + gpuTestValues = dispatch(commonTestInputValues); + verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + } + + void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + { + verifyTestValue("bitCount", expectedTestValues.bitCount, testValues.bitCount, testType); + verifyTestValue("clamp", expectedTestValues.clamp, testValues.clamp, testType); + 
verifyTestValue("length", expectedTestValues.length, testValues.length, testType); + verifyTestValue("dot", expectedTestValues.dot, testValues.dot, testType); + verifyTestValue("determinant", expectedTestValues.determinant, testValues.determinant, testType); + verifyTestValue("findMSB", expectedTestValues.findMSB, testValues.findMSB, testType); + verifyTestValue("findLSB", expectedTestValues.findLSB, testValues.findLSB, testType); + verifyTestValue("min", expectedTestValues.min, testValues.min, testType); + verifyTestValue("max", expectedTestValues.max, testValues.max, testType); + verifyTestValue("rsqrt", expectedTestValues.rsqrt, testValues.rsqrt, testType); + verifyTestValue("frac", expectedTestValues.frac, testValues.frac, testType); + verifyTestValue("bitReverse", expectedTestValues.bitReverse, testValues.bitReverse, testType); + verifyTestValue("mix", expectedTestValues.mix, testValues.mix, testType); + verifyTestValue("sign", expectedTestValues.sign, testValues.sign, testType); + verifyTestValue("radians", expectedTestValues.radians, testValues.radians, testType); + verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testType); + verifyTestValue("step", expectedTestValues.step, testValues.step, testType); + verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testType); + + verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); + verifyTestVector3dValue("cross", expectedTestValues.cross, testValues.cross, testType); + verifyTestVector3dValue("bitCountVec", expectedTestValues.bitCountVec, testValues.bitCountVec, testType); + verifyTestVector3dValue("clampVec", expectedTestValues.clampVec, testValues.clampVec, testType); + verifyTestVector3dValue("findMSBVec", expectedTestValues.findMSBVec, testValues.findMSBVec, testType); + verifyTestVector3dValue("findLSBVec", expectedTestValues.findLSBVec, testValues.findLSBVec, testType); + verifyTestVector3dValue("minVec", 
expectedTestValues.minVec, testValues.minVec, testType); + verifyTestVector3dValue("maxVec", expectedTestValues.maxVec, testValues.maxVec, testType); + verifyTestVector3dValue("rsqrtVec", expectedTestValues.rsqrtVec, testValues.rsqrtVec, testType); + verifyTestVector3dValue("bitReverseVec", expectedTestValues.bitReverseVec, testValues.bitReverseVec, testType); + verifyTestVector3dValue("fracVec", expectedTestValues.fracVec, testValues.fracVec, testType); + verifyTestVector3dValue("mixVec", expectedTestValues.mixVec, testValues.mixVec, testType); + + verifyTestVector3dValue("signVec", expectedTestValues.signVec, testValues.signVec, testType); + verifyTestVector3dValue("radiansVec", expectedTestValues.radiansVec, testValues.radiansVec, testType); + verifyTestVector3dValue("degreesVec", expectedTestValues.degreesVec, testValues.degreesVec, testType); + verifyTestVector3dValue("stepVec", expectedTestValues.stepVec, testValues.stepVec, testType); + verifyTestVector3dValue("smoothStepVec", expectedTestValues.smoothStepVec, testValues.smoothStepVec, testType); + verifyTestVector3dValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testType); + verifyTestVector3dValue("reflect", expectedTestValues.reflect, testValues.reflect, testType); + verifyTestVector3dValue("refract", expectedTestValues.refract, testValues.refract, testType); + + verifyTestMatrix3x3Value("mul", expectedTestValues.mul, testValues.mul, testType); + verifyTestMatrix3x3Value("transpose", expectedTestValues.transpose, testValues.transpose, testType); + verifyTestMatrix3x3Value("inverse", expectedTestValues.inverse, testValues.inverse, testType); + } +}; + +#endif \ No newline at end of file diff --git a/12_Mortons/app_resources/common.hlsl b/12_Mortons/app_resources/common.hlsl index bd5184f80..9632bd372 100644 --- a/12_Mortons/app_resources/common.hlsl +++ b/12_Mortons/app_resources/common.hlsl @@ -1,13 +1,33 @@ -//#include "nbl/builtin/hlsl/morton.hlsl" -#include 
"nbl/builtin/hlsl/cpp_compat.hlsl" +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h -NBL_CONSTEXPR uint32_t bufferSize = 256; +#ifndef _NBL_EXAMPLES_TESTS_12_MORTON_COMMON_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_12_MORTON_COMMON_INCLUDED_ -// Proper coverage would require writing tests for ALL possible sign, dimensions and width configurations -//using morton_t2 = nbl::hlsl::morton::code; // Fits in an int16_t -using vector_t2 = nbl::hlsl::vector; +// because DXC doesn't properly support `_Static_assert` +// TODO: add a message, and move to macros.h or cpp_compat +#define STATIC_ASSERT(...) { nbl::hlsl::conditional<__VA_ARGS__, int, void>::type a = 0; } -struct PushConstantData +#include + +#include + +// tgmath.hlsl and intrinsics.hlsl tests + +using namespace nbl::hlsl; +struct InputTestValues +{ + +}; + +struct TestValues { - uint64_t deviceBufferAddress; -}; \ No newline at end of file + + void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input) + { + + } +}; + +#endif diff --git a/12_Mortons/app_resources/shader.hlsl b/12_Mortons/app_resources/shader.hlsl deleted file mode 100644 index e7f570eee..000000000 --- a/12_Mortons/app_resources/shader.hlsl +++ /dev/null @@ -1,18 +0,0 @@ -#include "app_resources/common.hlsl" -#include "nbl/builtin/hlsl/bda/legacy_bda_accessor.hlsl" - -[[vk::push_constant]] PushConstantData pushConstants; - -[numthreads(bufferSize, 1, 1)] -void main(uint32_t3 ID : SV_DispatchThreadID) -{ - /* - LegacyBdaAccessor accessor = LegacyBdaAccessor::create(pushConstants.deviceBufferAddress); - - morton::code foo = morton::code::create(vector(-32768, -1)); - - //accessor.set(0, foo.value); - */ - uint32_t bar = _static_cast(0xCAFEDEADDEADBEEF); - accessor.set(0, bar); -} \ No newline at end of file diff --git a/12_Mortons/main.cpp b/12_Mortons/main.cpp index d1fddba7a..8118ec939 100644 --- 
a/12_Mortons/main.cpp +++ b/12_Mortons/main.cpp @@ -45,7 +45,17 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - + { + using namespace nbl::hlsl; + + auto bar = morton::code::create(hlsl::vector(893728, 7843, 98032)); + auto foo = _static_cast>(bar); + std::cout << foo[0] << " " << foo[1] << " " << foo[2] << " " << std::endl; + + //auto bar = morton::code::create(hlsl::vector(893728, 7843, 98032)); + //std::cout << "High Encoded: " << std::bitset<32>(bar.value.data.x) << std::endl; + //std::cout << "Low Encoded: " << std::bitset<32>(bar.value.data.y) << std::endl; + } /* // ----------------------------------------------- CPP TESTS ---------------------------------------------------------------------- @@ -193,188 +203,8 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, // Unsigned assert(unsigned_morton_t(unsigned_vector_t(239, 435, 66)) >= unsigned_morton_t(unsigned_vector_t(240, 435, 50)) == bool_vector_t(false, true, true)); - - if(!TestHLSL) - return true; - */ - - - - - - - // ----------------------------------------------- HLSL COMPILATION + OPTIONAL TESTS ---------------------------------------------- - auto shader = createShader("app_resources/shader.hlsl"); - - // Create massive upload/download buffers - constexpr uint32_t DownstreamBufferSize = sizeof(uint32_t) << 23; - - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize); - if (!m_utils) - return logFail("Failed to create Utilities!"); - m_downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer(); - m_downStreamingBufferAddress = m_downStreamingBuffer->getBuffer()->getDeviceAddress(); - - // Create device-local buffer - { - IGPUBuffer::SCreationParams deviceLocalBufferParams = {}; - - IQueue* const queue = getComputeQueue(); - uint32_t queueFamilyIndex = queue->getFamilyIndex(); - - 
deviceLocalBufferParams.queueFamilyIndexCount = 1; - deviceLocalBufferParams.queueFamilyIndices = &queueFamilyIndex; - deviceLocalBufferParams.size = sizeof(uint32_t) * bufferSize; - deviceLocalBufferParams.usage = nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | nbl::asset::IBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; - - m_deviceLocalBuffer = m_device->createBuffer(std::move(deviceLocalBufferParams)); - auto mreqs = m_deviceLocalBuffer->getMemoryReqs(); - mreqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto gpubufMem = m_device->allocate(mreqs, m_deviceLocalBuffer.get(), IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT); - - m_deviceLocalBufferAddress = m_deviceLocalBuffer.get()->getDeviceAddress(); - } - - const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE,.offset = 0,.size = sizeof(PushConstantData) }; - - { - auto layout = m_device->createPipelineLayout({ &pcRange,1 }); - IGPUComputePipeline::SCreationParams params = {}; - params.layout = layout.get(); - params.shader.shader = shader.get(); - params.shader.requiredSubgroupSize = static_cast(hlsl::findMSB(m_physicalDevice->getLimits().maxSubgroupSize)); - params.shader.requireFullSubgroups = true; - if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) - return logFail("Failed to create compute pipeline!\n"); - } - - const auto& deviceLimits = m_device->getPhysicalDevice()->getLimits(); - // The ranges of non-coherent mapped memory you flush or invalidate need to be aligned. You'll often see a value of 64 reported by devices - // which just happens to coincide with a CPU cache line size. So we ask our streaming buffers during allocation to give us properly aligned offsets. 
- // Sidenote: For SSBOs, UBOs, BufferViews, Vertex Buffer Bindings, Acceleration Structure BDAs, Shader Binding Tables, Descriptor Buffers, etc. - // there is also a requirement to bind buffers at offsets which have a certain alignment. Memory binding to Buffers and Images also has those. - // We'll align to max of coherent atom size even if the memory is coherent, - // and we also need to take into account BDA shader loads need to be aligned to the type being loaded. - m_alignment = core::max(deviceLimits.nonCoherentAtomSize, alignof(float)); - - // Semaphor used here to know the FFT is done before download - m_timeline = m_device->createSemaphore(semaphorValue); - - IQueue* const queue = getComputeQueue(); - - const uint32_t inputSize = sizeof(uint32_t) * bufferSize; - - // Just need a single suballocation in this example - const uint32_t AllocationCount = 1; - - // We always just wait till an allocation becomes possible (during allocation previous "latched" frees get their latch conditions polled) - // Freeing of Streaming Buffer Allocations can and should be deferred until an associated polled event signals done (more on that later). 
- std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); - - // finally allocate our output range - const uint32_t outputSize = inputSize; - - auto outputOffset = m_downStreamingBuffer->invalid_value; - m_downStreamingBuffer->multi_allocate(waitTill, AllocationCount, &outputOffset, &outputSize, &m_alignment); - - smart_refctd_ptr cmdbuf; - { - smart_refctd_ptr cmdpool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); - if (!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &cmdbuf)) { - return logFail("Failed to create Command Buffers!\n"); - } - cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &cmdbuf,1 }, core::smart_refctd_ptr(m_logger)); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cmdbuf->bindComputePipeline(m_pipeline.get()); - // This is the new fun part, pushing constants - const PushConstantData pc = { .deviceBufferAddress = m_deviceLocalBufferAddress }; - cmdbuf->pushConstants(m_pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0u, sizeof(pc), &pc); - // Remember we do a single workgroup per 1D array in these parts - cmdbuf->dispatch(1, 1, 1); - - // Pipeline barrier: wait for FFT shader to be done before copying to downstream buffer - IGPUCommandBuffer::SPipelineBarrierDependencyInfo pipelineBarrierInfo = {}; - - decltype(pipelineBarrierInfo)::buffer_barrier_t barrier = {}; - pipelineBarrierInfo.bufBarriers = { &barrier, 1u }; - - barrier.range.buffer = m_deviceLocalBuffer; - - barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT; - barrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS; - barrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - barrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS; - - cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS(0), pipelineBarrierInfo); - - IGPUCommandBuffer::SBufferCopy copyInfo = {}; - copyInfo.srcOffset = 
0; - copyInfo.dstOffset = 0; - copyInfo.size = m_deviceLocalBuffer->getSize(); - cmdbuf->copyBuffer(m_deviceLocalBuffer.get(), m_downStreamingBuffer->getBuffer(), 1, ©Info); - cmdbuf->end(); - } - - semaphorValue++; - { - const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = - { - .cmdbuf = cmdbuf.get() - }; - const IQueue::SSubmitInfo::SSemaphoreInfo signalInfo = - { - .semaphore = m_timeline.get(), - .value = semaphorValue, - .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT - }; - - const IQueue::SSubmitInfo submitInfo = { - .waitSemaphores = {}, - .commandBuffers = {&cmdbufInfo,1}, - .signalSemaphores = {&signalInfo,1} - }; - - m_api->startCapture(); - queue->submit({ &submitInfo,1 }); - m_api->endCapture(); - } - - // We let all latches know what semaphore and counter value has to be passed for the functors to execute - const ISemaphore::SWaitInfo futureWait = { m_timeline.get(),semaphorValue }; - - // Now a new and even more advanced usage of the latched events, we make our own refcounted object with a custom destructor and latch that like we did the commandbuffer. - // Instead of making our own and duplicating logic, we'll use one from IUtilities meant for down-staging memory. - // Its nice because it will also remember to invalidate our memory mapping if its not coherent. - auto latchedConsumer = make_smart_refctd_ptr( - IDeviceMemoryAllocation::MemoryRange(outputOffset, outputSize), - // Note the use of capture by-value [=] and not by-reference [&] because this lambda will be called asynchronously whenever the event signals - [=](const size_t dstOffset, const void* bufSrc, const size_t size)->void - { - // The unused variable is used for letting the consumer know the subsection of the output we've managed to download - // But here we're sure we can get the whole thing in one go because we allocated the whole range ourselves. 
- assert(dstOffset == 0 && size == outputSize); - - std::cout << "Begin array GPU\n"; - uint32_t* const data = reinterpret_cast(const_cast(bufSrc)); - //std::cout << std::bitset<32>(data[0]) << "\n"; - std::cout << data[0] << "\n"; - /* - for (auto i = 0u; i < bufferSize; i++) { - std::cout << std::bitset<32>(data[i]) << "\n"; - } - */ - std::cout << "\nEnd array GPU\n"; - }, - // Its also necessary to hold onto the commandbuffer, even though we take care to not reset the parent pool, because if it - // hits its destructor, our automated reference counting will drop all references to objects used in the recorded commands. - // It could also be latched in the upstreaming deallocate, because its the same fence. - std::move(cmdbuf), m_downStreamingBuffer - ); - // We put a function we want to execute - m_downStreamingBuffer->multi_deallocate(AllocationCount, &outputOffset, &outputSize, futureWait, &latchedConsumer.get()); - return true; } @@ -387,12 +217,6 @@ class MortonTestApp final : public application_templates::MonoDeviceApplication, // Cleanup bool onAppTerminated() override { - // Need to make sure that there are no events outstanding if we want all lambdas to eventually execute before `onAppTerminated` - // (the destructors of the Command Pool Cache and Streaming buffers will still wait for all lambda events to drain) - if (TestHLSL) - { - while (m_downStreamingBuffer->cull_frees()) {} - } return device_base_t::onAppTerminated(); } diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index 5fe7bc08e..09219a9e7 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -147,6 +147,9 @@ class CIntrinsicsTester final : public ITester expected.step = glm::step(testInput.stepEdge, testInput.stepX); expected.smoothStep = glm::smoothstep(testInput.smoothStepEdge0, testInput.smoothStepEdge1, testInput.smoothStepX); + expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, 
expected.addCarry.carry); + expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); + expected.frac = testInput.frac - std::floor(testInput.frac); expected.bitReverse = glm::bitfieldReverse(testInput.bitReverse); @@ -189,6 +192,9 @@ class CIntrinsicsTester final : public ITester expected.reflect = glm::reflect(testInput.reflectI, testInput.reflectN); expected.refract = glm::refract(testInput.refractI, testInput.refractN, testInput.refractEta); + expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); + expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); + auto mulGlm = nbl::hlsl::mul(testInput.mulLhs, testInput.mulRhs); expected.mul = reinterpret_cast(mulGlm); auto transposeGlm = glm::transpose(reinterpret_cast(testInput.transpose)); @@ -196,11 +202,6 @@ class CIntrinsicsTester final : public ITester auto inverseGlm = glm::inverse(reinterpret_cast(testInput.inverse)); expected.inverse = reinterpret_cast(inverseGlm); - expected.addCarry.result = glm::uaddCarry(testInput.addCarryA, testInput.addCarryB, expected.addCarry.carry); - expected.subBorrow.result = glm::usubBorrow(testInput.subBorrowA, testInput.subBorrowB, expected.subBorrow.borrow); - expected.addCarryVec.result = glm::uaddCarry(testInput.addCarryAVec, testInput.addCarryBVec, expected.addCarryVec.carry); - expected.subBorrowVec.result = glm::usubBorrow(testInput.subBorrowAVec, testInput.subBorrowBVec, expected.subBorrowVec.borrow); - performCpuTests(testInput, expected); performGpuTests(testInput, expected); } @@ -213,6 +214,7 @@ class CIntrinsicsTester final : public ITester void performCpuTests(const IntrinsicsIntputTestValues& commonTestInputValues, const IntrinsicsTestValues& expectedTestValues) { IntrinsicsTestValues cpuTestValues; + cpuTestValues.fillTestValues(commonTestInputValues); 
verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); @@ -245,6 +247,11 @@ class CIntrinsicsTester final : public ITester verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testType); verifyTestValue("step", expectedTestValues.step, testValues.step, testType); verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testType); + verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testType); + verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testType); + // Disabled: current glm implementation is wrong + //verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testType); + //verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testType); verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); verifyTestVector3dValue("cross", expectedTestValues.cross, testValues.cross, testType); @@ -267,6 +274,11 @@ class CIntrinsicsTester final : public ITester verifyTestVector3dValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testType); verifyTestVector3dValue("reflect", expectedTestValues.reflect, testValues.reflect, testType); verifyTestVector3dValue("refract", expectedTestValues.refract, testValues.refract, testType); + verifyTestVector3dValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testType); + verifyTestVector3dValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testType); + // Disabled: current glm implementation is wrong + //verifyTestVector3dValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, testValues.subBorrowVec.result, testType); + //verifyTestVector3dValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, 
testValues.subBorrowVec.borrow, testType); verifyTestMatrix3x3Value("mul", expectedTestValues.mul, testValues.mul, testType); verifyTestMatrix3x3Value("transpose", expectedTestValues.transpose, testValues.transpose, testType); From b2d87c36ad63c27b8547ea6583aa4c1ce716690d Mon Sep 17 00:00:00 2001 From: Fletterio Date: Thu, 24 Apr 2025 16:06:16 -0300 Subject: [PATCH 07/57] Added extensive tests for Morton codes --- 12_Mortons/Tester.h | 135 +++--- 12_Mortons/app_resources/common.hlsl | 453 +++++++++++++++++- 12_Mortons/app_resources/mortonTest.comp.hlsl | 16 + 12_Mortons/main.cpp | 298 +++--------- 22_CppCompat/ITester.h | 1 + 5 files changed, 604 insertions(+), 299 deletions(-) create mode 100644 12_Mortons/app_resources/mortonTest.comp.hlsl diff --git a/12_Mortons/Tester.h b/12_Mortons/Tester.h index 5c4773111..480328d18 100644 --- a/12_Mortons/Tester.h +++ b/12_Mortons/Tester.h @@ -1,5 +1,5 @@ -#ifndef _NBL_EXAMPLES_TESTS_12_MORTONS_I_TESTER_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_12_MORTONS_I_TESTER_INCLUDED_ +#ifndef _NBL_EXAMPLES_TESTS_12_MORTONS_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_12_MORTONS_TESTER_INCLUDED_ #include #include "app_resources/common.hlsl" @@ -128,7 +128,7 @@ class Tester if (!inputBuff) logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - inputBuff->setObjectDebugName("emulated_float64_t output buffer"); + inputBuff->setObjectDebugName("morton input buffer"); video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); @@ -163,7 +163,7 @@ class Tester if (!outputBuff) logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - outputBuff->setObjectDebugName("emulated_float64_t output buffer"); + outputBuff->setObjectDebugName("morton output buffer"); video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); reqs.memoryTypeBits &= 
m_physicalDevice->getHostVisibleMemoryTypeBits(); @@ -208,8 +208,7 @@ class Tester template void verifyTestValue(const std::string& memberName, const T& expectedVal, const T& testVal, const TestType testType) { - static constexpr float MaxAllowedError = 0.1f; - if (std::abs(double(expectedVal) - double(testVal)) <= MaxAllowedError) + if (expectedVal == testVal) return; std::stringstream ss; @@ -221,7 +220,7 @@ class Tester ss << "GPU TEST ERROR:\n"; } - ss << "nbl::hlsl::" << memberName << " produced incorrect output! test value: " << testVal << " expected value: " << expectedVal << '\n'; + ss << "nbl::hlsl::" << memberName << " produced incorrect output!" << '\n'; //test value: " << testVal << " expected value: " << expectedVal << '\n'; m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); } @@ -240,6 +239,7 @@ class Tester { case TestType::CPU: ss << "CPU TEST ERROR:\n"; + break; case TestType::GPU: ss << "GPU TEST ERROR:\n"; } @@ -251,32 +251,60 @@ class Tester m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); } - template - void verifyTestMatrix3x3Value(const std::string& memberName, const nbl::hlsl::matrix& expectedVal, const nbl::hlsl::matrix& testVal, const TestType testType) - { - for (int i = 0; i < 3; ++i) - { - auto expectedValRow = expectedVal[i]; - auto testValRow = testVal[i]; - verifyTestVector3dValue(memberName, expectedValRow, testValRow, testType); - } - } - void performTests() { - m_logger->log("intrinsics.hlsl TESTS:", system::ILogger::ELL_PERFORMANCE); + std::random_device rd; + std::mt19937 mt(rd()); + + std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); for (int i = 0; i < Iterations; ++i) { // Set input thest values that will be used in both CPU 
and GPU tests InputTestValues testInput; - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values TestValues expected; + uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + testInput.shift = generatedShift; + { + uint64_t generatedA = longDistribution(mt); + uint64_t generatedB = longDistribution(mt); + + testInput.generatedA = generatedA; + testInput.generatedB = generatedB; + + expected.emulatedAnd = _static_cast(generatedA & generatedB); + expected.emulatedOr = _static_cast(generatedA | generatedB); + expected.emulatedXor = _static_cast(generatedA ^ generatedB); + expected.emulatedNot = _static_cast(~generatedA); + expected.emulatedPlus = _static_cast(generatedA + generatedB); + expected.emulatedMinus = _static_cast(generatedA - generatedB); + expected.emulatedLess = uint32_t(generatedA < generatedB); + expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); + expected.emulatedGreater = uint32_t(generatedA > generatedB); + expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); + + expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); + expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); + expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); + } + { + uint64_t coordX = longDistribution(mt); + uint64_t coordY = longDistribution(mt); + uint64_t coordZ = longDistribution(mt); + uint64_t coordW = longDistribution(mt); + + + } + performCpuTests(testInput, expected); performGpuTests(testInput, expected); } - m_logger->log("intrinsics.hlsl TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); } protected: @@ -354,7 +382,7 @@ class Tester { TestValues cpuTestValues; cpuTestValues.fillTestValues(commonTestInputValues); - verifyTestValues(expectedTestValues, cpuTestValues, 
ITester::TestType::CPU); + verifyTestValues(expectedTestValues, cpuTestValues, TestType::CPU); } @@ -362,55 +390,26 @@ class Tester { TestValues gpuTestValues; gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + verifyTestValues(expectedTestValues, gpuTestValues, TestType::GPU); } - void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, TestType testType) { - verifyTestValue("bitCount", expectedTestValues.bitCount, testValues.bitCount, testType); - verifyTestValue("clamp", expectedTestValues.clamp, testValues.clamp, testType); - verifyTestValue("length", expectedTestValues.length, testValues.length, testType); - verifyTestValue("dot", expectedTestValues.dot, testValues.dot, testType); - verifyTestValue("determinant", expectedTestValues.determinant, testValues.determinant, testType); - verifyTestValue("findMSB", expectedTestValues.findMSB, testValues.findMSB, testType); - verifyTestValue("findLSB", expectedTestValues.findLSB, testValues.findLSB, testType); - verifyTestValue("min", expectedTestValues.min, testValues.min, testType); - verifyTestValue("max", expectedTestValues.max, testValues.max, testType); - verifyTestValue("rsqrt", expectedTestValues.rsqrt, testValues.rsqrt, testType); - verifyTestValue("frac", expectedTestValues.frac, testValues.frac, testType); - verifyTestValue("bitReverse", expectedTestValues.bitReverse, testValues.bitReverse, testType); - verifyTestValue("mix", expectedTestValues.mix, testValues.mix, testType); - verifyTestValue("sign", expectedTestValues.sign, testValues.sign, testType); - verifyTestValue("radians", expectedTestValues.radians, testValues.radians, testType); - verifyTestValue("degrees", expectedTestValues.degrees, testValues.degrees, testType); - verifyTestValue("step", expectedTestValues.step, 
testValues.step, testType); - verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testType); - - verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); - verifyTestVector3dValue("cross", expectedTestValues.cross, testValues.cross, testType); - verifyTestVector3dValue("bitCountVec", expectedTestValues.bitCountVec, testValues.bitCountVec, testType); - verifyTestVector3dValue("clampVec", expectedTestValues.clampVec, testValues.clampVec, testType); - verifyTestVector3dValue("findMSBVec", expectedTestValues.findMSBVec, testValues.findMSBVec, testType); - verifyTestVector3dValue("findLSBVec", expectedTestValues.findLSBVec, testValues.findLSBVec, testType); - verifyTestVector3dValue("minVec", expectedTestValues.minVec, testValues.minVec, testType); - verifyTestVector3dValue("maxVec", expectedTestValues.maxVec, testValues.maxVec, testType); - verifyTestVector3dValue("rsqrtVec", expectedTestValues.rsqrtVec, testValues.rsqrtVec, testType); - verifyTestVector3dValue("bitReverseVec", expectedTestValues.bitReverseVec, testValues.bitReverseVec, testType); - verifyTestVector3dValue("fracVec", expectedTestValues.fracVec, testValues.fracVec, testType); - verifyTestVector3dValue("mixVec", expectedTestValues.mixVec, testValues.mixVec, testType); - - verifyTestVector3dValue("signVec", expectedTestValues.signVec, testValues.signVec, testType); - verifyTestVector3dValue("radiansVec", expectedTestValues.radiansVec, testValues.radiansVec, testType); - verifyTestVector3dValue("degreesVec", expectedTestValues.degreesVec, testValues.degreesVec, testType); - verifyTestVector3dValue("stepVec", expectedTestValues.stepVec, testValues.stepVec, testType); - verifyTestVector3dValue("smoothStepVec", expectedTestValues.smoothStepVec, testValues.smoothStepVec, testType); - verifyTestVector3dValue("faceForward", expectedTestValues.faceForward, testValues.faceForward, testType); - verifyTestVector3dValue("reflect", 
expectedTestValues.reflect, testValues.reflect, testType); - verifyTestVector3dValue("refract", expectedTestValues.refract, testValues.refract, testType); - - verifyTestMatrix3x3Value("mul", expectedTestValues.mul, testValues.mul, testType); - verifyTestMatrix3x3Value("transpose", expectedTestValues.transpose, testValues.transpose, testType); - verifyTestMatrix3x3Value("inverse", expectedTestValues.inverse, testValues.inverse, testType); + verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); + verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); + verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); + verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); + verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); + verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testType); + verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); + verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); + verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); + verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); + verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); + verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); + verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); + + //verifyTestVector3dValue("normalize", 
expectedTestValues.normalize, testValues.normalize, testType); } }; diff --git a/12_Mortons/app_resources/common.hlsl b/12_Mortons/app_resources/common.hlsl index 9632bd372..be6a2f4a0 100644 --- a/12_Mortons/app_resources/common.hlsl +++ b/12_Mortons/app_resources/common.hlsl @@ -13,20 +13,471 @@ #include -// tgmath.hlsl and intrinsics.hlsl tests +NBL_CONSTEXPR uint16_t smallBits_2 = 8; +NBL_CONSTEXPR uint16_t mediumBits_2 = 16; +NBL_CONSTEXPR uint16_t fullBits_2 = 32; +NBL_CONSTEXPR uint16_t smallBits_3 = 5; +NBL_CONSTEXPR uint16_t mediumBits_3 = 10; +NBL_CONSTEXPR uint16_t fullBits_3 = 21; +NBL_CONSTEXPR uint16_t smallBits_4 = 4; +NBL_CONSTEXPR uint16_t mediumBits_4 = 8; +NBL_CONSTEXPR uint16_t fullBits_4 = 16; using namespace nbl::hlsl; struct InputTestValues { + // Both tests + uint32_t shift; + + // Emulated int tests + uint64_t generatedA; + uint64_t generatedB; + // Morton tests + uint64_t coordX; + uint64_t coordY; + uint64_t coordZ; + uint64_t coordW; }; struct TestValues { + // Emulated int tests + emulated_uint64_t emulatedAnd; + emulated_uint64_t emulatedOr; + emulated_uint64_t emulatedXor; + emulated_uint64_t emulatedNot; + emulated_uint64_t emulatedPlus; + emulated_uint64_t emulatedMinus; + // These are bools but stored as uint because you can't store bools, causes a SPIR-V issue + uint32_t emulatedLess; + uint32_t emulatedLessEqual; + uint32_t emulatedGreater; + uint32_t emulatedGreaterEqual; + emulated_uint64_t emulatedLeftShifted; + emulated_uint64_t emulatedUnsignedRightShifted; + emulated_int64_t emulatedSignedRightShifted; + + // Morton tests - for each dimension let's do one small, medium and full-szied (max bits possible) test to cover representation with + // 16, 32 and 64-bit types. 
Could make it more exhaustive with macros (test all possible bitwidths) + // For emulated mortons, we store only the emulated uint64 representing it, because DXC complains about bitcasts otherwise + + // Plus + morton::code mortonPlus_small_2; + morton::code mortonPlus_medium_2; + morton::code mortonPlus_full_2; + morton::code mortonPlus_emulated_2; + + morton::code mortonPlus_small_3; + morton::code mortonPlus_medium_3; + morton::code mortonPlus_full_3; + morton::code mortonPlus_emulated_3; + + morton::code mortonPlus_small_4; + morton::code mortonPlus_medium_4; + morton::code mortonPlus_full_4; + morton::code mortonPlus_emulated_4; + + // Minus + morton::code mortonMinus_small_2; + morton::code mortonMinus_medium_2; + morton::code mortonMinus_full_2; + morton::code mortonMinus_emulated_2; + + morton::code mortonMinus_small_3; + morton::code mortonMinus_medium_3; + morton::code mortonMinus_full_3; + morton::code mortonMinus_emulated_3; + + morton::code mortonMinus_small_4; + morton::code mortonMinus_medium_4; + morton::code mortonMinus_full_4; + morton::code mortonMinus_emulated_4; + + // Coordinate-wise equality (these are bools) + uint32_t2 mortonEqual_small_2; + uint32_t2 mortonEqual_medium_2; + uint32_t2 mortonEqual_full_2; + uint32_t2 mortonEqual_emulated_2; + + uint32_t3 mortonEqual_small_3; + uint32_t3 mortonEqual_medium_3; + uint32_t3 mortonEqual_full_3; + uint32_t3 mortonEqual_emulated_3; + + uint32_t4 mortonEqual_small_4; + uint32_t4 mortonEqual_medium_4; + uint32_t4 mortonEqual_full_4; + uint32_t4 mortonEqual_emulated_4; + + // Coordinate-wise unsigned inequality (just testing with less, again these are bools) + uint32_t2 mortonUnsignedLess_small_2; + uint32_t2 mortonUnsignedLess_medium_2; + uint32_t2 mortonUnsignedLess_full_2; + uint32_t2 mortonUnsignedLess_emulated_2; + + uint32_t3 mortonUnsignedLess_small_3; + uint32_t3 mortonUnsignedLess_medium_3; + uint32_t3 mortonUnsignedLess_full_3; + uint32_t3 mortonUnsignedLess_emulated_3; + + uint32_t4 
mortonUnsignedLess_small_4; + uint32_t4 mortonUnsignedLess_medium_4; + uint32_t4 mortonUnsignedLess_full_4; + uint32_t4 mortonUnsignedLess_emulated_4; + + // Coordinate-wise signed inequality (bools) + uint32_t2 mortonSignedLess_small_2; + uint32_t2 mortonSignedLess_medium_2; + uint32_t2 mortonSignedLess_full_2; + uint32_t2 mortonSignedLess_emulated_2; + + uint32_t3 mortonSignedLess_small_3; + uint32_t3 mortonSignedLess_medium_3; + uint32_t3 mortonSignedLess_full_3; + uint32_t3 mortonSignedLess_emulated_3; + + uint32_t4 mortonSignedLess_small_4; + uint32_t4 mortonSignedLess_medium_4; + uint32_t4 mortonSignedLess_full_4; + uint32_t4 mortonSignedLess_emulated_4; + + // Left-shift + morton::code mortonLeftShift_small_2; + morton::code mortonLeftShift_medium_2; + morton::code mortonLeftShift_full_2; + morton::code mortonLeftShift_emulated_2; + + morton::code mortonLeftShift_small_3; + morton::code mortonLeftShift_medium_3; + morton::code mortonLeftShift_full_3; + morton::code mortonLeftShift_emulated_3; + + morton::code mortonLeftShift_small_4; + morton::code mortonLeftShift_medium_4; + morton::code mortonLeftShift_full_4; + morton::code mortonLeftShift_emulated_4; + + // Unsigned right-shift + morton::code mortonUnsignedRightShift_small_2; + morton::code mortonUnsignedRightShift_medium_2; + morton::code mortonUnsignedRightShift_full_2; + morton::code mortonUnsignedRightShift_emulated_2; + + morton::code mortonUnsignedRightShift_small_3; + morton::code mortonUnsignedRightShift_medium_3; + morton::code mortonUnsignedRightShift_full_3; + morton::code mortonUnsignedRightShift_emulated_3; + + morton::code mortonUnsignedRightShift_small_4; + morton::code mortonUnsignedRightShift_medium_4; + morton::code mortonUnsignedRightShift_full_4; + morton::code mortonUnsignedRightShift_emulated_4; + + // Signed right-shift + morton::code mortonSignedRightShift_small_2; + morton::code mortonSignedRightShift_medium_2; + morton::code mortonSignedRightShift_full_2; + morton::code 
mortonSignedRightShift_emulated_2; + + morton::code mortonSignedRightShift_small_3; + morton::code mortonSignedRightShift_medium_3; + morton::code mortonSignedRightShift_full_3; + morton::code mortonSignedRightShift_emulated_3; + + morton::code mortonSignedRightShift_small_4; + morton::code mortonSignedRightShift_medium_4; + morton::code mortonSignedRightShift_full_4; + morton::code mortonSignedRightShift_emulated_4; void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input) { + emulated_uint64_t emulatedA = _static_cast(input.generatedA); + emulated_uint64_t emulatedB = _static_cast(input.generatedB); + + // Emulated int tests + emulatedAnd = emulatedA & emulatedB; + emulatedOr = emulatedA | emulatedB; + emulatedXor = emulatedA ^ emulatedB; + emulatedNot = emulatedA.operator~(); + emulatedPlus = emulatedA + emulatedB; + emulatedMinus = emulatedA - emulatedB; + emulatedLess = uint32_t(emulatedA < emulatedB); + emulatedLessEqual = uint32_t(emulatedA <= emulatedB); + emulatedGreater = uint32_t(emulatedA > emulatedB); + emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); + + left_shift_operator leftShift; + emulatedLeftShifted = leftShift(emulatedA, input.shift); + + arithmetic_right_shift_operator unsignedRightShift; + emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); + + arithmetic_right_shift_operator signedRightShift; + emulatedSignedRightShifted = signedRightShift(_static_cast(emulatedA), input.shift); + + // Morton tests + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + int64_t2 Vec2ASigned = int64_t2(Vec2A); + int64_t2 Vec2BSigned = int64_t2(Vec2B); + + int64_t3 Vec3ASigned = 
int64_t3(Vec3A); + int64_t3 Vec3BSigned = int64_t3(Vec3B); + + int64_t4 Vec4ASigned = int64_t4(Vec4A); + int64_t4 Vec4BSigned = int64_t4(Vec4B); + + morton::code morton_small_2A = morton::code::create(Vec2A); + morton::code morton_medium_2A = morton::code::create(Vec2A); + morton::code morton_full_2A = morton::code::create(Vec2A); + morton::code morton_emulated_2A = morton::code::create(Vec2A); + morton::code morton_small_2B = morton::code::create(Vec2B); + morton::code morton_medium_2B = morton::code::create(Vec2B); + morton::code morton_full_2B = morton::code::create(Vec2B); + morton::code morton_emulated_2B = morton::code::create(Vec2B); + + morton::code morton_small_3A = morton::code::create(Vec3A); + morton::code morton_medium_3A = morton::code::create(Vec3A); + morton::code morton_full_3A = morton::code::create(Vec3A); + morton::code morton_emulated_3A = morton::code::create(Vec3A); + morton::code morton_small_3B = morton::code::create(Vec3B); + morton::code morton_medium_3B = morton::code::create(Vec3B); + morton::code morton_full_3B = morton::code::create(Vec3B); + morton::code morton_emulated_3B = morton::code::create(Vec3B); + + morton::code morton_small_4A = morton::code::create(Vec4A); + morton::code morton_medium_4A = morton::code::create(Vec4A); + morton::code morton_full_4A = morton::code::create(Vec4A); + morton::code morton_emulated_4A = morton::code::create(Vec4A); + morton::code morton_small_4B = morton::code::create(Vec4B); + morton::code morton_medium_4B = morton::code::create(Vec4B); + morton::code morton_full_4B = morton::code::create(Vec4B); + morton::code morton_emulated_4B = morton::code::create(Vec4B); + + morton::code morton_small_2ASigned = morton::code::create(Vec2ASigned); + morton::code morton_medium_2ASigned = morton::code::create(Vec2ASigned); + morton::code morton_full_2ASigned = morton::code::create(Vec2ASigned); + morton::code morton_emulated_2ASigned = morton::code::create(Vec2ASigned); + morton::code morton_small_2BSigned = 
morton::code::create(Vec2BSigned); + morton::code morton_medium_2BSigned = morton::code::create(Vec2BSigned); + morton::code morton_full_2BSigned = morton::code::create(Vec2BSigned); + morton::code morton_emulated_2BSigned = morton::code::create(Vec2BSigned); + + morton::code morton_small_3ASigned = morton::code::create(Vec3ASigned); + morton::code morton_medium_3ASigned = morton::code::create(Vec3ASigned); + morton::code morton_full_3ASigned = morton::code::create(Vec3ASigned); + morton::code morton_emulated_3ASigned = morton::code::create(Vec3ASigned); + morton::code morton_small_3BSigned = morton::code::create(Vec3BSigned); + morton::code morton_medium_3BSigned = morton::code::create(Vec3BSigned); + morton::code morton_full_3BSigned = morton::code::create(Vec3BSigned); + morton::code morton_emulated_3BSigned = morton::code::create(Vec3BSigned); + + morton::code morton_small_4ASigned = morton::code::create(Vec4ASigned); + morton::code morton_medium_4ASigned = morton::code::create(Vec4ASigned); + morton::code morton_full_4ASigned = morton::code::create(Vec4ASigned); + morton::code morton_emulated_4ASigned = morton::code::create(Vec4ASigned); + morton::code morton_small_4BSigned = morton::code::create(Vec4BSigned); + morton::code morton_medium_4BSigned = morton::code::create(Vec4BSigned); + morton::code morton_full_4BSigned = morton::code::create(Vec4BSigned); + morton::code morton_emulated_4BSigned = morton::code::create(Vec4BSigned); + + /* + left_shift_operator > leftShiftTemp; + portable_vector_t interleaved = _static_cast >(uint16_t4(Vec4B)) & morton::impl::coding_mask_v<4, fullBits_4, morton::impl::CodingStages, emulated_uint64_t>; + + #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(fullBits_4 > (uint16_t(1) << I))\ + {\ + interleaved = interleaved | leftShiftTemp(interleaved, (uint16_t(1) << I) * (4 - 1));\ + interleaved = interleaved & _static_cast(morton::impl::coding_mask<4, fullBits_4, I>::value);\ + } + + ENCODE_LOOP_ITERATION(4) + 
ENCODE_LOOP_ITERATION(3) + ENCODE_LOOP_ITERATION(2) + ENCODE_LOOP_ITERATION(1) + ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION + // After interleaving, shift each coordinate left by their index + return leftShiftTemp(interleaved, truncate >(vector(0, 1, 2, 3))); + + + array_get, emulated_uint64_t> getter; + emulatedAnd = getter(interleaved, 0); + */ + + // Plus + mortonPlus_small_2 = morton_small_2A + morton_small_2B; + mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; + mortonPlus_full_2 = morton_full_2A + morton_full_2B; + mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; + + mortonPlus_small_3 = morton_small_3A + morton_small_3B; + mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; + mortonPlus_full_3 = morton_full_3A + morton_full_3B; + mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; + + mortonPlus_small_4 = morton_small_4A + morton_small_4B; + mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; + mortonPlus_full_4 = morton_full_4A + morton_full_4B; + mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; + + // Minus + mortonMinus_small_2 = morton_small_2A - morton_small_2B; + mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; + mortonMinus_full_2 = morton_full_2A - morton_full_2B; + mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; + + mortonMinus_small_3 = morton_small_3A - morton_small_3B; + mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; + mortonMinus_full_3 = morton_full_3A - morton_full_3B; + mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; + + mortonMinus_small_4 = morton_small_4A - morton_small_4B; + mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; + mortonMinus_full_4 = morton_full_4A - morton_full_4B; + mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; + + // Coordinate-wise equality + mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(uint16_t2(Vec2B))); + mortonEqual_medium_2 = 
uint32_t2(morton_medium_2A.equal(uint16_t2(Vec2B))); + mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(uint32_t2(Vec2B))); + mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(uint32_t2(Vec2B))); + + mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(uint16_t3(Vec3B))); + mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(uint16_t3(Vec3B))); + mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(uint32_t3(Vec3B))); + mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(uint32_t3(Vec3B))); + + mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); + mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); + mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); + mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); + + // Coordinate-wise unsigned inequality (just testing with less) + mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); + mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(uint16_t2(Vec2B))); + mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(uint32_t2(Vec2B))); + mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(uint32_t2(Vec2B))); + + mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(uint16_t3(Vec3B))); + mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(uint16_t3(Vec3B))); + mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(uint32_t3(Vec3B))); + mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); + + mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(uint16_t4(Vec4B))); + mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(uint16_t4(Vec4B))); + mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(uint16_t4(Vec4B))); + mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(uint16_t4(Vec4B))); + + // Coordinate-wise signed inequality + 
mortonSignedLess_small_2 = uint32_t2(morton_small_2ASigned.lessThan(int16_t2(Vec2BSigned))); + mortonSignedLess_medium_2 = uint32_t2(morton_medium_2ASigned.lessThan(int16_t2(Vec2BSigned))); + mortonSignedLess_full_2 = uint32_t2(morton_full_2ASigned.lessThan(int32_t2(Vec2BSigned))); + //mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2ASigned.lessThan(int32_t2(Vec2BSigned))); + + mortonSignedLess_small_3 = uint32_t3(morton_small_3ASigned.lessThan(int16_t3(Vec3BSigned))); + mortonSignedLess_medium_3 = uint32_t3(morton_medium_3ASigned.lessThan(int16_t3(Vec3BSigned))); + mortonSignedLess_full_3 = uint32_t3(morton_full_3ASigned.lessThan(int32_t3(Vec3BSigned))); + //mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3ASigned.lessThan(int32_t3(Vec3BSigned))); + + mortonSignedLess_small_4 = uint32_t4(morton_small_4ASigned.lessThan(int16_t4(Vec4BSigned))); + mortonSignedLess_medium_4 = uint32_t4(morton_medium_4ASigned.lessThan(int16_t4(Vec4BSigned))); + mortonSignedLess_full_4 = uint32_t4(morton_full_4ASigned.lessThan(int16_t4(Vec4BSigned))); + //mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4ASigned.lessThan(int16_t4(Vec4BSigned))); + + // Left-shift + uint16_t castedShift = uint16_t(input.shift); + left_shift_operator > leftShiftSmall2; + mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift); + left_shift_operator > leftShiftMedium2; + mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift); + left_shift_operator > leftShiftFull2; + mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift); + left_shift_operator > leftShiftEmulated2; + mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift); + + left_shift_operator > leftShiftSmall3; + mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift); + left_shift_operator > leftShiftMedium3; + mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift); + left_shift_operator > leftShiftFull3; + 
mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift); + left_shift_operator > leftShiftEmulated3; + mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift); + + left_shift_operator > leftShiftSmall4; + mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift); + left_shift_operator > leftShiftMedium4; + mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift); + left_shift_operator > leftShiftFull4; + mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift); + left_shift_operator > leftShiftEmulated4; + mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift); + + // Unsigned right-shift + arithmetic_right_shift_operator > rightShiftSmall2; + mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift); + arithmetic_right_shift_operator > rightShiftMedium2; + mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift); + arithmetic_right_shift_operator > rightShiftFull2; + mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift); + arithmetic_right_shift_operator > rightShiftEmulated2; + mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift); + + arithmetic_right_shift_operator > rightShiftSmall3; + mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift); + arithmetic_right_shift_operator > rightShiftMedium3; + mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift); + arithmetic_right_shift_operator > rightShiftFull3; + mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift); + arithmetic_right_shift_operator > rightShiftEmulated3; + mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift); + + arithmetic_right_shift_operator > rightShiftSmall4; + mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift); + 
arithmetic_right_shift_operator > rightShiftMedium4; + mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift); + arithmetic_right_shift_operator > rightShiftFull4; + mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift); + arithmetic_right_shift_operator > rightShiftEmulated4; + mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift); + + // Signed right-shift + arithmetic_right_shift_operator > rightShiftSignedSmall2; + mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedMedium2; + mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedFull2; + mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedEmulated2; + //mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2ASigned, castedShift); + + arithmetic_right_shift_operator > rightShiftSignedSmall3; + mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedMedium3; + mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedFull3; + mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedEmulated3; + //mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedSmall4; + mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedMedium4; + mortonSignedRightShift_medium_4 = 
rightShiftSignedMedium4(morton_medium_4ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedFull4; + mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4ASigned, castedShift); + arithmetic_right_shift_operator > rightShiftSignedEmulated4; + //mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4ASigned, castedShift); } }; diff --git a/12_Mortons/app_resources/mortonTest.comp.hlsl b/12_Mortons/app_resources/mortonTest.comp.hlsl new file mode 100644 index 000000000..7041568b8 --- /dev/null +++ b/12_Mortons/app_resources/mortonTest.comp.hlsl @@ -0,0 +1,16 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". +//// For conditions of distribution and use, see copyright notice in nabla.h +#pragma shader_stage(compute) + +#include "common.hlsl" + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(256, 1, 1)] +void main(uint3 invocationID : SV_DispatchThreadID) +{ + if (invocationID.x == 0) + outputTestValues[0].fillTestValues(inputTestValues[0]); +} diff --git a/12_Mortons/main.cpp b/12_Mortons/main.cpp index 8118ec939..f83c49b9e 100644 --- a/12_Mortons/main.cpp +++ b/12_Mortons/main.cpp @@ -1,242 +1,80 @@ -// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include +#include +#include +#include - -// I've moved out a tiny part of this example into a shared header for reuse, please open and read it. 
#include "nbl/application_templates/MonoDeviceApplication.hpp" #include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" #include "app_resources/common.hlsl" -#include - -// Right now the test only checks that HLSL compiles the file -constexpr bool TestHLSL = true; +#include "Tester.h" -using namespace nbl; -using namespace core; -using namespace system; -using namespace asset; -using namespace video; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::video; +using namespace nbl::application_templates; -// this time instead of defining our own `int main()` we derive from `nbl::system::IApplicationFramework` to play "nice" wil all platforms -class MortonTestApp final : public application_templates::MonoDeviceApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class MortonTest final : public MonoDeviceApplication, public MonoAssetManagerAndBuiltinResourceApplication { - using device_base_t = application_templates::MonoDeviceApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; - - inline core::smart_refctd_ptr createShader( - const char* includeMainName) - { - std::string prelude = "#include \""; - auto CPUShader = core::make_smart_refctd_ptr((prelude + includeMainName + "\"\n").c_str(), IShader::E_SHADER_STAGE::ESS_COMPUTE, IShader::E_CONTENT_TYPE::ECT_HLSL, includeMainName); - assert(CPUShader); - return m_device->createShader(CPUShader.get()); - } - public: - MortonTestApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : - system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - - // we stuff all our work here because its a "single shot" app - bool onAppInitialized(smart_refctd_ptr&& system) override - { - // Remember to call the base class 
initialization! - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - if (!asset_base_t::onAppInitialized(std::move(system))) - return false; - { - using namespace nbl::hlsl; - - auto bar = morton::code::create(hlsl::vector(893728, 7843, 98032)); - auto foo = _static_cast>(bar); - std::cout << foo[0] << " " << foo[1] << " " << foo[2] << " " << std::endl; - - //auto bar = morton::code::create(hlsl::vector(893728, 7843, 98032)); - //std::cout << "High Encoded: " << std::bitset<32>(bar.value.data.x) << std::endl; - //std::cout << "Low Encoded: " << std::bitset<32>(bar.value.data.y) << std::endl; - } - /* - - // ----------------------------------------------- CPP TESTS ---------------------------------------------------------------------- - - // Coordinate extraction and whole vector decode tests - { - morton_t morton(vector_t(-1011, 765, 248)); - unsigned_morton_t unsignedMorton(unsigned_vector_t(154, 789, 1011)); - - assert(morton.getCoordinate(0) == -1011 && morton.getCoordinate(1) == 765 && morton.getCoordinate(2) == 248); - assert(unsignedMorton.getCoordinate(0) == 154u && unsignedMorton.getCoordinate(1) == 789u && unsignedMorton.getCoordinate(2) == 1011u); - - assert(static_cast(morton) == vector_t(-1011, 765, 248) && static_cast(unsignedMorton) == unsigned_vector_t(154, 789, 1011)); - } - - // *********************************************************************************************************************************** - // ************************************************* Arithmetic operator tests ******************************************************* - // *********************************************************************************************************************************** - - // ---------------------------------------------------------------------------------------------------- - // --------------------------------------- ADDITION --------------------------------------------------- - // 
---------------------------------------------------------------------------------------------------- - - // ---------------------------------------- Signed ----------------------------------------------------- - - // No overflow - assert(static_cast(morton_t(vector_t(-1011, 765, 248)) + morton_t(vector_t(1000, -985, 200))) == vector_t(-11, -220, 448)); - - // Type 1 overflow: Addition of representable coordinates goes out of range - assert(static_cast(morton_t(vector_t(-900, 70, 500)) + morton_t(vector_t(-578, -50, 20))) == vector_t(570, 20, -504)); - - // Type 2 overflow: Addition of irrepresentable range gives correct result - assert(static_cast(morton_t(vector_t(54, 900, -475)) + morton_t(vector_t(46, -1437, 699))) == vector_t(100, -537, 224)); - - // ---------------------------------------- Unsigned ----------------------------------------------------- - - // No overflow - assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) + unsigned_morton_t(unsigned_vector_t(1563, 754, 220))) == unsigned_vector_t(1945, 1664, 763)); - - // Type 1 overflow: Addition of representable coordinates goes out of range - assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) + unsigned_morton_t(unsigned_vector_t(2000, 2000, 1000))) == unsigned_vector_t(334, 862, 519)); - - // Type 2 overflow: Addition of irrepresentable range gives correct result - assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) + unsigned_morton_t(unsigned_vector_t(-143, -345, -233))) == unsigned_vector_t(239, 565, 310)); - - // ---------------------------------------------------------------------------------------------------- - // -------------------------------------- SUBTRACTION ------------------------------------------------- - // ---------------------------------------------------------------------------------------------------- - - // ---------------------------------------- Signed ----------------------------------------------------- - - // No 
overflow - assert(static_cast(morton_t(vector_t(1000, 764, -365)) - morton_t(vector_t(834, -243, 100))) == vector_t(166, 1007, -465)); - - // Type 1 overflow: Subtraction of representable coordinates goes out of range - assert(static_cast(morton_t(vector_t(-900, 70, 500)) - morton_t(vector_t(578, -50, -20))) == vector_t(570, 120, -504)); - - // Type 2 overflow: Subtraction of irrepresentable range gives correct result - assert(static_cast(morton_t(vector_t(54, 900, -475)) - morton_t(vector_t(-46, 1437, -699))) == vector_t(100, -537, 224)); - - // ---------------------------------------- Unsigned ----------------------------------------------------- - - // No overflow - assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) - unsigned_morton_t(unsigned_vector_t(322, 564, 299))) == unsigned_vector_t(60, 346, 244)); - - // Type 1 overflow: Subtraction of representable coordinates goes out of range - assert(static_cast(unsigned_morton_t(unsigned_vector_t(382, 910, 543)) - unsigned_morton_t(unsigned_vector_t(2000, 2000, 1000))) == unsigned_vector_t(430, 958, 567)); - - // Type 2 overflow: Subtraction of irrepresentable range gives correct result - assert(static_cast(unsigned_morton_t(unsigned_vector_t(54, 900, 475)) - unsigned_morton_t(unsigned_vector_t(-865, -100, -10))) == unsigned_vector_t(919, 1000, 485)); - - - // ---------------------------------------------------------------------------------------------------- - // -------------------------------------- UNARY NEGATION ---------------------------------------------- - // ---------------------------------------------------------------------------------------------------- - - // Only makes sense for signed - assert(static_cast(- morton_t(vector_t(-1024, 543, -475))) == vector_t(-1024, -543, 475)); - - // *********************************************************************************************************************************** - // ************************************************* Comparison 
operator tests ******************************************************* - // *********************************************************************************************************************************** - - // ---------------------------------------------------------------------------------------------------- - // -------------------------------------- OPERATOR< --------------------------------------------------- - // ---------------------------------------------------------------------------------------------------- - - // Signed - - // Same sign, negative - assert(morton_t(vector_t(-954, -455, -333)) < morton_t(vector_t(-433, -455, -433)) == bool_vector_t(true, false, false)); - // Same sign, positive - assert(morton_t(vector_t(954, 455, 333)) < morton_t(vector_t(433, 455, 433)) == bool_vector_t(false, false, true)); - // Differing signs - assert(morton_t(vector_t(954, -32, 0)) < morton_t(vector_t(-44, 0, -1)) == bool_vector_t(false, true, false)); - - // Unsigned - assert(unsigned_morton_t(unsigned_vector_t(239, 435, 66)) < unsigned_morton_t(unsigned_vector_t(240, 435, 50)) == bool_vector_t(true, false, false)); - - // ---------------------------------------------------------------------------------------------------- - // -------------------------------------- OPERATOR<= -------------------------------------------------- - // ---------------------------------------------------------------------------------------------------- - - // Signed - - // Same sign, negative - assert(morton_t(vector_t(-954, -455, -333)) <= morton_t(vector_t(-433, -455, -433)) == bool_vector_t(true, true, false)); - // Same sign, positive - assert(morton_t(vector_t(954, 455, 333)) <= morton_t(vector_t(433, 455, 433)) == bool_vector_t(false, true, true)); - // Differing signs - assert(morton_t(vector_t(954, -32, 0)) <= morton_t(vector_t(-44, 0, -1)) == bool_vector_t(false, true, false)); - - // Unsigned - assert(unsigned_morton_t(unsigned_vector_t(239, 435, 66)) <= 
unsigned_morton_t(unsigned_vector_t(240, 435, 50)) == bool_vector_t(true, true, false)); - - // ---------------------------------------------------------------------------------------------------- - // -------------------------------------- OPERATOR> --------------------------------------------------- - // ---------------------------------------------------------------------------------------------------- - - // Signed - - // Same sign, negative - assert(morton_t(vector_t(-954, -455, -333)) > morton_t(vector_t(-433, -455, -433)) == bool_vector_t(false, false, true)); - // Same sign, positive - assert(morton_t(vector_t(954, 455, 333)) > morton_t(vector_t(433, 455, 433)) == bool_vector_t(true, false, false)); - // Differing signs - assert(morton_t(vector_t(954, -32, 0)) > morton_t(vector_t(-44, 0, -1)) == bool_vector_t(true, false, true)); - - // Unsigned - assert(unsigned_morton_t(unsigned_vector_t(239, 435, 66)) > unsigned_morton_t(unsigned_vector_t(240, 435, 50)) == bool_vector_t(false, false, true)); - - // ---------------------------------------------------------------------------------------------------- - // -------------------------------------- OPERATOR>= -------------------------------------------------- - // ---------------------------------------------------------------------------------------------------- - - // Signed - - // Same sign, negative - assert(morton_t(vector_t(-954, -455, -333)) >= morton_t(vector_t(-433, -455, -433)) == bool_vector_t(false, true, true)); - // Same sign, positive - assert(morton_t(vector_t(954, 455, 333)) >= morton_t(vector_t(433, 455, 433)) == bool_vector_t(true, true, false)); - // Differing signs - assert(morton_t(vector_t(954, -32, 0)) >= morton_t(vector_t(-44, 0, -1)) == bool_vector_t(true, false, true)); - - // Unsigned - assert(unsigned_morton_t(unsigned_vector_t(239, 435, 66)) >= unsigned_morton_t(unsigned_vector_t(240, 435, 50)) == bool_vector_t(false, true, true)); - - */ - - return true; - } - - // Platforms like 
WASM expect the main entry point to periodically return control, hence if you want a crossplatform app, you have to let the framework deal with your "game loop" - void workLoopBody() override {} - - // Whether to keep invoking the above. In this example because its headless GPU compute, we do all the work in the app initialization. - bool keepRunning() override {return false;} - - // Cleanup - bool onAppTerminated() override - { - return device_base_t::onAppTerminated(); - } - - private: - smart_refctd_ptr m_pipeline; - - smart_refctd_ptr m_utils; - - StreamingTransientDataBufferMT<>* m_downStreamingBuffer; - smart_refctd_ptr m_deviceLocalBuffer; - - // These are Buffer Device Addresses - uint64_t m_downStreamingBufferAddress; - uint64_t m_deviceLocalBufferAddress; - - uint32_t m_alignment; - - smart_refctd_ptr m_timeline; - uint64_t semaphorValue = 0; + using device_base_t = MonoDeviceApplication; + using asset_base_t = MonoAssetManagerAndBuiltinResourceApplication; +public: + MortonTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + } + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! 
+ if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + { + + } + + Tester::PipelineSetupData pplnSetupData; + pplnSetupData.device = m_device; + pplnSetupData.api = m_api; + pplnSetupData.assetMgr = m_assetMgr; + pplnSetupData.logger = m_logger; + pplnSetupData.physicalDevice = m_physicalDevice; + pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + { + Tester mortonTester; + pplnSetupData.testShaderPath = "app_resources/mortonTest.comp.hlsl"; + mortonTester.setupPipeline(pplnSetupData); + mortonTester.performTests(); + } + + + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void workLoopBody() override + { + m_keepRunning = false; + } + + bool keepRunning() override + { + return m_keepRunning; + } + + +private: + bool m_keepRunning = true; }; - -NBL_MAIN_FUNC(MortonTestApp) \ No newline at end of file +NBL_MAIN_FUNC(MortonTest) \ No newline at end of file diff --git a/22_CppCompat/ITester.h b/22_CppCompat/ITester.h index a216fbf40..207cdee51 100644 --- a/22_CppCompat/ITester.h +++ b/22_CppCompat/ITester.h @@ -217,6 +217,7 @@ class ITester { case TestType::CPU: ss << "CPU TEST ERROR:\n"; + break; case TestType::GPU: ss << "GPU TEST ERROR:\n"; } From c68c336317024ae80fb017b1cb71e6b32a152224 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 28 Apr 2025 15:16:34 -0300 Subject: [PATCH 08/57] Done with tests --- 12_Mortons/CTester.h | 401 ++++++++++++++++++ 12_Mortons/{Tester.h => ITester.h} | 133 +----- 12_Mortons/app_resources/common.hlsl | 299 ++----------- .../{mortonTest.comp.hlsl => test.comp.hlsl} | 5 +- 12_Mortons/app_resources/testCommon.hlsl | 242 +++++++++++ 12_Mortons/main.cpp | 13 +- 6 files changed, 691 insertions(+), 402 deletions(-) create mode 100644 12_Mortons/CTester.h rename 12_Mortons/{Tester.h => ITester.h} (66%) rename 12_Mortons/app_resources/{mortonTest.comp.hlsl => 
test.comp.hlsl} (79%) create mode 100644 12_Mortons/app_resources/testCommon.hlsl diff --git a/12_Mortons/CTester.h b/12_Mortons/CTester.h new file mode 100644 index 000000000..5a61be501 --- /dev/null +++ b/12_Mortons/CTester.h @@ -0,0 +1,401 @@ +#ifndef _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ + +#include +#include "app_resources/testCommon.hlsl" +#include "nbl/application_templates/MonoDeviceApplication.hpp" +#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" +#include "ITester.h" + +using namespace nbl; + +class CTester final : public ITester +{ +public: + void performTests() + { + std::random_device rd; + std::mt19937 mt(rd()); + + std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < Iterations; ++i) + { + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + testInput.shift = generatedShift; + { + uint64_t generatedA = longDistribution(mt); + uint64_t generatedB = longDistribution(mt); + + testInput.generatedA = generatedA; + testInput.generatedB = generatedB; + + expected.emulatedAnd = _static_cast(generatedA & generatedB); + expected.emulatedOr = _static_cast(generatedA | generatedB); + expected.emulatedXor = _static_cast(generatedA ^ generatedB); + expected.emulatedNot = _static_cast(~generatedA); + expected.emulatedPlus = _static_cast(generatedA + generatedB); + 
expected.emulatedMinus = _static_cast(generatedA - generatedB); + expected.emulatedLess = uint32_t(generatedA < generatedB); + expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); + expected.emulatedGreater = uint32_t(generatedA > generatedB); + expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); + + expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); + expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); + expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); + } + { + testInput.coordX = longDistribution(mt); + testInput.coordY = longDistribution(mt); + testInput.coordZ = longDistribution(mt); + testInput.coordW = longDistribution(mt); + + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint16_t2 Vec2ASmall = uint16_t2(Vec2A & smallBitsMask_2 ); + uint16_t2 Vec2BSmall = uint16_t2(Vec2B & smallBitsMask_2 ); + uint16_t2 Vec2AMedium = uint16_t2(Vec2A & mediumBitsMask_2); + uint16_t2 Vec2BMedium = uint16_t2(Vec2B & mediumBitsMask_2); + uint32_t2 Vec2AFull = uint32_t2(Vec2A & fullBitsMask_2); + uint32_t2 Vec2BFull = uint32_t2(Vec2B & fullBitsMask_2); + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint16_t3 Vec3ASmall = uint16_t3(Vec3A & smallBitsMask_3); + uint16_t3 Vec3BSmall = uint16_t3(Vec3B & smallBitsMask_3); + uint16_t3 Vec3AMedium = uint16_t3(Vec3A & mediumBitsMask_3); + uint16_t3 Vec3BMedium = uint16_t3(Vec3B & mediumBitsMask_3); + uint32_t3 Vec3AFull = uint32_t3(Vec3A & fullBitsMask_3); + uint32_t3 Vec3BFull = uint32_t3(Vec3B & fullBitsMask_3); + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 
Vec4ASmall = uint16_t4(Vec4A & smallBitsMask_4); + uint16_t4 Vec4BSmall = uint16_t4(Vec4B & smallBitsMask_4); + uint16_t4 Vec4AMedium = uint16_t4(Vec4A & mediumBitsMask_4); + uint16_t4 Vec4BMedium = uint16_t4(Vec4B & mediumBitsMask_4); + uint16_t4 Vec4AFull = uint16_t4(Vec4A & fullBitsMask_4); + uint16_t4 Vec4BFull = uint16_t4(Vec4B & fullBitsMask_4); + + // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them + // so their highest bits are all 0s or 1s depending on the sign of the number they encode + + int16_t2 Vec2ASignedSmall = int16_t2(Vec2ASmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); + int16_t2 Vec2BSignedSmall = int16_t2(Vec2BSmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); + int16_t2 Vec2ASignedMedium = int16_t2(Vec2AMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); + int16_t2 Vec2BSignedMedium = int16_t2(Vec2BMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); + int32_t2 Vec2ASignedFull = int32_t2(Vec2AFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); + int32_t2 Vec2BSignedFull = int32_t2(Vec2BFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); + + int16_t3 Vec3ASignedSmall = int16_t3(Vec3ASmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); + int16_t3 Vec3BSignedSmall = int16_t3(Vec3BSmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); + int16_t3 Vec3ASignedMedium = int16_t3(Vec3AMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); + int16_t3 Vec3BSignedMedium = int16_t3(Vec3BMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); + int32_t3 Vec3ASignedFull = int32_t3(Vec3AFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); + int32_t3 Vec3BSignedFull = int32_t3(Vec3BFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); + + int16_t4 Vec4ASignedSmall = int16_t4(Vec4ASmall << uint16_t(16 - 
smallBits_4)) >> int16_t(16 - smallBits_4); + int16_t4 Vec4BSignedSmall = int16_t4(Vec4BSmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); + int16_t4 Vec4ASignedMedium = int16_t4(Vec4AMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); + int16_t4 Vec4BSignedMedium = int16_t4(Vec4BMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); + int16_t4 Vec4ASignedFull = int16_t4(Vec4AFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); + int16_t4 Vec4BSignedFull = int16_t4(Vec4BFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); + + // Plus + expected.mortonPlus_small_2 = morton::code::create(Vec2ASmall + Vec2BSmall); + expected.mortonPlus_medium_2 = morton::code::create(Vec2AMedium + Vec2BMedium); + expected.mortonPlus_full_2 = morton::code::create(Vec2AFull + Vec2BFull); + expected.mortonPlus_emulated_2 = morton::code::create(Vec2AFull + Vec2BFull); + + expected.mortonPlus_small_3 = morton::code::create(Vec3ASmall + Vec3BSmall); + expected.mortonPlus_medium_3 = morton::code::create(Vec3AMedium + Vec3BMedium); + expected.mortonPlus_full_3 = morton::code::create(Vec3AFull + Vec3BFull); + expected.mortonPlus_emulated_3 = morton::code::create(Vec3AFull + Vec3BFull); + + expected.mortonPlus_small_4 = morton::code::create(Vec4ASmall + Vec4BSmall); + expected.mortonPlus_medium_4 = morton::code::create(Vec4AMedium + Vec4BMedium); + expected.mortonPlus_full_4 = morton::code::create(Vec4AFull + Vec4BFull); + expected.mortonPlus_emulated_4 = morton::code::create(Vec4AFull + Vec4BFull); + + // Minus + expected.mortonMinus_small_2 = morton::code::create(Vec2ASmall - Vec2BSmall); + expected.mortonMinus_medium_2 = morton::code::create(Vec2AMedium - Vec2BMedium); + expected.mortonMinus_full_2 = morton::code::create(Vec2AFull - Vec2BFull); + expected.mortonMinus_emulated_2 = morton::code::create(Vec2AFull - Vec2BFull); + + expected.mortonMinus_small_3 = morton::code::create(Vec3ASmall - Vec3BSmall); + 
expected.mortonMinus_medium_3 = morton::code::create(Vec3AMedium - Vec3BMedium); + expected.mortonMinus_full_3 = morton::code::create(Vec3AFull - Vec3BFull); + expected.mortonMinus_emulated_3 = morton::code::create(Vec3AFull - Vec3BFull); + + expected.mortonMinus_small_4 = morton::code::create(Vec4ASmall - Vec4BSmall); + expected.mortonMinus_medium_4 = morton::code::create(Vec4AMedium - Vec4BMedium); + expected.mortonMinus_full_4 = morton::code::create(Vec4AFull - Vec4BFull); + expected.mortonMinus_emulated_4 = morton::code::create(Vec4AFull - Vec4BFull); + + // Coordinate-wise equality + expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); + expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); + expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + + expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); + expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); + expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + + expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); + expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); + expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); + expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); + expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + + expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, 
Vec3BSmall)); + expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); + expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + + expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); + expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); + expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + // Coordinate-wise signed inequality + expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); + expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); + expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + + expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); + expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); + expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + + expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); + expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); + expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(generatedShift); + // Left-shift + expected.mortonLeftShift_small_2 = morton::code::create((Vec2ASmall << uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); + expected.mortonLeftShift_medium_2 = morton::code::create((Vec2AMedium << uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); + expected.mortonLeftShift_full_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + 
expected.mortonLeftShift_emulated_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + + expected.mortonLeftShift_small_3 = morton::code::create((Vec3ASmall << uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); + expected.mortonLeftShift_medium_3 = morton::code::create((Vec3AMedium << uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); + expected.mortonLeftShift_full_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + expected.mortonLeftShift_emulated_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + + expected.mortonLeftShift_small_4 = morton::code::create((Vec4ASmall << uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); + expected.mortonLeftShift_medium_4 = morton::code::create((Vec4AMedium << uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); + expected.mortonLeftShift_full_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + expected.mortonLeftShift_emulated_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + + // Unsigned right-shift + expected.mortonUnsignedRightShift_small_2 = morton::code::create((Vec2ASmall >> uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); + expected.mortonUnsignedRightShift_medium_2 = morton::code::create((Vec2AMedium >> uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); + expected.mortonUnsignedRightShift_full_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + expected.mortonUnsignedRightShift_emulated_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2))& uint32_t(fullBitsMask_2)); + + expected.mortonUnsignedRightShift_small_3 = morton::code::create((Vec3ASmall >> uint16_t(castedShift % 
smallBits_3)) & uint16_t(smallBitsMask_3)); + expected.mortonUnsignedRightShift_medium_3 = morton::code::create((Vec3AMedium >> uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); + expected.mortonUnsignedRightShift_full_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + expected.mortonUnsignedRightShift_emulated_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3))& uint32_t(fullBitsMask_3)); + + expected.mortonUnsignedRightShift_small_4 = morton::code::create((Vec4ASmall >> uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); + expected.mortonUnsignedRightShift_medium_4 = morton::code::create((Vec4AMedium >> uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); + expected.mortonUnsignedRightShift_full_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + expected.mortonUnsignedRightShift_emulated_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4))& uint16_t(fullBitsMask_4)); + + // Signed right-shift + expected.mortonSignedRightShift_small_2 = morton::code::create((Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)) & int16_t(smallBitsMask_2)); + expected.mortonSignedRightShift_medium_2 = morton::code::create((Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)) & int16_t(mediumBitsMask_2)); + expected.mortonSignedRightShift_full_2 = morton::code::create((Vec2ASignedFull >> int32_t(castedShift % fullBits_2)) & int32_t(fullBitsMask_2)); + + expected.mortonSignedRightShift_small_3 = morton::code::create((Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)) & int16_t(smallBitsMask_3)); + expected.mortonSignedRightShift_medium_3 = morton::code::create((Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)) & int16_t(mediumBitsMask_3)); + expected.mortonSignedRightShift_full_3 = morton::code::create((Vec3ASignedFull >> int32_t(castedShift % fullBits_3)) & 
int32_t(fullBitsMask_3)); + + expected.mortonSignedRightShift_small_4 = morton::code::create((Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)) & int16_t(smallBitsMask_4)); + expected.mortonSignedRightShift_medium_4 = morton::code::create((Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)) & int16_t(mediumBitsMask_4)); + expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); + } + + performCpuTests(testInput, expected); + performGpuTests(testInput, expected); + } + m_logger->log("FIRST TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + } + +private: + inline static constexpr int Iterations = 100u; + + void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues cpuTestValues; + + fillTestValues(commonTestInputValues, cpuTestValues); + verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + + } + + void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues gpuTestValues; + gpuTestValues = dispatch(commonTestInputValues); + verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + } + + void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + { + verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); + verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); + verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); + verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); + verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); + verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testType); 
+ verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); + verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); + verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); + verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); + verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); + verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); + verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); + + // Morton Plus + verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); + verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testType); + verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testType); + verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testType); + + verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testType); + verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testType); + verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testType); + verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testType); + + verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, 
testType); + verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testType); + verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testType); + verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testType); + + // Morton Minus + verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testType); + verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testType); + verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testType); + verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testType); + + verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testType); + verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testType); + verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testType); + verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testType); + + verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testType); + verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testType); + verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testType); + verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testType); + + // Morton coordinate-wise equality + 
verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testType); + verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testType); + verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testType); + verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testType); + + verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testType); + verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testType); + verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testType); + verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testType); + + verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testType); + verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testType); + verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testType); + + // Morton coordinate-wise unsigned inequality + verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testType); + verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testType); + verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testType); + verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, 
testValues.mortonUnsignedLess_emulated_2, testType); + + verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testType); + verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testType); + verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testType); + verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testType); + + verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testType); + verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); + verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); + + // Morton coordinate-wise signed inequality + verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); + verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); + verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); + + verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); + verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); + verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testType); + + 
verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testType); + verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); + verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); + + // Morton left-shift + verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); + verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testType); + verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testType); + verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testType); + + verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testType); + verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testType); + verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testType); + verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testType); + + verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testType); + verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testType); + verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testType); + 
verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testType); + + // Morton unsigned right-shift + verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testType); + verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testType); + verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testType); + + verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testType); + verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testType); + verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testType); + + verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testType); + verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testType); + verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testType); + 
verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testType); + + // Morton signed right-shift + verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); + verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); + verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); + + verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testType); + verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testType); + verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testType); + + verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testType); + verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testType); + verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testType); + } +}; + +#endif \ No newline at end of file diff --git a/12_Mortons/Tester.h b/12_Mortons/ITester.h similarity index 66% rename from 12_Mortons/Tester.h rename to 12_Mortons/ITester.h index 480328d18..2510dd997 100644 --- a/12_Mortons/Tester.h +++ b/12_Mortons/ITester.h @@ -1,5 +1,5 @@ -#ifndef _NBL_EXAMPLES_TESTS_12_MORTONS_TESTER_INCLUDED_ -#define 
_NBL_EXAMPLES_TESTS_12_MORTONS_TESTER_INCLUDED_ +#ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ #include #include "app_resources/common.hlsl" @@ -8,10 +8,10 @@ using namespace nbl; -class Tester +class ITester { public: - virtual ~Tester() + virtual ~ITester() { m_outputBufferAllocation.memory->unmap(); }; @@ -128,7 +128,7 @@ class Tester if (!inputBuff) logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - inputBuff->setObjectDebugName("morton input buffer"); + inputBuff->setObjectDebugName("emulated_float64_t output buffer"); video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = inputBuff->getMemoryReqs(); reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); @@ -163,7 +163,7 @@ class Tester if (!outputBuff) logFail("Failed to create a GPU Buffer of size %d!\n", params.size); - outputBuff->setObjectDebugName("morton output buffer"); + outputBuff->setObjectDebugName("emulated_float64_t output buffer"); video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = outputBuff->getMemoryReqs(); reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); @@ -211,29 +211,6 @@ class Tester if (expectedVal == testVal) return; - std::stringstream ss; - switch (testType) - { - case TestType::CPU: - ss << "CPU TEST ERROR:\n"; - case TestType::GPU: - ss << "GPU TEST ERROR:\n"; - } - - ss << "nbl::hlsl::" << memberName << " produced incorrect output!" 
<< '\n'; //test value: " << testVal << " expected value: " << expectedVal << '\n'; - - m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); - } - - template - void verifyTestVector3dValue(const std::string& memberName, const nbl::hlsl::vector& expectedVal, const nbl::hlsl::vector& testVal, const TestType testType) - { - static constexpr float MaxAllowedError = 0.1f; - if (std::abs(double(expectedVal.x) - double(testVal.x)) <= MaxAllowedError && - std::abs(double(expectedVal.y) - double(testVal.y)) <= MaxAllowedError && - std::abs(double(expectedVal.z) - double(testVal.z)) <= MaxAllowedError) - return; - std::stringstream ss; switch (testType) { @@ -244,69 +221,11 @@ class Tester ss << "GPU TEST ERROR:\n"; } - ss << "nbl::hlsl::" << memberName << " produced incorrect output! test value: " << - testVal.x << ' ' << testVal.y << ' ' << testVal.z << - " expected value: " << expectedVal.x << ' ' << expectedVal.y << ' ' << expectedVal.z << '\n'; + ss << "nbl::hlsl::" << memberName << " produced incorrect output!" 
<< '\n'; m_logger->log(ss.str().c_str(), system::ILogger::ELL_ERROR); } - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); - - std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); - std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); - std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); - - m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - InputTestValues testInput; - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected; - - uint32_t generatedShift = intDistribution(mt) & uint32_t(63); - testInput.shift = generatedShift; - { - uint64_t generatedA = longDistribution(mt); - uint64_t generatedB = longDistribution(mt); - - testInput.generatedA = generatedA; - testInput.generatedB = generatedB; - - expected.emulatedAnd = _static_cast(generatedA & generatedB); - expected.emulatedOr = _static_cast(generatedA | generatedB); - expected.emulatedXor = _static_cast(generatedA ^ generatedB); - expected.emulatedNot = _static_cast(~generatedA); - expected.emulatedPlus = _static_cast(generatedA + generatedB); - expected.emulatedMinus = _static_cast(generatedA - generatedB); - expected.emulatedLess = uint32_t(generatedA < generatedB); - expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); - expected.emulatedGreater = uint32_t(generatedA > generatedB); - expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); - - expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); - expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); - expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); - } - { - 
uint64_t coordX = longDistribution(mt); - uint64_t coordY = longDistribution(mt); - uint64_t coordZ = longDistribution(mt); - uint64_t coordW = longDistribution(mt); - - - } - - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } - protected: uint32_t m_queueFamily; core::smart_refctd_ptr m_device; @@ -324,7 +243,7 @@ class Tester core::smart_refctd_ptr m_semaphore; video::IQueue* m_queue; uint64_t m_semaphoreCounter; - + template OutputStruct dispatch(const InputStruct& input) { @@ -375,42 +294,6 @@ class Tester m_logger->log(msg, system::ILogger::ELL_ERROR, std::forward(args)...); exit(-1); } - - inline static constexpr int Iterations = 100u; - - void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) - { - TestValues cpuTestValues; - cpuTestValues.fillTestValues(commonTestInputValues); - verifyTestValues(expectedTestValues, cpuTestValues, TestType::CPU); - - } - - void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) - { - TestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, TestType::GPU); - } - - void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, TestType testType) - { - verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); - verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); - verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); - verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); - verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); - verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, 
testValues.emulatedMinus, testType); - verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); - verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); - verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); - verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); - verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); - verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); - verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); - - //verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); - } }; #endif \ No newline at end of file diff --git a/12_Mortons/app_resources/common.hlsl b/12_Mortons/app_resources/common.hlsl index be6a2f4a0..b058ad821 100644 --- a/12_Mortons/app_resources/common.hlsl +++ b/12_Mortons/app_resources/common.hlsl @@ -5,10 +5,6 @@ #ifndef _NBL_EXAMPLES_TESTS_12_MORTON_COMMON_INCLUDED_ #define _NBL_EXAMPLES_TESTS_12_MORTON_COMMON_INCLUDED_ -// because DXC doesn't properly support `_Static_assert` -// TODO: add a message, and move to macros.h or cpp_compat -#define STATIC_ASSERT(...) 
{ nbl::hlsl::conditional<__VA_ARGS__, int, void>::type a = 0; } - #include #include @@ -23,6 +19,22 @@ NBL_CONSTEXPR uint16_t smallBits_4 = 4; NBL_CONSTEXPR uint16_t mediumBits_4 = 8; NBL_CONSTEXPR uint16_t fullBits_4 = 16; +#ifndef __HLSL_VERSION + +constexpr uint64_t smallBitsMask_2 = (uint64_t(1) << smallBits_2) - 1; +constexpr uint64_t mediumBitsMask_2 = (uint64_t(1) << mediumBits_2) - 1; +constexpr uint64_t fullBitsMask_2 = (uint64_t(1) << fullBits_2) - 1; + +constexpr uint64_t smallBitsMask_3 = (uint64_t(1) << smallBits_3) - 1; +constexpr uint64_t mediumBitsMask_3 = (uint64_t(1) << mediumBits_3) - 1; +constexpr uint64_t fullBitsMask_3 = (uint64_t(1) << fullBits_3) - 1; + +constexpr uint64_t smallBitsMask_4 = (uint64_t(1) << smallBits_4) - 1; +constexpr uint64_t mediumBitsMask_4 = (uint64_t(1) << mediumBits_4) - 1; +constexpr uint64_t fullBitsMask_4 = (uint64_t(1) << fullBits_4) - 1; + +#endif + using namespace nbl::hlsl; struct InputTestValues { @@ -190,33 +202,9 @@ struct TestValues morton::code mortonSignedRightShift_full_4; morton::code mortonSignedRightShift_emulated_4; - void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input) + /* + void fillSecondTestValues(NBL_CONST_REF_ARG(InputTestValues) input) { - emulated_uint64_t emulatedA = _static_cast(input.generatedA); - emulated_uint64_t emulatedB = _static_cast(input.generatedB); - - // Emulated int tests - emulatedAnd = emulatedA & emulatedB; - emulatedOr = emulatedA | emulatedB; - emulatedXor = emulatedA ^ emulatedB; - emulatedNot = emulatedA.operator~(); - emulatedPlus = emulatedA + emulatedB; - emulatedMinus = emulatedA - emulatedB; - emulatedLess = uint32_t(emulatedA < emulatedB); - emulatedLessEqual = uint32_t(emulatedA <= emulatedB); - emulatedGreater = uint32_t(emulatedA > emulatedB); - emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); - - left_shift_operator leftShift; - emulatedLeftShifted = leftShift(emulatedA, input.shift); - - arithmetic_right_shift_operator unsignedRightShift; - 
emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); - - arithmetic_right_shift_operator signedRightShift; - emulatedSignedRightShifted = signedRightShift(_static_cast(emulatedA), input.shift); - - // Morton tests uint64_t2 Vec2A = { input.coordX, input.coordY }; uint64_t2 Vec2B = { input.coordZ, input.coordW }; @@ -235,250 +223,29 @@ struct TestValues int64_t4 Vec4ASigned = int64_t4(Vec4A); int64_t4 Vec4BSigned = int64_t4(Vec4B); - morton::code morton_small_2A = morton::code::create(Vec2A); - morton::code morton_medium_2A = morton::code::create(Vec2A); - morton::code morton_full_2A = morton::code::create(Vec2A); - morton::code morton_emulated_2A = morton::code::create(Vec2A); - morton::code morton_small_2B = morton::code::create(Vec2B); - morton::code morton_medium_2B = morton::code::create(Vec2B); - morton::code morton_full_2B = morton::code::create(Vec2B); - morton::code morton_emulated_2B = morton::code::create(Vec2B); - - morton::code morton_small_3A = morton::code::create(Vec3A); - morton::code morton_medium_3A = morton::code::create(Vec3A); - morton::code morton_full_3A = morton::code::create(Vec3A); - morton::code morton_emulated_3A = morton::code::create(Vec3A); - morton::code morton_small_3B = morton::code::create(Vec3B); - morton::code morton_medium_3B = morton::code::create(Vec3B); - morton::code morton_full_3B = morton::code::create(Vec3B); - morton::code morton_emulated_3B = morton::code::create(Vec3B); - - morton::code morton_small_4A = morton::code::create(Vec4A); - morton::code morton_medium_4A = morton::code::create(Vec4A); - morton::code morton_full_4A = morton::code::create(Vec4A); morton::code morton_emulated_4A = morton::code::create(Vec4A); - morton::code morton_small_4B = morton::code::create(Vec4B); - morton::code morton_medium_4B = morton::code::create(Vec4B); - morton::code morton_full_4B = morton::code::create(Vec4B); - morton::code morton_emulated_4B = morton::code::create(Vec4B); - - morton::code 
morton_small_2ASigned = morton::code::create(Vec2ASigned); - morton::code morton_medium_2ASigned = morton::code::create(Vec2ASigned); - morton::code morton_full_2ASigned = morton::code::create(Vec2ASigned); - morton::code morton_emulated_2ASigned = morton::code::create(Vec2ASigned); - morton::code morton_small_2BSigned = morton::code::create(Vec2BSigned); - morton::code morton_medium_2BSigned = morton::code::create(Vec2BSigned); - morton::code morton_full_2BSigned = morton::code::create(Vec2BSigned); - morton::code morton_emulated_2BSigned = morton::code::create(Vec2BSigned); - - morton::code morton_small_3ASigned = morton::code::create(Vec3ASigned); - morton::code morton_medium_3ASigned = morton::code::create(Vec3ASigned); - morton::code morton_full_3ASigned = morton::code::create(Vec3ASigned); - morton::code morton_emulated_3ASigned = morton::code::create(Vec3ASigned); - morton::code morton_small_3BSigned = morton::code::create(Vec3BSigned); - morton::code morton_medium_3BSigned = morton::code::create(Vec3BSigned); - morton::code morton_full_3BSigned = morton::code::create(Vec3BSigned); - morton::code morton_emulated_3BSigned = morton::code::create(Vec3BSigned); - - morton::code morton_small_4ASigned = morton::code::create(Vec4ASigned); - morton::code morton_medium_4ASigned = morton::code::create(Vec4ASigned); - morton::code morton_full_4ASigned = morton::code::create(Vec4ASigned); - morton::code morton_emulated_4ASigned = morton::code::create(Vec4ASigned); - morton::code morton_small_4BSigned = morton::code::create(Vec4BSigned); - morton::code morton_medium_4BSigned = morton::code::create(Vec4BSigned); - morton::code morton_full_4BSigned = morton::code::create(Vec4BSigned); - morton::code morton_emulated_4BSigned = morton::code::create(Vec4BSigned); - - /* - left_shift_operator > leftShiftTemp; - portable_vector_t interleaved = _static_cast >(uint16_t4(Vec4B)) & morton::impl::coding_mask_v<4, fullBits_4, morton::impl::CodingStages, emulated_uint64_t>; - - 
#define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(fullBits_4 > (uint16_t(1) << I))\ - {\ - interleaved = interleaved | leftShiftTemp(interleaved, (uint16_t(1) << I) * (4 - 1));\ - interleaved = interleaved & _static_cast(morton::impl::coding_mask<4, fullBits_4, I>::value);\ - } - - ENCODE_LOOP_ITERATION(4) - ENCODE_LOOP_ITERATION(3) - ENCODE_LOOP_ITERATION(2) - ENCODE_LOOP_ITERATION(1) - ENCODE_LOOP_ITERATION(0) - - #undef ENCODE_LOOP_ITERATION - // After interleaving, shift each coordinate left by their index - return leftShiftTemp(interleaved, truncate >(vector(0, 1, 2, 3))); - - - array_get, emulated_uint64_t> getter; - emulatedAnd = getter(interleaved, 0); - */ - - // Plus - mortonPlus_small_2 = morton_small_2A + morton_small_2B; - mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; - mortonPlus_full_2 = morton_full_2A + morton_full_2B; - mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; - - mortonPlus_small_3 = morton_small_3A + morton_small_3B; - mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; - mortonPlus_full_3 = morton_full_3A + morton_full_3B; - mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; - - mortonPlus_small_4 = morton_small_4A + morton_small_4B; - mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; - mortonPlus_full_4 = morton_full_4A + morton_full_4B; - mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; - - // Minus - mortonMinus_small_2 = morton_small_2A - morton_small_2B; - mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; - mortonMinus_full_2 = morton_full_2A - morton_full_2B; - mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; - - mortonMinus_small_3 = morton_small_3A - morton_small_3B; - mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; - mortonMinus_full_3 = morton_full_3A - morton_full_3B; - mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; - - mortonMinus_small_4 = morton_small_4A - morton_small_4B; - 
mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; - mortonMinus_full_4 = morton_full_4A - morton_full_4B; - mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; - - // Coordinate-wise equality - mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(uint16_t2(Vec2B))); - mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(uint16_t2(Vec2B))); - mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(uint32_t2(Vec2B))); - mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(uint32_t2(Vec2B))); - - mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(uint16_t3(Vec3B))); - mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(uint16_t3(Vec3B))); - mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(uint32_t3(Vec3B))); - mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(uint32_t3(Vec3B))); - - mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); - mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); - mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); - mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); - - // Coordinate-wise unsigned inequality (just testing with less) - mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); - mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(uint16_t2(Vec2B))); - mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(uint32_t2(Vec2B))); - mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(uint32_t2(Vec2B))); - - mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(uint16_t3(Vec3B))); - mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(uint16_t3(Vec3B))); - mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(uint32_t3(Vec3B))); - mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); - - mortonUnsignedLess_small_4 = 
uint32_t4(morton_small_4A.lessThan(uint16_t4(Vec4B))); - mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(uint16_t4(Vec4B))); - mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(uint16_t4(Vec4B))); - mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(uint16_t4(Vec4B))); + morton::code morton_emulated_2_signed = morton::code::create(Vec2ASigned); + morton::code morton_emulated_3_signed = morton::code::create(Vec3ASigned); + morton::code morton_emulated_4_signed = morton::code::create(Vec4ASigned); + + output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); - // Coordinate-wise signed inequality - mortonSignedLess_small_2 = uint32_t2(morton_small_2ASigned.lessThan(int16_t2(Vec2BSigned))); - mortonSignedLess_medium_2 = uint32_t2(morton_medium_2ASigned.lessThan(int16_t2(Vec2BSigned))); - mortonSignedLess_full_2 = uint32_t2(morton_full_2ASigned.lessThan(int32_t2(Vec2BSigned))); - //mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2ASigned.lessThan(int32_t2(Vec2BSigned))); - - mortonSignedLess_small_3 = uint32_t3(morton_small_3ASigned.lessThan(int16_t3(Vec3BSigned))); - mortonSignedLess_medium_3 = uint32_t3(morton_medium_3ASigned.lessThan(int16_t3(Vec3BSigned))); - mortonSignedLess_full_3 = uint32_t3(morton_full_3ASigned.lessThan(int32_t3(Vec3BSigned))); - //mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3ASigned.lessThan(int32_t3(Vec3BSigned))); - - mortonSignedLess_small_4 = uint32_t4(morton_small_4ASigned.lessThan(int16_t4(Vec4BSigned))); - mortonSignedLess_medium_4 = uint32_t4(morton_medium_4ASigned.lessThan(int16_t4(Vec4BSigned))); - mortonSignedLess_full_4 = uint32_t4(morton_full_4ASigned.lessThan(int16_t4(Vec4BSigned))); - //mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4ASigned.lessThan(int16_t4(Vec4BSigned))); + output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(uint16_t4(Vec4B))); - // Left-shift + mortonSignedLess_emulated_2 = 
uint32_t2(morton_emulated_2_signed.lessThan(int32_t2(Vec2BSigned))); + mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(int32_t3(Vec3BSigned))); + mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(int16_t4(Vec4BSigned))); + uint16_t castedShift = uint16_t(input.shift); - left_shift_operator > leftShiftSmall2; - mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift); - left_shift_operator > leftShiftMedium2; - mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift); - left_shift_operator > leftShiftFull2; - mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift); - left_shift_operator > leftShiftEmulated2; - mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift); - - left_shift_operator > leftShiftSmall3; - mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift); - left_shift_operator > leftShiftMedium3; - mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift); - left_shift_operator > leftShiftFull3; - mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift); - left_shift_operator > leftShiftEmulated3; - mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift); - - left_shift_operator > leftShiftSmall4; - mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift); - left_shift_operator > leftShiftMedium4; - mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift); - left_shift_operator > leftShiftFull4; - mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift); - left_shift_operator > leftShiftEmulated4; - mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift); - - // Unsigned right-shift - arithmetic_right_shift_operator > rightShiftSmall2; - mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift); - arithmetic_right_shift_operator > rightShiftMedium2; - 
mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift); - arithmetic_right_shift_operator > rightShiftFull2; - mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift); - arithmetic_right_shift_operator > rightShiftEmulated2; - mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift); - - arithmetic_right_shift_operator > rightShiftSmall3; - mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift); - arithmetic_right_shift_operator > rightShiftMedium3; - mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift); - arithmetic_right_shift_operator > rightShiftFull3; - mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift); - arithmetic_right_shift_operator > rightShiftEmulated3; - mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift); - - arithmetic_right_shift_operator > rightShiftSmall4; - mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift); - arithmetic_right_shift_operator > rightShiftMedium4; - mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift); - arithmetic_right_shift_operator > rightShiftFull4; - mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift); - arithmetic_right_shift_operator > rightShiftEmulated4; - mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift); - - // Signed right-shift - arithmetic_right_shift_operator > rightShiftSignedSmall2; - mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2ASigned, castedShift); - arithmetic_right_shift_operator > rightShiftSignedMedium2; - mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2ASigned, castedShift); - arithmetic_right_shift_operator > rightShiftSignedFull2; - mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2ASigned, 
castedShift); + arithmetic_right_shift_operator > rightShiftSignedEmulated2; - //mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2ASigned, castedShift); - - arithmetic_right_shift_operator > rightShiftSignedSmall3; - mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3ASigned, castedShift); - arithmetic_right_shift_operator > rightShiftSignedMedium3; - mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3ASigned, castedShift); - arithmetic_right_shift_operator > rightShiftSignedFull3; - mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3ASigned, castedShift); + mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift); arithmetic_right_shift_operator > rightShiftSignedEmulated3; - //mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3ASigned, castedShift); - - arithmetic_right_shift_operator > rightShiftSignedSmall4; - mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4ASigned, castedShift); - arithmetic_right_shift_operator > rightShiftSignedMedium4; - mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4ASigned, castedShift); - arithmetic_right_shift_operator > rightShiftSignedFull4; - mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4ASigned, castedShift); + mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift); arithmetic_right_shift_operator > rightShiftSignedEmulated4; - //mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4ASigned, castedShift); + mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift); } + */ }; #endif diff --git a/12_Mortons/app_resources/mortonTest.comp.hlsl b/12_Mortons/app_resources/test.comp.hlsl similarity index 79% rename from 12_Mortons/app_resources/mortonTest.comp.hlsl rename to 
12_Mortons/app_resources/test.comp.hlsl index 7041568b8..243983d5a 100644 --- a/12_Mortons/app_resources/mortonTest.comp.hlsl +++ b/12_Mortons/app_resources/test.comp.hlsl @@ -1,9 +1,8 @@ //// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. //// This file is part of the "Nabla Engine". //// For conditions of distribution and use, see copyright notice in nabla.h -#pragma shader_stage(compute) -#include "common.hlsl" +#include "testCommon.hlsl" [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; @@ -12,5 +11,5 @@ void main(uint3 invocationID : SV_DispatchThreadID) { if (invocationID.x == 0) - outputTestValues[0].fillTestValues(inputTestValues[0]); + fillTestValues(inputTestValues[0], outputTestValues[0]); } diff --git a/12_Mortons/app_resources/testCommon.hlsl b/12_Mortons/app_resources/testCommon.hlsl new file mode 100644 index 000000000..9ff9a4fa8 --- /dev/null +++ b/12_Mortons/app_resources/testCommon.hlsl @@ -0,0 +1,242 @@ +#include "common.hlsl" + +void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +{ + emulated_uint64_t emulatedA = _static_cast(input.generatedA); + emulated_uint64_t emulatedB = _static_cast(input.generatedB); + + // Emulated int tests + output.emulatedAnd = emulatedA & emulatedB; + output.emulatedOr = emulatedA | emulatedB; + output.emulatedXor = emulatedA ^ emulatedB; + output.emulatedNot = emulatedA.operator~(); + output.emulatedPlus = emulatedA + emulatedB; + output.emulatedMinus = emulatedA - emulatedB; + output.emulatedLess = uint32_t(emulatedA < emulatedB); + output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); + output.emulatedGreater = uint32_t(emulatedA > emulatedB); + output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); + + left_shift_operator leftShift; + output.emulatedLeftShifted = leftShift(emulatedA, input.shift); + + arithmetic_right_shift_operator unsignedRightShift; + 
output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); + + arithmetic_right_shift_operator signedRightShift; + output.emulatedSignedRightShifted = signedRightShift(_static_cast(emulatedA), input.shift); + + // Morton tests + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + int64_t2 Vec2ASigned = int64_t2(Vec2A); + int64_t2 Vec2BSigned = int64_t2(Vec2B); + + int64_t3 Vec3ASigned = int64_t3(Vec3A); + int64_t3 Vec3BSigned = int64_t3(Vec3B); + + int64_t4 Vec4ASigned = int64_t4(Vec4A); + int64_t4 Vec4BSigned = int64_t4(Vec4B); + + morton::code morton_small_2A = morton::code::create(Vec2A); + morton::code morton_medium_2A = morton::code::create(Vec2A); + morton::code morton_full_2A = morton::code::create(Vec2A); + morton::code morton_emulated_2A = morton::code::create(Vec2A); + morton::code morton_small_2B = morton::code::create(Vec2B); + morton::code morton_medium_2B = morton::code::create(Vec2B); + morton::code morton_full_2B = morton::code::create(Vec2B); + morton::code morton_emulated_2B = morton::code::create(Vec2B); + + morton::code morton_small_3A = morton::code::create(Vec3A); + morton::code morton_medium_3A = morton::code::create(Vec3A); + morton::code morton_full_3A = morton::code::create(Vec3A); + morton::code morton_emulated_3A = morton::code::create(Vec3A); + morton::code morton_small_3B = morton::code::create(Vec3B); + morton::code morton_medium_3B = morton::code::create(Vec3B); + morton::code morton_full_3B = morton::code::create(Vec3B); + morton::code morton_emulated_3B = morton::code::create(Vec3B); + + morton::code morton_small_4A = morton::code::create(Vec4A); + morton::code 
morton_medium_4A = morton::code::create(Vec4A); + morton::code morton_full_4A = morton::code::create(Vec4A); + morton::code morton_emulated_4A = morton::code::create(Vec4A); + morton::code morton_small_4B = morton::code::create(Vec4B); + morton::code morton_medium_4B = morton::code::create(Vec4B); + morton::code morton_full_4B = morton::code::create(Vec4B); + morton::code morton_emulated_4B = morton::code::create(Vec4B); + + morton::code morton_small_2_signed = morton::code::create(Vec2ASigned); + morton::code morton_medium_2_signed = morton::code::create(Vec2ASigned); + morton::code morton_full_2_signed = morton::code::create(Vec2ASigned); + + morton::code morton_small_3_signed = morton::code::create(Vec3ASigned); + morton::code morton_medium_3_signed = morton::code::create(Vec3ASigned); + morton::code morton_full_3_signed = morton::code::create(Vec3ASigned); + + morton::code morton_small_4_signed = morton::code::create(Vec4ASigned); + morton::code morton_medium_4_signed = morton::code::create(Vec4ASigned); + morton::code morton_full_4_signed = morton::code::create(Vec4ASigned); + + // Plus + output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; + output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; + output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; + output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; + + output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; + output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; + output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; + output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; + + output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; + output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; + output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; + output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; + + // Minus + output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; 
+ output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; + output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; + output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; + + output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; + output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; + output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; + output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; + + output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; + output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; + output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; + output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; + + // Coordinate-wise equality + output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(uint16_t2(Vec2B))); + output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(uint16_t2(Vec2B))); + output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(uint32_t2(Vec2B))); + output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(uint32_t2(Vec2B))); + + output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(uint16_t3(Vec3B))); + output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(uint16_t3(Vec3B))); + output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(uint32_t3(Vec3B))); + output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(uint32_t3(Vec3B))); + + output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); + output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); + output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); + + // Coordinate-wise unsigned inequality (just testing with less) + output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); + output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(uint16_t2(Vec2B))); + 
output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(uint32_t2(Vec2B))); + output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(uint32_t2(Vec2B))); + + output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(uint16_t3(Vec3B))); + output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(uint16_t3(Vec3B))); + output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(uint32_t3(Vec3B))); + output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); + + output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(uint16_t4(Vec4B))); + output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(uint16_t4(Vec4B))); + output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(uint16_t4(Vec4B))); + + // Coordinate-wise signed inequality + output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(int16_t2(Vec2BSigned))); + output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(int16_t2(Vec2BSigned))); + output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(int32_t2(Vec2BSigned))); + + output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(int16_t3(Vec3BSigned))); + output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(int16_t3(Vec3BSigned))); + output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(int32_t3(Vec3BSigned))); + + output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(int16_t4(Vec4BSigned))); + output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(int16_t4(Vec4BSigned))); + output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(int16_t4(Vec4BSigned))); + + // Cast to uint16_t which is what left shift for Mortons expect + uint16_t castedShift = uint16_t(input.shift); + // Each left shift clamps to correct bits so the result kinda makes 
sense + // Left-shift + left_shift_operator > leftShiftSmall2; + output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); + left_shift_operator > leftShiftMedium2; + output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + left_shift_operator > leftShiftFull2; + output.mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift % fullBits_2); + left_shift_operator > leftShiftEmulated2; + output.mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + left_shift_operator > leftShiftSmall3; + output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); + left_shift_operator > leftShiftMedium3; + output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + left_shift_operator > leftShiftFull3; + output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); + left_shift_operator > leftShiftEmulated3; + output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + left_shift_operator > leftShiftSmall4; + output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); + left_shift_operator > leftShiftMedium4; + output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + left_shift_operator > leftShiftFull4; + output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); + left_shift_operator > leftShiftEmulated4; + output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Unsigned right-shift + arithmetic_right_shift_operator > rightShiftSmall2; + output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftMedium2; + output.mortonUnsignedRightShift_medium_2 = 
rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftFull2; + output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftEmulated2; + output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSmall3; + output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftMedium3; + output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftFull3; + output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftEmulated3; + output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSmall4; + output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftMedium4; + output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftFull4; + output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); + arithmetic_right_shift_operator > rightShiftEmulated4; + output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // Signed right-shift + arithmetic_right_shift_operator > rightShiftSignedSmall2; + output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftSignedMedium2; + 
output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftSignedFull2; + output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSignedSmall3; + output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftSignedMedium3; + output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftSignedFull3; + output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSignedSmall4; + output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftSignedMedium4; + output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftSignedFull4; + output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); +} \ No newline at end of file diff --git a/12_Mortons/main.cpp b/12_Mortons/main.cpp index f83c49b9e..18fd067ec 100644 --- a/12_Mortons/main.cpp +++ b/12_Mortons/main.cpp @@ -10,7 +10,7 @@ #include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" #include "app_resources/common.hlsl" -#include "Tester.h" +#include "CTester.h" using namespace nbl::core; using namespace nbl::hlsl; @@ -35,24 +35,21 @@ class MortonTest final : public MonoDeviceApplication, public MonoAssetManagerAn return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - { - - } - Tester::PipelineSetupData pplnSetupData; + 
CTester::PipelineSetupData pplnSetupData; pplnSetupData.device = m_device; pplnSetupData.api = m_api; pplnSetupData.assetMgr = m_assetMgr; pplnSetupData.logger = m_logger; pplnSetupData.physicalDevice = m_physicalDevice; pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); + // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator { - Tester mortonTester; - pplnSetupData.testShaderPath = "app_resources/mortonTest.comp.hlsl"; + CTester mortonTester; + pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; mortonTester.setupPipeline(pplnSetupData); mortonTester.performTests(); } - return true; } From f05dec4652d1af3fa1a4664760efb1f3e934134a Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 28 Apr 2025 15:29:40 -0300 Subject: [PATCH 09/57] Clarifying comment for blocker issue --- 12_Mortons/main.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/12_Mortons/main.cpp b/12_Mortons/main.cpp index 18fd067ec..a05e61842 100644 --- a/12_Mortons/main.cpp +++ b/12_Mortons/main.cpp @@ -44,6 +44,7 @@ class MortonTest final : public MonoDeviceApplication, public MonoAssetManagerAn pplnSetupData.physicalDevice = m_physicalDevice; pplnSetupData.computeFamilyIndex = getComputeQueue()->getFamilyIndex(); // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator + // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 { CTester mortonTester; pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; From 8a8f958d179cc32afa227c30f60f4ada0d4369b8 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 22 Oct 2025 16:15:50 +0200 Subject: [PATCH 10/57] Enabled build time shader compilation in example 05 --- .../CMakeLists.txt | 46 ++++++++++++++++++- .../main.cpp | 5 +- 2 files changed, 49 insertions(+), 2 deletions(-) diff --git 
a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt index a434ff32a..3c6054992 100644 --- a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt +++ b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt @@ -21,4 +21,48 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + COMMON_OPTIONS ${COMPILE_OPTIONS} +) \ No newline at end of file diff --git a/05_StreamingAndBufferDeviceAddressApp/main.cpp b/05_StreamingAndBufferDeviceAddressApp/main.cpp index b82dc18ca..131c7506a 100644 --- a/05_StreamingAndBufferDeviceAddressApp/main.cpp +++ b/05_StreamingAndBufferDeviceAddressApp/main.cpp @@ -6,6 +6,7 @@ // I've moved out a tiny part of this example into a shared header for reuse, please open and 
read it. #include "nbl/application_templates/MonoDeviceApplication.hpp" #include "nbl/examples/common/BuiltinResourcesApplication.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; @@ -96,7 +97,9 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset("app_resources/shader.comp.hlsl",lp); + + auto key = "app_resources/" + nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); From f1a3ee5921b5fea3f275b67344e722066a901da8 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 22 Oct 2025 17:38:08 +0200 Subject: [PATCH 11/57] Fix --- .../app_resources/shader.comp.hlsl | 5 +---- 05_StreamingAndBufferDeviceAddressApp/main.cpp | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl b/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl index af38ffada..31c60aefd 100644 --- a/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl +++ b/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl @@ -1,12 +1,9 @@ #include "common.hlsl" -// just a small test -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" - [[vk::push_constant]] PushConstantData pushConstants; // does absolutely nothing, a later example will show how it gets used -template +template void dummyTraitTest() {} [numthreads(WorkgroupSize,1,1)] diff --git a/05_StreamingAndBufferDeviceAddressApp/main.cpp b/05_StreamingAndBufferDeviceAddressApp/main.cpp index 131c7506a..495890c6d 100644 --- a/05_StreamingAndBufferDeviceAddressApp/main.cpp +++ b/05_StreamingAndBufferDeviceAddressApp/main.cpp @@ 
-96,9 +96,9 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root + lp.workingDirectory = "app_resources"; // virtual root - auto key = "app_resources/" + nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); + auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) From e301db5db00ec1a77d4e231037bd05d7f23adbc7 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 22 Oct 2025 18:02:33 +0200 Subject: [PATCH 12/57] Updated source file generation of the 05_streamingandbufferdeviceaddressappSPIRV project --- 05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt index 3c6054992..1dcceed5d 100644 --- a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt +++ b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt @@ -28,6 +28,8 @@ set(DEPENDS app_resources/common.hlsl app_resources/shader.comp.hlsl ) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(JSON [=[ [ From f85ae8045c13380ace4c124d8a07349b4fd5fb62 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 23 Oct 2025 23:16:25 +0200 Subject: [PATCH 13/57] Enabled build time shader compilation in multiple examples --- 03_DeviceSelectionAndSharedSources/main.cpp | 2 +- .../main.cpp | 4 +- 07_StagingAndMultipleQueues/CMakeLists.txt | 48 ++++++++++++- 07_StagingAndMultipleQueues/main.cpp | 44 +++++++----- 11_FFT/main.cpp | 1 - 24_ColorSpaceTest/CMakeLists.txt | 46 +++++++++++++ 24_ColorSpaceTest/main.cpp | 21 +++--- 62_CAD/CMakeLists.txt | 68 ++++++++++++++++++- 62_CAD/main.cpp | 
14 ++-- 62_CAD/shaders/globals.hlsl | 6 -- .../shaders/main_pipeline/vertex_shader.hlsl | 10 +-- 64_EmulatedFloatTest/CMakeLists.txt | 54 ++++++++++++++- .../benchmark/benchmark.comp.hlsl | 1 + .../app_resources/test.comp.hlsl | 1 + 64_EmulatedFloatTest/main.cpp | 52 ++++---------- 67_RayQueryGeometry/CMakeLists.txt | 48 ++++++++++++- .../app_resources/render.comp.hlsl | 2 - 67_RayQueryGeometry/main.cpp | 12 ++-- 18 files changed, 334 insertions(+), 100 deletions(-) diff --git a/03_DeviceSelectionAndSharedSources/main.cpp b/03_DeviceSelectionAndSharedSources/main.cpp index b8fd3d18b..bcc849a4d 100644 --- a/03_DeviceSelectionAndSharedSources/main.cpp +++ b/03_DeviceSelectionAndSharedSources/main.cpp @@ -257,7 +257,7 @@ class DeviceSelectionAndSharedSourcesApp final : public application_templates::M } const auto* metadata = assetBundle.getMetadata(); - const auto hlslMetadata = static_cast(metadata); + const auto hlslMetadata = static_cast(metadata); const auto shaderStage = hlslMetadata->shaderStages->front(); // It would be super weird if loading a shader from a file produced more than 1 asset diff --git a/05_StreamingAndBufferDeviceAddressApp/main.cpp b/05_StreamingAndBufferDeviceAddressApp/main.cpp index 495890c6d..ab0984a07 100644 --- a/05_StreamingAndBufferDeviceAddressApp/main.cpp +++ b/05_StreamingAndBufferDeviceAddressApp/main.cpp @@ -104,9 +104,7 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M if (assets.empty()) return logFail("Could not load shader!"); - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - const auto shaderSource = IAsset::castDown(assets[0]); - shader = m_device->compileShader({shaderSource.get()}); + shader = IAsset::castDown(assets[0]); // The down-cast should not fail! 
assert(shader); } diff --git a/07_StagingAndMultipleQueues/CMakeLists.txt b/07_StagingAndMultipleQueues/CMakeLists.txt index a434ff32a..cc4ecd465 100644 --- a/07_StagingAndMultipleQueues/CMakeLists.txt +++ b/07_StagingAndMultipleQueues/CMakeLists.txt @@ -21,4 +21,50 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/comp_shader.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/comp_shader.hlsl", + "KEY": "comp_shader", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + COMMON_OPTIONS ${COMPILE_OPTIONS} +) \ No newline at end of file diff --git a/07_StagingAndMultipleQueues/main.cpp b/07_StagingAndMultipleQueues/main.cpp index fc6bf4551..70455eb96 100644 --- a/07_StagingAndMultipleQueues/main.cpp +++ b/07_StagingAndMultipleQueues/main.cpp @@ -4,6 +4,7 @@ // I've moved 
out a tiny part of this example into a shared header for reuse, please open and read it. #include "nbl/examples/examples.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; using namespace nbl::core; @@ -189,7 +190,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul for (uint32_t imageIdx = 0; imageIdx < IMAGE_CNT; ++imageIdx) { const auto imagePathToLoad = imagesToLoad[imageIdx]; - auto cpuImage = loadFistAssetInBundle(imagePathToLoad); + auto cpuImage = loadImageAsset(imagePathToLoad); if (!cpuImage) logFailAndTerminate("Failed to load image from path %s",ILogger::ELL_ERROR,imagePathToLoad); @@ -279,17 +280,10 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul } // LOAD SHADER FROM FILE - smart_refctd_ptr source; - { - source = loadFistAssetInBundle("../app_resources/comp_shader.hlsl"); - } + smart_refctd_ptr shader = loadPreCompiledShader<"comp_shader">("../app_resources/comp_shader.hlsl"); - if (!source) - logFailAndTerminate("Could not create a CPU shader!"); - - core::smart_refctd_ptr shader = m_device->compileShader({ source.get() }); - if(!shader) - logFailAndTerminate("Could not compile shader to spirv!"); + if (!shader) + logFailAndTerminate("Could not load the precompiled shader!"); // CREATE COMPUTE PIPELINE SPushConstantRange pc[1]; @@ -534,21 +528,39 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul return false; } - - template - core::smart_refctd_ptr loadFistAssetInBundle(const std::string& path) + + core::smart_refctd_ptr loadImageAsset(const std::string& path) { IAssetLoader::SAssetLoadParams lp; SAssetBundle bundle = m_assetMgr->getAsset(path, lp); if (bundle.getContents().empty()) - logFailAndTerminate("Couldn't load an asset.",ILogger::ELL_ERROR); + logFailAndTerminate("Couldn't load an image.",ILogger::ELL_ERROR); - auto asset = IAsset::castDown(bundle.getContents()[0]); + auto asset = 
IAsset::castDown(bundle.getContents()[0]); if (!asset) logFailAndTerminate("Incorrect asset loaded.",ILogger::ELL_ERROR); return asset; } + + template + core::smart_refctd_ptr loadPreCompiledShader(const std::string& path) + { + IAssetLoader::SAssetLoadParams lp; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + SAssetBundle bundle = m_assetMgr->getAsset(key.data(), lp); + if (bundle.getContents().empty()) + logFailAndTerminate("Couldn't load a shader.", ILogger::ELL_ERROR); + + auto asset = IAsset::castDown(bundle.getContents()[0]); + if (!asset) + logFailAndTerminate("Incorrect asset loaded.", ILogger::ELL_ERROR); + + return asset; + } }; NBL_MAIN_FUNC(StagingAndMultipleQueuesApp) diff --git a/11_FFT/main.cpp b/11_FFT/main.cpp index 1886da72a..2be25d92b 100644 --- a/11_FFT/main.cpp +++ b/11_FFT/main.cpp @@ -2,7 +2,6 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - #include "nbl/examples/examples.hpp" using namespace nbl; diff --git a/24_ColorSpaceTest/CMakeLists.txt b/24_ColorSpaceTest/CMakeLists.txt index 026add505..fcf8faa36 100644 --- a/24_ColorSpaceTest/CMakeLists.txt +++ b/24_ColorSpaceTest/CMakeLists.txt @@ -32,4 +32,50 @@ add_test(NAME NBL_IMAGE_HASH_RUN_TESTS COMMAND "$" --test hash WORKING_DIRECTORY "$" COMMAND_EXPAND_LISTS +) + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/present.frag.hlsl + app_resources/push_constants.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/present.frag.hlsl", + "KEY": "present", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + 
LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + COMMON_OPTIONS ${COMPILE_OPTIONS} ) \ No newline at end of file diff --git a/24_ColorSpaceTest/main.cpp b/24_ColorSpaceTest/main.cpp index 84c55ef3a..e8858f5a6 100644 --- a/24_ColorSpaceTest/main.cpp +++ b/24_ColorSpaceTest/main.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" @@ -160,26 +161,24 @@ class ColorSpaceTestSampleApp final : public SimpleWindowedApplication, public B return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); // Load Custom Shader - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr + auto loadPrecompiledShader = [&](const std::string& relPath) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return nullptr; - // lets go straight from ICPUSpecializedShader to 
IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); - if (!source) - return nullptr; - - return m_device->compileShader({ source.get() }); + auto shader = IAsset::castDown(assets[0]); + return shader; }; - auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); + auto fragmentShader = loadPrecompiledShader.operator()<"present">("app_resources/present.frag.hlsl"); if (!fragmentShader) - return logFail("Failed to Load and Compile Fragment Shader!"); + return logFail("Failed to load precompiled fragment shader!"); // Now surface indep resources m_semaphore = m_device->createSemaphore(m_submitIx); diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index c3a0fa47e..c193dc63c 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -61,4 +61,70 @@ else() foreach(NBL_TARGET IN LISTS NBL_MSDFGEN_TARGETS) target_include_directories(${EXECUTABLE_NAME} PUBLIC $) endforeach() -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + shaders/globals.hlsl + shaders/runtimeDeviceConfigCaps.hlsl + shaders/main_pipeline/common.hlsl + shaders/main_pipeline/dtm.hlsl + shaders/main_pipeline/fragment.hlsl + shaders/main_pipeline/fragment_shader.hlsl + shaders/main_pipeline/fragment_shader_debug.hlsl + shaders/main_pipeline/line_style.hlsl + shaders/main_pipeline/resolve_alphas.hlsl + shaders/main_pipeline/vertex_shader.hlsl +) + +set(SM 6_8) + +set(REQUIRED_CAPS [=[ + { + "kind": "features", + "name": "fragmentShaderPixelInterlock", + "type": "bool", + "values": [1] + } +]=]) + +set(JSON [=[ +[ + { + "INPUT": "shaders/main_pipeline/vertex_shader.hlsl", + "KEY": "main_pipeline_vertex_shader", + "COMPILE_OPTIONS": ["-T", "cs_6_8"], + "DEPENDS": [], + "CAPS": [${REQUIRED_CAPS}] + }, + { + "INPUT": "shaders/main_pipeline/fragment.hlsl", + "KEY": "main_pipeline_fragment_shader", + "COMPILE_OPTIONS": ["-T", "cs_6_8"], + "DEPENDS": [], + "CAPS": [${REQUIRED_CAPS}] + 
} +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index f4a886791..ec7b177eb 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1,5 +1,5 @@ // TODO: Copyright notice - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" @@ -961,12 +961,14 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } // Load Custom Shader - auto loadCompileShader = [&](const std::string& relPath) -> smart_refctd_ptr + auto loadCompileShader = [&](const std::string& relPath) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); + lp.workingDirectory = "shaders"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return nullptr; @@ -979,8 +981,8 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio return m_device->compileShader( ILogicalDevice::SShaderCreationParameters { .source = source.get(), .readCache = shaderReadCache.get(), .writeCache = shaderWriteCache.get(), .stage = IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY }); }; - 
mainPipelineFragmentShaders = loadCompileShader("../shaders/main_pipeline/fragment.hlsl"); - mainPipelineVertexShader = loadCompileShader("../shaders/main_pipeline/vertex_shader.hlsl"); + mainPipelineFragmentShaders = loadCompileShader.operator()<"main_pipeline_fragment_shader">("../shaders/main_pipeline/fragment.hlsl"); + mainPipelineVertexShader = loadCompileShader.operator() <"main_pipeline_vertex_shader"> ("../shaders/main_pipeline/vertex_shader.hlsl"); core::smart_refctd_ptr shaderWriteCacheFile; { diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 5c3681910..bad6e6132 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -1,12 +1,6 @@ #ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ #define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ -#ifdef __HLSL_VERSION -#ifndef NBL_USE_SPIRV_BUILTINS -#include "runtimeDeviceConfigCaps.hlsl" // defines DeviceConfigCaps, uses JIT device caps -#endif -#endif - // TODO[Erfan]: Turn off in the future, but keep enabled to test // #define NBL_FORCE_EMULATED_FLOAT_64 diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index 90394e935..df566f002 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -706,19 +706,19 @@ PSInput vtxMain(uint vertexID : SV_VertexID) if (corner.x == 0.0f && corner.y == 0.0f) { - dilationVector.x = ieee754::flipSign(dilationVector.x); + dilationVector.x = ieee754::flipSign(dilationVector.x, true); uvOffset.x = -uvOffset.x; uvOffset.y = -uvOffset.y; } else if (corner.x == 0.0f && corner.y == 1.0f) { - dilationVector.x = ieee754::flipSign(dilationVector.x); - dilationVector.y = ieee754::flipSign(dilationVector.y); + dilationVector.x = ieee754::flipSign(dilationVector.x, true); + dilationVector.y = ieee754::flipSign(dilationVector.y, true); uvOffset.x = -uvOffset.x; } else if (corner.x == 1.0f && corner.y == 1.0f) { - dilationVector.y = 
ieee754::flipSign(dilationVector.y); + dilationVector.y = ieee754::flipSign(dilationVector.y, true); } else if (corner.x == 1.0f && corner.y == 0.0f) { @@ -730,7 +730,7 @@ PSInput vtxMain(uint vertexID : SV_VertexID) pfloat64_t2 worldSpaceExtentsYAxisFlipped; worldSpaceExtentsYAxisFlipped.x = worldSpaceExtents.x; - worldSpaceExtentsYAxisFlipped.y = ieee754::flipSign(worldSpaceExtents.y); + worldSpaceExtentsYAxisFlipped.y = ieee754::flipSign(worldSpaceExtents.y, true); const pfloat64_t2 vtxPos = topLeft + worldSpaceExtentsYAxisFlipped * _static_cast(corner); const pfloat64_t2 dilatedVtxPos = vtxPos + dilationVector; diff --git a/64_EmulatedFloatTest/CMakeLists.txt b/64_EmulatedFloatTest/CMakeLists.txt index aae93590d..1b272bf2e 100644 --- a/64_EmulatedFloatTest/CMakeLists.txt +++ b/64_EmulatedFloatTest/CMakeLists.txt @@ -27,4 +27,56 @@ if(MSVC) target_compile_options("${EXECUTABLE_NAME}" PUBLIC "/fp:strict") else() target_compile_options("${EXECUTABLE_NAME}" PUBLIC -ffloat-store -frounding-math -fsignaling-nans -ftrapping-math) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/test.comp.hlsl + app_resources/benchmark/benchmark.comp.hlsl + app_resources/benchmark/common.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/test.comp.hlsl", + "KEY": "test", + }, + { + "INPUT": "app_resources/benchmark/benchmark.comp.hlsl", + "KEY": "benchmark", + }, +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + 
OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + COMMON_OPTIONS ${COMPILE_OPTIONS} +) \ No newline at end of file diff --git a/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl b/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl index b31da3737..a515f6bcb 100644 --- a/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl +++ b/64_EmulatedFloatTest/app_resources/benchmark/benchmark.comp.hlsl @@ -66,6 +66,7 @@ uint64_t calcIntegral() } [numthreads(BENCHMARK_WORKGROUP_DIMENSION_SIZE_X, 1, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { static const uint32_t NativeToEmulatedRatio = 6; diff --git a/64_EmulatedFloatTest/app_resources/test.comp.hlsl b/64_EmulatedFloatTest/app_resources/test.comp.hlsl index 7681e80a5..e95eadd49 100644 --- a/64_EmulatedFloatTest/app_resources/test.comp.hlsl +++ b/64_EmulatedFloatTest/app_resources/test.comp.hlsl @@ -12,6 +12,7 @@ PushConstants pc; [numthreads(WORKGROUP_SIZE, 1, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { const nbl::hlsl::emulated_float64_t a = nbl::hlsl::bit_cast >(pc.a); diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index 3fc635e87..a4f177f16 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" @@ -262,9 +262,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = base.m_logger.get(); - lp.workingDirectory = ""; // virtual root - // this time we load a shader directly from a file - auto assetBundle = base.m_assetMgr->getAsset("app_resources/test.comp.hlsl", lp); + lp.workingDirectory = "app_resources"; // virtual root + + auto key = nbl::this_example::builtin::build::get_spirv_key<"test">(base.m_device.get()); + auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) { @@ -274,26 +275,11 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr source = IAsset::castDown(assets[0]); - - auto* compilerSet = base.m_assetMgr->getCompilerSet(); - - nbl::asset::IShaderCompiler::SCompilerOptions options = {}; - options.stage = ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = base.m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - auto spirv = compilerSet->compileToSPIRV(source.get(), options); - - shader = base.m_device->compileShader({spirv.get()}); + shader = IAsset::castDown(assets[0]); } if (!shader) - base.logFail("Failed to create a GPU Shader, seems the Driver doesn't like 
the SPIR-V we're feeding it!\n"); + base.logFail("Failed to load precompiled \"test\" shader!\n"); nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { { @@ -928,9 +914,10 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = base.m_logger.get(); - lp.workingDirectory = ""; // virtual root + lp.workingDirectory = "app_resources"; // virtual root // this time we load a shader directly from a file - auto assetBundle = base.m_assetMgr->getAsset("app_resources/benchmark/benchmark.comp.hlsl", lp); + auto key = nbl::this_example::builtin::build::get_spirv_key<"benchmark">(m_device.get()); + auto assetBundle = base.m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) { @@ -940,26 +927,11 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr source = IAsset::castDown(assets[0]); - - auto* compilerSet = base.m_assetMgr->getCompilerSet(); - - IShaderCompiler::SCompilerOptions options = {}; - options.stage = ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = base.m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - auto spirv = compilerSet->compileToSPIRV(source.get(), options); - - shader = base.m_device->compileShader({spirv.get()}); + shader = IAsset::castDown(assets[0]); } if (!shader) - base.logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V 
we're feeding it!\n"); + base.logFail("Failed to load precompiled \"benchmark\" shader!\n"); nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { { diff --git a/67_RayQueryGeometry/CMakeLists.txt b/67_RayQueryGeometry/CMakeLists.txt index d26a90205..40f32624a 100644 --- a/67_RayQueryGeometry/CMakeLists.txt +++ b/67_RayQueryGeometry/CMakeLists.txt @@ -25,4 +25,50 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/render.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/render.comp.hlsl", + "KEY": "render", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + COMMON_OPTIONS ${COMPILE_OPTIONS} +) \ No newline at end of file diff --git a/67_RayQueryGeometry/app_resources/render.comp.hlsl b/67_RayQueryGeometry/app_resources/render.comp.hlsl index 
954598c9a..889e1f38b 100644 --- a/67_RayQueryGeometry/app_resources/render.comp.hlsl +++ b/67_RayQueryGeometry/app_resources/render.comp.hlsl @@ -1,7 +1,5 @@ #include "common.hlsl" -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" - #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" diff --git a/67_RayQueryGeometry/main.cpp b/67_RayQueryGeometry/main.cpp index 2783385f2..b35000485 100644 --- a/67_RayQueryGeometry/main.cpp +++ b/67_RayQueryGeometry/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "common.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" class RayQueryGeometryApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { @@ -150,8 +151,10 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const std::string shaderPath = "app_resources/render.comp.hlsl"; IAssetLoader::SAssetLoadParams lparams = {}; lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(shaderPath, lparams); + lparams.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key<"render">(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.data(), lparams); if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, shaderPath); @@ -160,10 +163,9 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built const auto assets = bundle.getContents(); assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - auto shader = m_device->compileShader({shaderSrc.get()}); + smart_refctd_ptr shader = IAsset::castDown(assets[0]); if (!shader) - return logFail("Failed to create shader!"); + return 
logFail("Failed to load precompiled shader!"); SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0u, .size = sizeof(SPushConstants)}; auto pipelineLayout = m_device->createPipelineLayout({ &pcRange, 1 }, smart_refctd_ptr(renderDs->getLayout()), nullptr, nullptr, nullptr); From 22f2a17401e8e70dddff477e11db12ebd1dea2bd Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 27 Oct 2025 15:51:37 +0100 Subject: [PATCH 14/57] Fixed project creation of multiple examples --- .../CMakeLists.txt | 32 ++-- 07_StagingAndMultipleQueues/CMakeLists.txt | 4 +- 07_StagingAndMultipleQueues/main.cpp | 4 +- 11_FFT/CMakeLists.txt | 48 +++++- 11_FFT/main.cpp | 32 ++-- 24_ColorSpaceTest/CMakeLists.txt | 4 +- 24_ColorSpaceTest/main.cpp | 30 ++-- 62_CAD/CMakeLists.txt | 27 ++-- 62_CAD/main.cpp | 42 ++--- 64_EmulatedFloatTest/CMakeLists.txt | 4 +- 67_RayQueryGeometry/CMakeLists.txt | 4 +- 70_FLIPFluids/CMakeLists.txt | 99 +++++++++++- .../app_resources/compute/diffusion.comp.hlsl | 3 + .../compute/pressureSolver.comp.hlsl | 2 + .../compute/updateFluidCells.comp.hlsl | 1 + 70_FLIPFluids/main.cpp | 148 +++++++----------- 16 files changed, 298 insertions(+), 186 deletions(-) diff --git a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt index 1dcceed5d..a342ac3d5 100644 --- a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt +++ b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt @@ -31,6 +31,7 @@ set(DEPENDS target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) +set(SM 6_8) set(JSON [=[ [ { @@ -48,23 +49,22 @@ set(COMPILE_OPTIONS ) NBL_CREATE_NSC_COMPILE_RULES( - TARGET ${EXECUTABLE_NAME}SPIRV - LINK_TO ${EXECUTABLE_NAME} - DEPENDS ${DEPENDS} - BINARY_DIR ${OUTPUT_DIRECTORY} - MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT - COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} - OUTPUT_VAR KEYS - INCLUDE 
nbl/this_example/builtin/build/spirv/keys.hpp - NAMESPACE nbl::this_example::builtin::build - INPUTS ${JSON} + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} ) NBL_CREATE_RESOURCE_ARCHIVE( - NAMESPACE nbl::this_example::builtin::build - TARGET ${EXECUTABLE_NAME}_builtinsBuild - LINK_TO ${EXECUTABLE_NAME} - BIND ${OUTPUT_DIRECTORY} - BUILTINS ${KEYS} - COMMON_OPTIONS ${COMPILE_OPTIONS} + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} ) \ No newline at end of file diff --git a/07_StagingAndMultipleQueues/CMakeLists.txt b/07_StagingAndMultipleQueues/CMakeLists.txt index cc4ecd465..19515454d 100644 --- a/07_StagingAndMultipleQueues/CMakeLists.txt +++ b/07_StagingAndMultipleQueues/CMakeLists.txt @@ -31,6 +31,7 @@ set(DEPENDS target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) +set(SM 6_8) set(JSON [=[ [ { @@ -53,7 +54,7 @@ NBL_CREATE_NSC_COMPILE_RULES( DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT - COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + COMMON_OPTIONS ${COMPILE_OPTIONS} OUTPUT_VAR KEYS INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp NAMESPACE nbl::this_example::builtin::build @@ -66,5 +67,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} - COMMON_OPTIONS ${COMPILE_OPTIONS} ) \ No newline at end of file diff --git a/07_StagingAndMultipleQueues/main.cpp b/07_StagingAndMultipleQueues/main.cpp index 70455eb96..a850c1c47 100644 --- a/07_StagingAndMultipleQueues/main.cpp +++ 
b/07_StagingAndMultipleQueues/main.cpp @@ -280,7 +280,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul } // LOAD SHADER FROM FILE - smart_refctd_ptr shader = loadPreCompiledShader<"comp_shader">("../app_resources/comp_shader.hlsl"); + smart_refctd_ptr shader = loadPreCompiledShader<"comp_shader">(); // "../app_resources/comp_shader.hlsl" if (!shader) logFailAndTerminate("Could not load the precompiled shader!"); @@ -544,7 +544,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul } template - core::smart_refctd_ptr loadPreCompiledShader(const std::string& path) + core::smart_refctd_ptr loadPreCompiledShader() { IAssetLoader::SAssetLoadParams lp; lp.logger = m_logger.get(); diff --git a/11_FFT/CMakeLists.txt b/11_FFT/CMakeLists.txt index a434ff32a..9a2ee5a21 100644 --- a/11_FFT/CMakeLists.txt +++ b/11_FFT/CMakeLists.txt @@ -21,4 +21,50 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS 
${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/11_FFT/main.cpp b/11_FFT/main.cpp index 2be25d92b..49d157a38 100644 --- a/11_FFT/main.cpp +++ b/11_FFT/main.cpp @@ -2,6 +2,8 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + #include "nbl/examples/examples.hpp" using namespace nbl; @@ -44,15 +46,6 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ smart_refctd_ptr m_timeline; uint64_t semaphorValue = 0; - inline core::smart_refctd_ptr createShader( - const char* includeMainName) - { - std::string prelude = "#include \""; - auto hlslShader = core::make_smart_refctd_ptr((prelude + includeMainName + "\"\n").c_str(), IShader::E_CONTENT_TYPE::ECT_HLSL, includeMainName); - assert(hlslShader); - return m_device->compileShader({ hlslShader.get() }); - } - public: // Yay thanks to multiple inheritance we cannot forward ctors anymore FFT_Test(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : @@ -67,28 +60,23 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ if (!asset_base_t::onAppInitialized(std::move(system))) return false; - // this time we load a shader directly from a file smart_refctd_ptr shader; - /* { + { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset("app_resources/shader.comp.hlsl", lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = 
nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return logFail("Could not load shader!"); // Cast down the asset to its proper type - auto source = IAsset::castDown(assets[0]); - // The down-cast should not fail! - assert(source); - - // Compile directly to SPIR-V Shader - shader = m_device->compileShader({ source.get() }); + shader = IAsset::castDown(assets[0]); + if (!shader) - return logFail("Creation of a SPIR-V Shader from HLSL Shader source failed!"); - }*/ - shader = createShader("app_resources/shader.comp.hlsl"); + return logFail("Invalid shader!"); + } // Create massive upload/download buffers constexpr uint32_t DownstreamBufferSize = sizeof(scalar_t) << 23; diff --git a/24_ColorSpaceTest/CMakeLists.txt b/24_ColorSpaceTest/CMakeLists.txt index fcf8faa36..a2c5e752b 100644 --- a/24_ColorSpaceTest/CMakeLists.txt +++ b/24_ColorSpaceTest/CMakeLists.txt @@ -42,6 +42,7 @@ set(DEPENDS target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) +set(SM 6_8) set(JSON [=[ [ { @@ -64,7 +65,7 @@ NBL_CREATE_NSC_COMPILE_RULES( DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT - COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + COMMON_OPTIONS ${COMPILE_OPTIONS} OUTPUT_VAR KEYS INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp NAMESPACE nbl::this_example::builtin::build @@ -77,5 +78,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} - COMMON_OPTIONS ${COMPILE_OPTIONS} ) \ No newline at end of file diff --git a/24_ColorSpaceTest/main.cpp b/24_ColorSpaceTest/main.cpp index e8858f5a6..750756321 100644 --- a/24_ColorSpaceTest/main.cpp +++ b/24_ColorSpaceTest/main.cpp @@ -161,22 +161,22 @@ class ColorSpaceTestSampleApp final : public 
SimpleWindowedApplication, public B return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); // Load Custom Shader - auto loadPrecompiledShader = [&](const std::string& relPath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = "app_resources"; - - auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); - auto assetBundle = m_assetMgr->getAsset(key.data(), lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; - auto shader = IAsset::castDown(assets[0]); - return shader; - }; - auto fragmentShader = loadPrecompiledShader.operator()<"present">("app_resources/present.frag.hlsl"); + auto shader = IAsset::castDown(assets[0]); + return shader; + }; + auto fragmentShader = loadPrecompiledShader.operator()<"present">(); // "app_resources/present.frag.hlsl" if (!fragmentShader) return logFail("Failed to load precompiled fragment shader!"); diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index c193dc63c..dd181ff87 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -76,16 +76,17 @@ set(DEPENDS shaders/main_pipeline/resolve_alphas.hlsl shaders/main_pipeline/vertex_shader.hlsl ) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) set(SM 6_8) - set(REQUIRED_CAPS [=[ - { - "kind": "features", - "name": "fragmentShaderPixelInterlock", - "type": "bool", - "values": [1] - } +{ + "kind": "features", + "name": 
"fragmentShaderPixelInterlock", + "type": "bool", + "values": [1] +} ]=]) set(JSON [=[ @@ -93,28 +94,30 @@ set(JSON [=[ { "INPUT": "shaders/main_pipeline/vertex_shader.hlsl", "KEY": "main_pipeline_vertex_shader", - "COMPILE_OPTIONS": ["-T", "cs_6_8"], - "DEPENDS": [], "CAPS": [${REQUIRED_CAPS}] }, { "INPUT": "shaders/main_pipeline/fragment.hlsl", "KEY": "main_pipeline_fragment_shader", - "COMPILE_OPTIONS": ["-T", "cs_6_8"], - "DEPENDS": [], "CAPS": [${REQUIRED_CAPS}] } ] ]=]) string(CONFIGURE "${JSON}" JSON) +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + NBL_CREATE_NSC_COMPILE_RULES( TARGET ${EXECUTABLE_NAME}SPIRV LINK_TO ${EXECUTABLE_NAME} DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT - COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + COMMON_OPTIONS ${COMPILE_OPTIONS} OUTPUT_VAR KEYS INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp NAMESPACE nbl::this_example::builtin::build diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index ec7b177eb..15ee597ec 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -961,28 +961,28 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio } // Load Custom Shader - auto loadCompileShader = [&](const std::string& relPath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = "shaders"; - - auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); - auto assetBundle = m_assetMgr->getAsset(key.data(), lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); - if (!source) - return nullptr; - - return m_device->compileShader( ILogicalDevice::SShaderCreationParameters { .source = source.get(), .readCache = shaderReadCache.get(), .writeCache = shaderWriteCache.get(), .stage = 
IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY }); - }; + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "shaders"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); + return nullptr; + } + + + auto shader = IAsset::castDown(assets[0]); + return shader; + }; - mainPipelineFragmentShaders = loadCompileShader.operator()<"main_pipeline_fragment_shader">("../shaders/main_pipeline/fragment.hlsl"); - mainPipelineVertexShader = loadCompileShader.operator() <"main_pipeline_vertex_shader"> ("../shaders/main_pipeline/vertex_shader.hlsl"); + mainPipelineFragmentShaders = loadPrecompiledShader.operator()<"main_pipeline_fragment_shader">(); // "../shaders/main_pipeline/fragment.hlsl" + mainPipelineVertexShader = loadPrecompiledShader.operator() <"main_pipeline_vertex_shader">(); // "../shaders/main_pipeline/vertex_shader.hlsl" core::smart_refctd_ptr shaderWriteCacheFile; { diff --git a/64_EmulatedFloatTest/CMakeLists.txt b/64_EmulatedFloatTest/CMakeLists.txt index 1b272bf2e..6470cdc74 100644 --- a/64_EmulatedFloatTest/CMakeLists.txt +++ b/64_EmulatedFloatTest/CMakeLists.txt @@ -39,6 +39,7 @@ set(DEPENDS target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) +set(SM 6_8) set(JSON [=[ [ { @@ -65,7 +66,7 @@ NBL_CREATE_NSC_COMPILE_RULES( DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT - COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + COMMON_OPTIONS ${COMPILE_OPTIONS} OUTPUT_VAR KEYS INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp NAMESPACE nbl::this_example::builtin::build @@ -78,5 +79,4 @@
NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} - COMMON_OPTIONS ${COMPILE_OPTIONS} ) \ No newline at end of file diff --git a/67_RayQueryGeometry/CMakeLists.txt b/67_RayQueryGeometry/CMakeLists.txt index 40f32624a..503c5a31a 100644 --- a/67_RayQueryGeometry/CMakeLists.txt +++ b/67_RayQueryGeometry/CMakeLists.txt @@ -35,6 +35,7 @@ set(DEPENDS target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) +set(SM 6_8) set(JSON [=[ [ { @@ -57,7 +58,7 @@ NBL_CREATE_NSC_COMPILE_RULES( DEPENDS ${DEPENDS} BINARY_DIR ${OUTPUT_DIRECTORY} MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT - COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + COMMON_OPTIONS ${COMPILE_OPTIONS} OUTPUT_VAR KEYS INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp NAMESPACE nbl::this_example::builtin::build @@ -70,5 +71,4 @@ NBL_CREATE_RESOURCE_ARCHIVE( LINK_TO ${EXECUTABLE_NAME} BIND ${OUTPUT_DIRECTORY} BUILTINS ${KEYS} - COMMON_OPTIONS ${COMPILE_OPTIONS} ) \ No newline at end of file diff --git a/70_FLIPFluids/CMakeLists.txt b/70_FLIPFluids/CMakeLists.txt index a434ff32a..19a561f78 100644 --- a/70_FLIPFluids/CMakeLists.txt +++ b/70_FLIPFluids/CMakeLists.txt @@ -21,4 +21,101 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/compute/advectParticles.comp.hlsl + app_resources/compute/applyBodyForces.comp.hlsl + app_resources/compute/diffusion.comp.hlsl + app_resources/compute/genParticleVertices.comp.hlsl + app_resources/compute/particlesInit.comp.hlsl + app_resources/compute/prepareCellUpdate.comp.hlsl 
+ app_resources/compute/pressureSolver.comp.hlsl + app_resources/compute/updateFluidCells.comp.hlsl + app_resources/cellUtils.hlsl + app_resources/common.hlsl + app_resources/descriptor_bindings.hlsl + app_resources/fluidParticles.fragment.hlsl + app_resources/fluidParticles.vertex.hlsl + app_resources/gridSampling.hlsl + app_resources/gridUtils.hlsl + app_resources/render_common.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/compute/diffusion.comp.hlsl", + "KEY": "diffusion", + }, + { + "INPUT": "app_resources/fluidParticles.vertex.hlsl", + "KEY": "fluidParticles_vertex", + }, + { + "INPUT": "app_resources/fluidParticles.fragment.hlsl", + "KEY": "fluidParticles_fragment", + }, + { + "INPUT": "app_resources/compute/particlesInit.comp.hlsl", + "KEY": "particlesInit", + }, + { + "INPUT": "app_resources/compute/genParticleVertices.comp.hlsl", + "KEY": "genParticleVertices", + }, + { + "INPUT": "app_resources/compute/prepareCellUpdate.comp.hlsl", + "KEY": "prepareCellUpdate", + }, + { + "INPUT": "app_resources/compute/updateFluidCells.comp.hlsl", + "KEY": "updateFluidCells", + }, + { + "INPUT": "app_resources/compute/applyBodyForces.comp.hlsl", + "KEY": "applyBodyForces", + }, + { + "INPUT": "app_resources/compute/pressureSolver.comp.hlsl", + "KEY": "pressureSolver", + }, + { + "INPUT": "app_resources/compute/advectParticles.comp.hlsl", + "KEY": "advectParticles", + } + +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE 
nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl b/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl index e53c91d2d..288b82764 100644 --- a/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl @@ -67,6 +67,7 @@ void setAxisCellMaterial(uint32_t3 ID : SV_DispatchThreadID) } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void setNeighborAxisCellMaterial(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; @@ -127,6 +128,7 @@ float3 calculateDiffusionVelStep(int3 idx, float3 sampledVelocity, uint cellMate } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void iterateDiffusion(uint32_t3 ID : SV_DispatchThreadID) { uint3 gid = nbl::hlsl::glsl::gl_WorkGroupID(); @@ -212,6 +214,7 @@ void iterateDiffusion(uint32_t3 ID : SV_DispatchThreadID) // TODO: same as the pressure solver, this kernel/dispatch should be fused onto `iterateDiffusion` guarded by `isLastIteration` push constant [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void applyDiffusion(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; diff --git a/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl b/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl index b5db995c5..e71f05912 100644 --- a/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl @@ -89,6 +89,7 @@ float calculatePressureStep(int3 idx) } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void iteratePressureSystem(uint32_t3 ID : 
SV_DispatchThreadID) { uint3 gid = nbl::hlsl::glsl::gl_WorkGroupID(); @@ -168,6 +169,7 @@ void iteratePressureSystem(uint32_t3 ID : SV_DispatchThreadID) // TODO: why doesn't the last invocation of `iteratePressureSystem` have this step fused into it!? It would be just a simple push constant `isLastIteration` that would decide whether to run this dispatch [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void updateVelocities(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; diff --git a/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl b/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl index 62ddfd822..ea37660c1 100644 --- a/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl @@ -23,6 +23,7 @@ cbuffer GridData // TODO: f 0 is AIR, and >=2 is SOLID, we can perform Atomic OR 0b01 to have a particle set the cell to FLUID, and this dispatch looping over all grid cells is not needed! [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void updateFluidCells(uint32_t3 ID : SV_DispatchThreadID) { int3 cIdx = ID; diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index 899d00ba4..a70064245 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" // TODO: why is it not in nabla.h ? 
@@ -344,11 +345,12 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso if (!initGraphicsPipeline()) return logFail("Failed to initialize render pipeline!\n"); - auto createComputePipeline = [&](smart_refctd_ptr& pipeline, smart_refctd_ptr& pool, - smart_refctd_ptr& set, const std::string& shaderPath, const std::string& entryPoint, + + auto createComputePipeline = [&](smart_refctd_ptr& pipeline, smart_refctd_ptr& pool, + smart_refctd_ptr& set, const std::string& entryPoint, const std::span bindings, const asset::SPushConstantRange& pcRange = {}) -> void { - auto shader = compileShader(shaderPath, entryPoint); + auto shader = loadPrecompiledShader(); auto descriptorSetLayout1 = m_device->createDescriptorSetLayout(bindings); @@ -378,8 +380,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { // init particles pipeline const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 2 * sizeof(uint64_t) }; - createComputePipeline(m_initParticlePipeline, m_initParticlePool, m_initParticleDs, - "app_resources/compute/particlesInit.comp.hlsl", "main", piParticlesInit_bs1, pcRange); + createComputePipeline.operator()<"particlesInit">(m_initParticlePipeline, m_initParticlePool, m_initParticleDs, + "main", piParticlesInit_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[1]; @@ -395,8 +397,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { // generate particle vertex pipeline const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 3 * sizeof(uint64_t) }; - createComputePipeline(m_genParticleVerticesPipeline, m_genVerticesPool, m_genVerticesDs, - "app_resources/compute/genParticleVertices.comp.hlsl", "main", gpvGenVertices_bs1, pcRange); + createComputePipeline.operator()<"genParticleVertices">(m_genParticleVerticesPipeline, m_genVerticesPool, 
m_genVerticesDs, + "main", gpvGenVertices_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -414,8 +416,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso // update fluid cells pipelines { const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 2 * sizeof(uint64_t) }; - createComputePipeline(m_accumulateWeightsPipeline, m_accumulateWeightsPool, m_accumulateWeightsDs, - "app_resources/compute/prepareCellUpdate.comp.hlsl", "main", ufcAccWeights_bs1, pcRange); + createComputePipeline.operator()<"prepareCellUpdate">(m_accumulateWeightsPipeline, m_accumulateWeightsPool, m_accumulateWeightsDs, + "main", ufcAccWeights_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -457,8 +459,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_updateFluidCellsPipeline, m_updateFluidCellsPool, m_updateFluidCellsDs, - "app_resources/compute/updateFluidCells.comp.hlsl", "updateFluidCells", ufcFluidCell_bs1); + createComputePipeline.operator()<"updateFluidCells">(m_updateFluidCellsPipeline, m_updateFluidCellsPool, m_updateFluidCellsDs, + "updateFluidCells", ufcFluidCell_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -479,8 +481,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_updateNeighborCellsPipeline, m_updateNeighborCellsPool, m_updateNeighborCellsDs, - "app_resources/compute/updateFluidCells.comp.hlsl", "updateNeighborFluidCells", ufcNeighborCell_bs1); + createComputePipeline.operator()<"updateFluidCells">(m_updateNeighborCellsPipeline, m_updateNeighborCellsPool, m_updateNeighborCellsDs, + "updateNeighborFluidCells", ufcNeighborCell_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -527,8 +529,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } { // apply forces pipeline - 
createComputePipeline(m_applyBodyForcesPipeline, m_applyForcesPool, m_applyForcesDs, - "app_resources/compute/applyBodyForces.comp.hlsl", "main", abfApplyForces_bs1); + createComputePipeline.operator()<"applyBodyForces">(m_applyBodyForcesPipeline, m_applyForcesPool, m_applyForcesDs, + "main", abfApplyForces_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -559,8 +561,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } // apply diffusion pipelines { - createComputePipeline(m_axisCellsPipeline, m_axisCellsPool, m_axisCellsDs, - "app_resources/compute/diffusion.comp.hlsl", "setAxisCellMaterial", dAxisCM_bs1); + createComputePipeline.operator()<"diffusion">(m_axisCellsPipeline, m_axisCellsPool, m_axisCellsDs, + "setAxisCellMaterial", dAxisCM_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -581,8 +583,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_neighborAxisCellsPipeline, m_neighborAxisCellsPool, m_neighborAxisCellsDs, - "app_resources/compute/diffusion.comp.hlsl", "setNeighborAxisCellMaterial", dNeighborAxisCM_bs1); + createComputePipeline.operator()<"diffusion">(m_neighborAxisCellsPipeline, m_neighborAxisCellsPool, m_neighborAxisCellsDs, + "setNeighborAxisCellMaterial", dNeighborAxisCM_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -603,10 +605,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - const std::string iterateKernel = "iterateDiffusion"; - const std::string applyKernel = "applyDiffusion"; - auto iterateShader = compileShader("app_resources/compute/diffusion.comp.hlsl", iterateKernel); - auto applyShader = compileShader("app_resources/compute/diffusion.comp.hlsl", applyKernel); + smart_refctd_ptr diffusion = loadPrecompiledShader<"diffusion">(); // "app_resources/compute/diffusion.comp.hlsl" auto descriptorSetLayout1 = m_device->createDescriptorSetLayout(dDiffuse_bs1); @@ -625,16 
+624,16 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { IGPUComputePipeline::SCreationParams params = {}; params.layout = pipelineLayout.get(); - params.shader.entryPoint = iterateKernel; - params.shader.shader = iterateShader.get(); + params.shader.entryPoint = "iterateDiffusion"; + params.shader.shader = diffusion.get(); m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_iterateDiffusionPipeline); } { IGPUComputePipeline::SCreationParams params = {}; params.layout = pipelineLayout.get(); - params.shader.entryPoint = applyKernel; - params.shader.shader = applyShader.get(); + params.shader.entryPoint = "applyDiffusion"; + params.shader.shader = diffusion.get(); m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_diffusionPipeline); } @@ -676,8 +675,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } // solve pressure system pipelines { - createComputePipeline(m_calcDivergencePipeline, m_calcDivergencePool, m_calcDivergenceDs, - "app_resources/compute/pressureSolver.comp.hlsl", "calculateNegativeDivergence", psDivergence_bs1); + createComputePipeline.operator()<"pressureSolver">(m_calcDivergencePipeline, m_calcDivergencePool, m_calcDivergenceDs, + "calculateNegativeDivergence", psDivergence_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[3]; @@ -711,8 +710,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - createComputePipeline(m_iteratePressurePipeline, m_iteratePressurePool, m_iteratePressureDs, - "app_resources/compute/pressureSolver.comp.hlsl", "iteratePressureSystem", psIteratePressure_bs1); + createComputePipeline.operator()<"pressureSolver">(m_iteratePressurePipeline, m_iteratePressurePool, m_iteratePressureDs, + "iteratePressureSystem", psIteratePressure_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[5]; @@ -740,8 +739,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso } } { - 
createComputePipeline(m_updateVelPsPipeline, m_updateVelPsPool, m_updateVelPsDs, - "app_resources/compute/pressureSolver.comp.hlsl", "updateVelocities", psUpdateVelPs_bs1); + createComputePipeline.operator()<"pressureSolver">(m_updateVelPsPipeline, m_updateVelPsPool, m_updateVelPsDs, + "updateVelocities", psUpdateVelPs_bs1); { IGPUDescriptorSet::SDescriptorInfo infos[4]; @@ -780,8 +779,8 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso { // advect particles pipeline const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 2 * sizeof(uint64_t) }; - createComputePipeline(m_advectParticlesPipeline, m_advectParticlesPool, m_advectParticlesDs, - "app_resources/compute/advectParticles.comp.hlsl", "main", apAdvectParticles_bs1, pcRange); + createComputePipeline.operator()<"advectParticles">(m_advectParticlesPipeline, m_advectParticlesPool, m_advectParticlesDs, + "main", apAdvectParticles_bs1, pcRange); { IGPUDescriptorSet::SDescriptorInfo infos[2]; @@ -1400,51 +1399,25 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso numParticles = m_gridData.particleInitSize.x * m_gridData.particleInitSize.y * m_gridData.particleInitSize.z * particlesPerCell; } - smart_refctd_ptr compileShader(const std::string& filePath, const std::string& entryPoint = "main") + template + smart_refctd_ptr loadPrecompiledShader() { IAssetLoader::SAssetLoadParams lparams = {}; lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(filePath, lparams); + lparams.workingDirectory = "app_resources"; + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.data(), lparams); if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { - m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + m_logger->log("Failed to find shader with key 
'%s'.", ILogger::ELL_ERROR, ShaderKey); exit(-1); } const auto assets = bundle.getContents(); assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - const auto hlslMetadata = static_cast(bundle.getMetadata()); - const auto shaderStage = hlslMetadata->shaderStages->front(); + smart_refctd_ptr shader = IAsset::castDown(assets[0]); - smart_refctd_ptr shader = shaderSrc; - if (entryPoint != "main") - { - auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - CHLSLCompiler::SOptions options = {}; - options.stage = shaderStage; - if (!(options.stage == IShader::E_SHADER_STAGE::ESS_COMPUTE || options.stage == IShader::E_SHADER_STAGE::ESS_FRAGMENT)) - options.stage = IShader::E_SHADER_STAGE::ESS_VERTEX; - options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - #ifndef _NBL_DEBUG - ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; - auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); - options.spirvOptimizer = opt.get(); - #endif - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = shaderSrc->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - - std::string dxcOptionStr[] = {"-E " + entryPoint}; - options.dxcOptions = std::span(dxcOptionStr); - - shader = compiler->compileToSPIRV((const char*)shaderSrc->getContent()->getPointer(), options); - } - - return m_device->compileShader({ shader.get() }); + return shader; } // TODO: there's a method in IUtilities for this @@ -1563,28 +1536,27 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso // init shaders and pipeline - auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr + auto loadPrecompiledShader = [&]() -> 
smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = m_logger.get(); + lparams.workingDirectory = "app_resources"; + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto bundle = m_assetMgr->getAsset(key.data(), lparams); + if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) { - IAssetLoader::SAssetLoadParams lparams = {}; - lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(filePath, lparams); - if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) - { - m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); - exit(-1); - } + m_logger->log("Failed to find shader with key '%s'.", ILogger::ELL_ERROR, ShaderKey); + exit(-1); + } - const auto assets = bundle.getContents(); - assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - if (!shaderSrc) - return nullptr; + const auto assets = bundle.getContents(); + assert(assets.size() == 1); + smart_refctd_ptr shader = IAsset::castDown(assets[0]); - return m_device->compileShader({ shaderSrc.get() }); - }; - auto vs = compileShader("app_resources/fluidParticles.vertex.hlsl"); - auto fs = compileShader("app_resources/fluidParticles.fragment.hlsl"); + return shader; + }; + auto vs = loadPrecompiledShader.operator()<"fluidParticles_vertex">(); // "app_resources/fluidParticles.vertex.hlsl" + auto fs = loadPrecompiledShader.operator()<"fluidParticles_fragment">(); // "app_resources/fluidParticles.fragment.hlsl" smart_refctd_ptr descriptorSetLayout1; { From 974d23f74a537648ac307c08a81ea97908a74874 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 28 Oct 2025 16:11:57 +0100 Subject: [PATCH 15/57] Enabled build time shader compilation in example 10 --- 10_CountingSort/CMakeLists.txt | 68 +++++++++++++++++++++++ 10_CountingSort/app_resources/common.hlsl | 6 ++ 10_CountingSort/main.cpp | 45 +++++++++------ 3 files 
changed, 102 insertions(+), 17 deletions(-) diff --git a/10_CountingSort/CMakeLists.txt b/10_CountingSort/CMakeLists.txt index b7cad41da..3acc73022 100644 --- a/10_CountingSort/CMakeLists.txt +++ b/10_CountingSort/CMakeLists.txt @@ -22,3 +22,71 @@ if(NBL_EMBED_BUILTIN_RESOURCES) LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) endif() + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/prefix_sum_shader.comp.hlsl + app_resources/scatter_shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(REQUIRED_CAPS [=[ + { + "kind": "limits", + "name": "maxComputeWorkGroupInvocations", + "type": "uint32_t", + "values": [256,512,1024] + }, + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + } +]=]) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/prefix_sum_shader.comp.hlsl", + "KEY": "prefix_sum_shader", + "CAPS": [${REQUIRED_CAPS}] + }, + { + "INPUT": "app_resources/scatter_shader.comp.hlsl", + "KEY": "scatter_shader", + "CAPS": [${REQUIRED_CAPS}] + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/10_CountingSort/app_resources/common.hlsl 
b/10_CountingSort/app_resources/common.hlsl index bcbf01727..1074432b0 100644 --- a/10_CountingSort/app_resources/common.hlsl +++ b/10_CountingSort/app_resources/common.hlsl @@ -22,6 +22,10 @@ using namespace nbl::hlsl; #ifdef __HLSL_VERSION #include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +static const uint32_t WorkgroupSize = DeviceConfigCaps::maxComputeWorkGroupInvocations; +static const uint32_t MaxBucketCount = (DeviceConfigCaps::maxComputeSharedMemorySize / sizeof(uint32_t)) / 2; +static const uint32_t BucketCount = (MaxBucketCount > 3000) ? 3000 : MaxBucketCount; + using Ptr = bda::__ptr; using PtrAccessor = BdaAccessor; @@ -54,6 +58,8 @@ uint32_t3 glsl::gl_WorkGroupSize() { return uint32_t3(WorkgroupSize, 1, 1); } + + #endif #endif \ No newline at end of file diff --git a/10_CountingSort/main.cpp b/10_CountingSort/main.cpp index d51650919..a22647750 100644 --- a/10_CountingSort/main.cpp +++ b/10_CountingSort/main.cpp @@ -1,4 +1,5 @@ #include "nbl/examples/examples.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; using namespace nbl::core; @@ -32,19 +33,34 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio return false; auto limits = m_physicalDevice->getLimits(); + constexpr std::array AllowedMaxComputeSharedMemorySizes = { + 16384, 32768, 65536 + }; + + auto upperBoundSharedMemSize = std::upper_bound(AllowedMaxComputeSharedMemorySizes.begin(), AllowedMaxComputeSharedMemorySizes.end(), limits.maxComputeSharedMemorySize); + // devices which support less than 16KB of max compute shared memory size are not supported + if (upperBoundSharedMemSize == AllowedMaxComputeSharedMemorySizes.begin()) + { + m_logger->log("maxComputeSharedMemorySize is too low (%u)", ILogger::E_LOG_LEVEL::ELL_ERROR, limits.maxComputeSharedMemorySize); + exit(0); + } + + limits.maxComputeSharedMemorySize = *(upperBoundSharedMemSize - 1); + const uint32_t WorkgroupSize = limits.maxComputeWorkGroupInvocations; const 
uint32_t MaxBucketCount = (limits.maxComputeSharedMemorySize / sizeof(uint32_t)) / 2; constexpr uint32_t element_count = 100000; const uint32_t bucket_count = std::min((uint32_t)3000, MaxBucketCount); const uint32_t elements_per_thread = ceil((float)ceil((float)element_count / limits.computeUnits) / WorkgroupSize); - auto prepShader = [&](const core::string& path) -> smart_refctd_ptr + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr { // this time we load a shader directly from a file IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(path,lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key(limits, m_physicalDevice->getFeatures()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) { @@ -52,29 +68,24 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio return nullptr; } - auto source = IAsset::castDown(assets[0]); + auto shader = IAsset::castDown(assets[0]); // The down-cast should not fail! - assert(source); + assert(shader); // There's two ways of doing stuff like this: // 1. this - modifying the asset after load // 2. 
creating a short shader source file that includes the asset you would have wanted to load - auto overrideSource = CHLSLCompiler::createOverridenCopy( - source.get(), "#define WorkgroupSize %d\n#define BucketCount %d\n", - WorkgroupSize, bucket_count - ); + // + //auto overrideSource = CHLSLCompiler::createOverridenCopy( + // source.get(), "#define WorkgroupSize %d\n#define BucketCount %d\n", + // WorkgroupSize, bucket_count + //); // this time we skip the use of the asset converter since the IShader->IGPUShader path is quick and simple - auto shader = m_device->compileShader({ overrideSource.get() }); - if (!shader) - { - logFail("Creation of Prefix Sum Shader from CPU Shader source failed!"); - return nullptr; - } return shader; }; - auto prefixSumShader = prepShader("app_resources/prefix_sum_shader.comp.hlsl"); - auto scatterShader = prepShader("app_resources/scatter_shader.comp.hlsl"); + auto prefixSumShader = loadPrecompiledShader.operator()<"prefix_sum_shader">(); // "app_resources/prefix_sum_shader.comp.hlsl" + auto scatterShader = loadPrecompiledShader.operator()<"scatter_shader">(); // "app_resources/scatter_shader.comp.hlsl" // People love Reflection but I prefer Shader Sources instead! 
const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE,.offset = 0,.size = sizeof(CountingPushData) }; From eb1e29f4d071956d8397108680cb0256ec012b5b Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Tue, 28 Oct 2025 16:25:00 +0100 Subject: [PATCH 16/57] Enabled build time shader compilation in example 71 --- 62_CAD/main.cpp | 59 +--------- 71_RayTracingPipeline/CMakeLists.txt | 101 ++++++++++++++++++ .../app_resources/raytrace.rahit.hlsl | 2 +- .../app_resources/raytrace.rchit.hlsl | 20 ++-- .../app_resources/raytrace.rgen.hlsl | 1 - .../app_resources/raytrace_shadow.rahit.hlsl | 2 +- 71_RayTracingPipeline/main.cpp | 99 +++++------------ 7 files changed, 138 insertions(+), 146 deletions(-) diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 15ee597ec..905177f6b 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -929,43 +929,12 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio smart_refctd_ptr mainPipelineVertexShader = {}; std::array, 2u> geoTexturePipelineShaders = {}; { - smart_refctd_ptr shaderReadCache = nullptr; - smart_refctd_ptr shaderWriteCache = core::make_smart_refctd_ptr(); - auto shaderCachePath = localOutputCWD / "main_pipeline_shader_cache.bin"; - - { - core::smart_refctd_ptr shaderReadCacheFile; - { - system::ISystem::future_t> future; - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_READ); - if (future.wait()) - { - future.acquire().move_into(shaderReadCacheFile); - if (shaderReadCacheFile) - { - const size_t size = shaderReadCacheFile->getSize(); - if (size > 0ull) - { - std::vector contents(size); - system::IFile::success_t succ; - shaderReadCacheFile->read(succ, contents.data(), 0, size); - if (succ) - shaderReadCache = IShaderCompiler::CCache::deserialize(contents); - } - } - } - else - m_logger->log("Failed Openning Shader Cache File.", ILogger::ELL_ERROR); - } - - } - // Load Custom Shader auto loadPrecompiledShader = [&]() -> 
smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = "shaders"; + lp.workingDirectory = "app_resources"; auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); auto assetBundle = m_assetMgr->getAsset(key.data(), lp); @@ -983,32 +952,6 @@ class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplicatio mainPipelineFragmentShaders = loadPrecompiledShader.operator()<"main_pipeline_fragment_shader">(); // "../shaders/main_pipeline/fragment.hlsl" mainPipelineVertexShader = loadPrecompiledShader.operator() <"main_pipeline_vertex_shader">(); // "../shaders/main_pipeline/vertex_shader.hlsl" - - core::smart_refctd_ptr shaderWriteCacheFile; - { - system::ISystem::future_t> future; - m_system->deleteFile(shaderCachePath); // temp solution instead of trimming, to make sure we won't have corrupted json - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_WRITE); - if (future.wait()) - { - future.acquire().move_into(shaderWriteCacheFile); - if (shaderWriteCacheFile) - { - auto serializedCache = shaderWriteCache->serialize(); - if (shaderWriteCacheFile) - { - system::IFile::success_t succ; - shaderWriteCacheFile->write(succ, serializedCache->getPointer(), 0, serializedCache->getSize()); - if (!succ) - m_logger->log("Failed Writing To Shader Cache File.", ILogger::ELL_ERROR); - } - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } } // Shared Blend Params between pipelines diff --git a/71_RayTracingPipeline/CMakeLists.txt b/71_RayTracingPipeline/CMakeLists.txt index 07b0fd396..5c853040e 100644 --- a/71_RayTracingPipeline/CMakeLists.txt +++ b/71_RayTracingPipeline/CMakeLists.txt @@ -34,4 +34,105 @@ if(NBL_BUILD_IMGUI) endif() endif() +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + 
app_resources/light_directional.rcall.hlsl + app_resources/light_point.rcall.hlsl + app_resources/light_spot.rcall.hlsl + app_resources/present.frag.hlsl + app_resources/raytrace.rahit.hlsl + app_resources/raytrace.rchit.hlsl + app_resources/raytrace.rgen.hlsl + app_resources/raytrace.rint.hlsl + app_resources/raytrace.rmiss.hlsl + app_resources/raytrace_procedural.rchit.hlsl + app_resources/raytrace_shadow.rahit.hlsl + app_resources/raytrace_shadow.rmiss.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/raytrace.rgen.hlsl", + "KEY": "raytrace_rgen", + }, + { + "INPUT": "app_resources/raytrace.rchit.hlsl", + "KEY": "raytrace_rchit", + }, + { + "INPUT": "app_resources/raytrace_procedural.rchit.hlsl", + "KEY": "raytrace_procedural_rchit", + }, + { + "INPUT": "app_resources/raytrace.rint.hlsl", + "KEY": "raytrace_rint", + }, + { + "INPUT": "app_resources/raytrace.rahit.hlsl", + "KEY": "raytrace_rahit", + }, + { + "INPUT": "app_resources/raytrace_shadow.rahit.hlsl", + "KEY": "raytrace_shadow_rahit", + }, + { + "INPUT": "app_resources/raytrace.rmiss.hlsl", + "KEY": "raytrace_rmiss", + }, + { + "INPUT": "app_resources/raytrace_shadow.rmiss.hlsl", + "KEY": "raytrace_shadow_rmiss", + }, + { + "INPUT": "app_resources/light_directional.rcall.hlsl", + "KEY": "light_directional_rcall", + }, + { + "INPUT": "app_resources/light_point.rcall.hlsl", + "KEY": "light_point_rcall", + }, + { + "INPUT": "app_resources/light_spot.rcall.hlsl", + "KEY": "light_spot_rcall", + }, + { + "INPUT": "app_resources/present.frag.hlsl", + "KEY": "present_frag", + } +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -O3 + -T lib_${SM} +) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE 
NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) + diff --git a/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl index 956ad5fe6..f5c9080e8 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl @@ -10,7 +10,7 @@ using namespace nbl::hlsl; void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes attribs) { const int instID = spirv::InstanceCustomIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)); + const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo), 8); const uint32_t bitpattern = payload.pcg(); // Cannot use spirv::ignoreIntersectionKHR and spirv::terminateRayKHR due to https://github.com/microsoft/DirectXShaderCompiler/issues/7279 diff --git a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl index 0a8bc5ec8..dc83b5cd2 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl @@ -38,9 +38,9 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) if (normalBufferAddress == 0) { - float3 v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * 12); - float3 v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * 12); - float3 v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * 12); + float3 v0 = 
vk::RawBufferLoad(vertexBufferAddress + indices[0] * 12, 8); + float3 v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * 12, 8); + float3 v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * 12, 8); return normalize(cross(v2 - v0, v1 - v0)); } @@ -50,9 +50,9 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) { case NT_R8G8B8A8_SNORM: { - uint32_t v0 = vk::RawBufferLoad(normalBufferAddress + indices[0] * 4); - uint32_t v1 = vk::RawBufferLoad(normalBufferAddress + indices[1] * 4); - uint32_t v2 = vk::RawBufferLoad(normalBufferAddress + indices[2] * 4); + uint32_t v0 = vk::RawBufferLoad(normalBufferAddress + indices[0] * 4, 8); + uint32_t v1 = vk::RawBufferLoad(normalBufferAddress + indices[1] * 4, 8); + uint32_t v2 = vk::RawBufferLoad(normalBufferAddress + indices[2] * 4, 8); n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); n1 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); @@ -61,9 +61,9 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) break; case NT_R32G32B32_SFLOAT: { - n0 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[0] * 12)); - n1 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[1] * 12)); - n2 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[2] * 12)); + n0 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[0] * 12, 8)); + n1 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[1] * 12, 8)); + n2 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[2] * 12, 8)); } break; } @@ -81,7 +81,7 @@ void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes const int primID = spirv::PrimitiveId; const int instanceCustomIndex = spirv::InstanceCustomIndexKHR; const int geometryIndex = spirv::RayGeometryIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(STriangleGeomInfo)); + const STriangleGeomInfo geom = 
vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(STriangleGeomInfo), 8); const float32_t3 vertexNormal = calculateNormals(primID, geom, attribs.barycentrics); const float32_t3 worldNormal = normalize(mul(vertexNormal, transpose(spirv::WorldToObjectKHR)).xyz); diff --git a/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl index efc99cad9..6571c5c67 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl @@ -1,6 +1,5 @@ #include "common.hlsl" -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" #include "nbl/builtin/hlsl/random/xoroshiro.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" diff --git a/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl index e41551512..dd83f92c9 100644 --- a/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl @@ -10,7 +10,7 @@ using namespace nbl::hlsl; void main(inout OcclusionPayload payload, in BuiltInTriangleIntersectionAttributes attribs) { const int instID = spirv::InstanceCustomIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)); + const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo), 8); const Material material = nbl::hlsl::_static_cast(geom.material); const float attenuation = (1.f-material.alpha) * payload.attenuation; diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index 59b610f4b..ecaf53b7f 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -3,6 +3,8 @@ // For conditions of distribution and use, see copyright notice in nabla.h 
#include "common.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/indirect_commands.hlsl" @@ -106,95 +108,42 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; - smart_refctd_ptr shaderReadCache = nullptr; - smart_refctd_ptr shaderWriteCache = core::make_smart_refctd_ptr(); - auto shaderCachePath = localOutputCWD / "main_pipeline_shader_cache.bin"; - - { - core::smart_refctd_ptr shaderReadCacheFile; - { - system::ISystem::future_t> future; - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_READ); - if (future.wait()) - { - future.acquire().move_into(shaderReadCacheFile); - if (shaderReadCacheFile) - { - const size_t size = shaderReadCacheFile->getSize(); - if (size > 0ull) - { - std::vector contents(size); - system::IFile::success_t succ; - shaderReadCacheFile->read(succ, contents.data(), 0, size); - if (succ) - shaderReadCache = IShaderCompiler::CCache::deserialize(contents); - } - } - } - else - m_logger->log("Failed Openning Shader Cache File.", ILogger::ELL_ERROR); - } - - } - // Load Custom Shader - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr + auto loadPrecompiledShader = [&]() -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); const auto assets = assetBundle.getContents(); if (assets.empty()) return nullptr; // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto sourceRaw = IAsset::castDown(assets[0]); - if (!sourceRaw) + auto shader 
= IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); return nullptr; + } - return m_device->compileShader({ sourceRaw.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + return shader; }; // load shaders - const auto raygenShader = loadCompileAndCreateShader("app_resources/raytrace.rgen.hlsl"); - const auto closestHitShader = loadCompileAndCreateShader("app_resources/raytrace.rchit.hlsl"); - const auto proceduralClosestHitShader = loadCompileAndCreateShader("app_resources/raytrace_procedural.rchit.hlsl"); - const auto intersectionHitShader = loadCompileAndCreateShader("app_resources/raytrace.rint.hlsl"); - const auto anyHitShaderColorPayload = loadCompileAndCreateShader("app_resources/raytrace.rahit.hlsl"); - const auto anyHitShaderShadowPayload = loadCompileAndCreateShader("app_resources/raytrace_shadow.rahit.hlsl"); - const auto missShader = loadCompileAndCreateShader("app_resources/raytrace.rmiss.hlsl"); - const auto missShadowShader = loadCompileAndCreateShader("app_resources/raytrace_shadow.rmiss.hlsl"); - const auto directionalLightCallShader = loadCompileAndCreateShader("app_resources/light_directional.rcall.hlsl"); - const auto pointLightCallShader = loadCompileAndCreateShader("app_resources/light_point.rcall.hlsl"); - const auto spotLightCallShader = loadCompileAndCreateShader("app_resources/light_spot.rcall.hlsl"); - const auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); - - core::smart_refctd_ptr shaderWriteCacheFile; - { - system::ISystem::future_t> future; - m_system->deleteFile(shaderCachePath); // temp solution instead of trimming, to make sure we won't have corrupted json - m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_WRITE); - if (future.wait()) - { - future.acquire().move_into(shaderWriteCacheFile); - if (shaderWriteCacheFile) - { - auto serializedCache = shaderWriteCache->serialize(); - if 
(shaderWriteCacheFile) - { - system::IFile::success_t succ; - shaderWriteCacheFile->write(succ, serializedCache->getPointer(), 0, serializedCache->getSize()); - if (!succ) - m_logger->log("Failed Writing To Shader Cache File.", ILogger::ELL_ERROR); - } - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } - else - m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); - } + const auto raygenShader = loadPrecompiledShader.operator()<"raytrace_rgen">(); // "app_resources/raytrace.rgen.hlsl" + const auto closestHitShader = loadPrecompiledShader.operator()<"raytrace_rchit">(); // "app_resources/raytrace.rchit.hlsl" + const auto proceduralClosestHitShader = loadPrecompiledShader.operator()<"raytrace_procedural_rchit">(); // "app_resources/raytrace_procedural.rchit.hlsl" + const auto intersectionHitShader = loadPrecompiledShader.operator()<"raytrace_rint">(); // "app_resources/raytrace.rint.hlsl" + const auto anyHitShaderColorPayload = loadPrecompiledShader.operator()<"raytrace_rahit">(); // "app_resources/raytrace.rahit.hlsl" + const auto anyHitShaderShadowPayload = loadPrecompiledShader.operator()<"raytrace_shadow_rahit">(); // "app_resources/raytrace_shadow.rahit.hlsl" + const auto missShader = loadPrecompiledShader.operator()<"raytrace_rmiss">(); // "app_resources/raytrace.rmiss.hlsl" + const auto missShadowShader = loadPrecompiledShader.operator()<"raytrace_shadow_rmiss">(); // "app_resources/raytrace_shadow.rmiss.hlsl" + const auto directionalLightCallShader = loadPrecompiledShader.operator()<"light_directional_rcall">(); // "app_resources/light_directional.rcall.hlsl" + const auto pointLightCallShader = loadPrecompiledShader.operator()<"light_point_rcall">(); // "app_resources/light_point.rcall.hlsl" + const auto spotLightCallShader = loadPrecompiledShader.operator()<"light_spot_rcall">(); // "app_resources/light_spot.rcall.hlsl" + const auto fragmentShader = loadPrecompiledShader.operator()<"present_frag">(); 
// "app_resources/present.frag.hlsl" m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) From e1e8dd6fb0c46612defeea46c960a6b85f4b4155 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 30 Oct 2025 18:34:18 +0100 Subject: [PATCH 17/57] Replaced `vk::RawBufferLoad` with `vk::PointerBuffer` in example 71 --- .../app_resources/common.hlsl | 7 ++++++ .../app_resources/raytrace.rahit.hlsl | 3 ++- .../app_resources/raytrace.rchit.hlsl | 25 +++++++++++-------- .../app_resources/raytrace.rgen.hlsl | 5 ++-- .../app_resources/raytrace.rint.hlsl | 3 ++- .../app_resources/raytrace_shadow.rahit.hlsl | 4 ++- 6 files changed, 32 insertions(+), 15 deletions(-) diff --git a/71_RayTracingPipeline/app_resources/common.hlsl b/71_RayTracingPipeline/app_resources/common.hlsl index f9d67af78..502b53160 100644 --- a/71_RayTracingPipeline/app_resources/common.hlsl +++ b/71_RayTracingPipeline/app_resources/common.hlsl @@ -4,6 +4,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/cpp_compat/basic.h" #include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" NBL_CONSTEXPR uint32_t WorkgroupSize = 16; NBL_CONSTEXPR uint32_t MAX_UNORM_10 = 1023; @@ -78,6 +79,9 @@ struct MaterialPacked return (xi>>22) > alpha; } }; +#ifdef __HLSL_VERSION +NBL_REGISTER_OBJ_TYPE(MaterialPacked, 4) +#endif struct SProceduralGeomInfo { @@ -103,6 +107,9 @@ struct STriangleGeomInfo uint32_t indexType : 1; // 16 bit, 32 bit }; +#ifdef __HLSL_VERSION +NBL_REGISTER_OBJ_TYPE(STriangleGeomInfo, 8) +#endif enum E_GEOM_TYPE : uint16_t { diff --git a/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl index f5c9080e8..da7cc1594 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl @@ -10,7 +10,8 @@ using namespace nbl::hlsl; void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes attribs) 
{ const int instID = spirv::InstanceCustomIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo), 8); + const static uint64_t STriangleGeomInfoAlignment = nbl::hlsl::alignment_of_v; + const STriangleGeomInfo geom = vk::BufferPointer(pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)).Get(); const uint32_t bitpattern = payload.pcg(); // Cannot use spirv::ignoreIntersectionKHR and spirv::terminateRayKHR due to https://github.com/microsoft/DirectXShaderCompiler/issues/7279 diff --git a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl index dc83b5cd2..e6ebcda78 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl @@ -38,9 +38,9 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) if (normalBufferAddress == 0) { - float3 v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * 12, 8); - float3 v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * 12, 8); - float3 v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * 12, 8); + float3 v0 = (nbl::hlsl::bda::__ptr::create(vertexBufferAddress) + indices[0]).deref().load(); + float3 v1 = (nbl::hlsl::bda::__ptr::create(vertexBufferAddress) + indices[1]).deref().load(); + float3 v2 = (nbl::hlsl::bda::__ptr::create(vertexBufferAddress) + indices[2]).deref().load(); return normalize(cross(v2 - v0, v1 - v0)); } @@ -50,9 +50,9 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) { case NT_R8G8B8A8_SNORM: { - uint32_t v0 = vk::RawBufferLoad(normalBufferAddress + indices[0] * 4, 8); - uint32_t v1 = vk::RawBufferLoad(normalBufferAddress + indices[1] * 4, 8); - uint32_t v2 = vk::RawBufferLoad(normalBufferAddress + indices[2] * 4, 8); + uint32_t v0 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[0]).deref().load(); + 
uint32_t v1 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[1]).deref().load(); + uint32_t v2 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[2]).deref().load(); n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); n1 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); @@ -61,9 +61,13 @@ float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) break; case NT_R32G32B32_SFLOAT: { - n0 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[0] * 12, 8)); - n1 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[1] * 12, 8)); - n2 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[2] * 12, 8)); + float3 v0 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[0]).deref().load(); + float3 v1 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[1]).deref().load(); + float3 v2 = (nbl::hlsl::bda::__ptr::create(normalBufferAddress) + indices[2]).deref().load(); + + n0 = normalize(v0); + n1 = normalize(v1); + n2 = normalize(v2); } break; } @@ -81,7 +85,8 @@ void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes const int primID = spirv::PrimitiveId; const int instanceCustomIndex = spirv::InstanceCustomIndexKHR; const int geometryIndex = spirv::RayGeometryIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(STriangleGeomInfo), 8); + const static uint64_t STriangleGeomInfoAlignment = nbl::hlsl::alignment_of_v; + const STriangleGeomInfo geom = vk::BufferPointer(pc.triangleGeomInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(STriangleGeomInfo)).Get(); const float32_t3 vertexNormal = calculateNormals(primID, geom, attribs.barycentrics); const float32_t3 worldNormal = normalize(mul(vertexNormal, transpose(spirv::WorldToObjectKHR)).xyz); diff --git a/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl 
b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl index 6571c5c67..c42d5a7df 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl @@ -79,15 +79,16 @@ void main() Material material; MaterialId materialId = payload.materialId; + const static uint64_t MaterialPackedAlignment = nbl::hlsl::alignment_of_v; // we use negative index to indicate that this is a procedural geometry if (materialId.isHitProceduralGeom()) { - const MaterialPacked materialPacked = vk::RawBufferLoad(pc.proceduralGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(SProceduralGeomInfo)); + const MaterialPacked materialPacked = vk::BufferPointer(pc.proceduralGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(SProceduralGeomInfo)).Get(); material = nbl::hlsl::_static_cast(materialPacked); } else { - const MaterialPacked materialPacked = vk::RawBufferLoad(pc.triangleGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(STriangleGeomInfo)); + const MaterialPacked materialPacked = vk::BufferPointer(pc.triangleGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(STriangleGeomInfo)).Get(); material = nbl::hlsl::_static_cast(materialPacked); } diff --git a/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl index 72f9beffd..551be1c8a 100644 --- a/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl @@ -36,8 +36,9 @@ void main() const int primID = spirv::PrimitiveId; + const static uint64_t SProceduralGeomInfoAlignment = nbl::hlsl::alignment_of_v; // Sphere data - SProceduralGeomInfo sphere = vk::RawBufferLoad(pc.proceduralGeomInfoBuffer + primID * sizeof(SProceduralGeomInfo)); + SProceduralGeomInfo sphere = vk::BufferPointer(pc.proceduralGeomInfoBuffer + primID * sizeof(SProceduralGeomInfo)).Get(); const float32_t tHit = hitSphere(sphere, ray); diff --git 
a/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl index dd83f92c9..d87b8dd5d 100644 --- a/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl +++ b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl @@ -1,6 +1,7 @@ #include "common.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" #include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" +#include "nbl/builtin/hlsl/type_traits.hlsl" using namespace nbl::hlsl; @@ -10,7 +11,8 @@ using namespace nbl::hlsl; void main(inout OcclusionPayload payload, in BuiltInTriangleIntersectionAttributes attribs) { const int instID = spirv::InstanceCustomIndexKHR; - const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo), 8); + const static uint64_t STriangleGeomInfoAlignment = nbl::hlsl::alignment_of_v; + const STriangleGeomInfo geom = vk::BufferPointer(pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)).Get(); const Material material = nbl::hlsl::_static_cast(geom.material); const float attenuation = (1.f-material.alpha) * payload.attenuation; From 08c898d5af460ba6469a78fb625216e27a1bc8a8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 27 Nov 2025 20:22:03 +0700 Subject: [PATCH 18/57] Reindex mortons example from 12 to 73 --- {12_Mortons => 73_Mortons}/CMakeLists.txt | 0 {12_Mortons => 73_Mortons}/CTester.h | 2 -- {12_Mortons => 73_Mortons}/ITester.h | 27 +++---------------- .../app_resources/common.hlsl | 0 .../app_resources/test.comp.hlsl | 1 + .../app_resources/testCommon.hlsl | 0 .../config.json.template | 0 {12_Mortons => 73_Mortons}/main.cpp | 7 ++--- {12_Mortons => 73_Mortons}/pipeline.groovy | 0 CMakeLists.txt | 2 +- 10 files changed, 10 insertions(+), 29 deletions(-) rename {12_Mortons => 73_Mortons}/CMakeLists.txt (100%) rename {12_Mortons => 73_Mortons}/CTester.h (99%) rename {12_Mortons => 73_Mortons}/ITester.h 
(90%) rename {12_Mortons => 73_Mortons}/app_resources/common.hlsl (100%) rename {12_Mortons => 73_Mortons}/app_resources/test.comp.hlsl (96%) rename {12_Mortons => 73_Mortons}/app_resources/testCommon.hlsl (100%) rename {12_Mortons => 73_Mortons}/config.json.template (100%) rename {12_Mortons => 73_Mortons}/main.cpp (89%) rename {12_Mortons => 73_Mortons}/pipeline.groovy (100%) diff --git a/12_Mortons/CMakeLists.txt b/73_Mortons/CMakeLists.txt similarity index 100% rename from 12_Mortons/CMakeLists.txt rename to 73_Mortons/CMakeLists.txt diff --git a/12_Mortons/CTester.h b/73_Mortons/CTester.h similarity index 99% rename from 12_Mortons/CTester.h rename to 73_Mortons/CTester.h index 5a61be501..c47e94376 100644 --- a/12_Mortons/CTester.h +++ b/73_Mortons/CTester.h @@ -3,8 +3,6 @@ #include #include "app_resources/testCommon.hlsl" -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" #include "ITester.h" using namespace nbl; diff --git a/12_Mortons/ITester.h b/73_Mortons/ITester.h similarity index 90% rename from 12_Mortons/ITester.h rename to 73_Mortons/ITester.h index 2510dd997..a0c76ac75 100644 --- a/12_Mortons/ITester.h +++ b/73_Mortons/ITester.h @@ -4,7 +4,6 @@ #include #include "app_resources/common.hlsl" #include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" using namespace nbl; @@ -45,7 +44,7 @@ class ITester logFail("Failed to create Command Buffers!\n"); // Load shaders, set up pipeline - core::smart_refctd_ptr shader; + core::smart_refctd_ptr shader; { asset::IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); @@ -53,31 +52,13 @@ class ITester auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); const auto assets = assetBundle.getContents(); if (assets.empty()) - { - logFail("Could not load shader!"); - assert(0); - } + 
return logFail("Could not load shader!"); // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - - auto* compilerSet = m_assetMgr->getCompilerSet(); - - asset::IShaderCompiler::SCompilerOptions options = {}; - options.stage = source->getStage(); - options.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; - options.debugInfoFlags |= asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - - auto spirv = compilerSet->compileToSPIRV(source.get(), options); + core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); - video::ILogicalDevice::SShaderCreationParameters params{}; - params.cpushader = spirv.get(); - shader = m_device->createShader(params); + shader = m_device->compileShader({source.get()}); } if (!shader) diff --git a/12_Mortons/app_resources/common.hlsl b/73_Mortons/app_resources/common.hlsl similarity index 100% rename from 12_Mortons/app_resources/common.hlsl rename to 73_Mortons/app_resources/common.hlsl diff --git a/12_Mortons/app_resources/test.comp.hlsl b/73_Mortons/app_resources/test.comp.hlsl similarity index 96% rename from 12_Mortons/app_resources/test.comp.hlsl rename to 73_Mortons/app_resources/test.comp.hlsl index 243983d5a..d1010aeb0 100644 --- a/12_Mortons/app_resources/test.comp.hlsl +++ b/73_Mortons/app_resources/test.comp.hlsl @@ -8,6 +8,7 @@ [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; [numthreads(256, 1, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { if (invocationID.x == 0) diff --git a/12_Mortons/app_resources/testCommon.hlsl 
b/73_Mortons/app_resources/testCommon.hlsl similarity index 100% rename from 12_Mortons/app_resources/testCommon.hlsl rename to 73_Mortons/app_resources/testCommon.hlsl diff --git a/12_Mortons/config.json.template b/73_Mortons/config.json.template similarity index 100% rename from 12_Mortons/config.json.template rename to 73_Mortons/config.json.template diff --git a/12_Mortons/main.cpp b/73_Mortons/main.cpp similarity index 89% rename from 12_Mortons/main.cpp rename to 73_Mortons/main.cpp index a05e61842..6034e3469 100644 --- a/12_Mortons/main.cpp +++ b/73_Mortons/main.cpp @@ -7,7 +7,7 @@ #include #include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" #include "app_resources/common.hlsl" #include "CTester.h" @@ -17,12 +17,13 @@ using namespace nbl::hlsl; using namespace nbl::system; using namespace nbl::asset; using namespace nbl::video; +using namespace nbl::examples; using namespace nbl::application_templates; -class MortonTest final : public MonoDeviceApplication, public MonoAssetManagerAndBuiltinResourceApplication +class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesApplication { using device_base_t = MonoDeviceApplication; - using asset_base_t = MonoAssetManagerAndBuiltinResourceApplication; + using asset_base_t = BuiltinResourcesApplication; public: MortonTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { diff --git a/12_Mortons/pipeline.groovy b/73_Mortons/pipeline.groovy similarity index 100% rename from 12_Mortons/pipeline.groovy rename to 73_Mortons/pipeline.groovy diff --git a/CMakeLists.txt b/CMakeLists.txt index eff5154dc..b85577144 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,7 @@ 
if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) - add_subdirectory(12_Mortons EXCLUDE_FROM_ALL) + add_subdirectory(73_Mortons EXCLUDE_FROM_ALL) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From 7f8dd73473e47d3ea02537ed042859b913855f13 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 28 Nov 2025 20:26:58 +0700 Subject: [PATCH 19/57] Global variable of hlsl to use NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR --- .../app_resources/common.hlsl | 6 +-- 27_MPMCScheduler/app_resources/common.hlsl | 6 +-- 62_CAD/shaders/geotexture/common.hlsl | 2 +- 62_CAD/shaders/globals.hlsl | 38 +++++++++---------- .../app_resources/benchmark/common.hlsl | 8 ++-- .../app_resources/common.hlsl | 2 +- 66_HLSLBxDFTests/app_resources/tests.hlsl | 8 ++-- 67_RayQueryGeometry/app_resources/common.hlsl | 2 +- 8 files changed, 36 insertions(+), 36 deletions(-) diff --git a/07_StagingAndMultipleQueues/app_resources/common.hlsl b/07_StagingAndMultipleQueues/app_resources/common.hlsl index 259d5069d..de15810c9 100644 --- a/07_StagingAndMultipleQueues/app_resources/common.hlsl +++ b/07_StagingAndMultipleQueues/app_resources/common.hlsl @@ -1,8 +1,8 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t WorkgroupSizeX = 16; -NBL_CONSTEXPR uint32_t WorkgroupSizeY = 16; -NBL_CONSTEXPR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeX = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeY = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; static const uint32_t FRAMES_IN_FLIGHT = 3u; diff --git a/27_MPMCScheduler/app_resources/common.hlsl b/27_MPMCScheduler/app_resources/common.hlsl index 2fb8971ad..2783f13a2 100644 --- a/27_MPMCScheduler/app_resources/common.hlsl +++ b/27_MPMCScheduler/app_resources/common.hlsl 
@@ -1,8 +1,8 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t WorkgroupSizeX = 8; -NBL_CONSTEXPR uint32_t WorkgroupSizeY = 8; -NBL_CONSTEXPR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeX = 8; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSizeY = 8; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = WorkgroupSizeX*WorkgroupSizeY; struct PushConstants { diff --git a/62_CAD/shaders/geotexture/common.hlsl b/62_CAD/shaders/geotexture/common.hlsl index 691cd3d3b..f2053e003 100644 --- a/62_CAD/shaders/geotexture/common.hlsl +++ b/62_CAD/shaders/geotexture/common.hlsl @@ -4,7 +4,7 @@ #include "../globals.hlsl" // Handle multiple geo textures, separate set, array of texture? index allocator? or multiple sets? -NBL_CONSTEXPR uint32_t MaxGeoTextures = 256; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxGeoTextures = 256; // GeoTexture Oriented Bounding Box struct GeoTextureOBB diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 5c3681910..7c2b7e893 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -352,8 +352,8 @@ static_assert(offsetof(CurveBox, curveMax[0]) == 56u); static_assert(sizeof(CurveBox) == 80u); #endif -NBL_CONSTEXPR uint32_t InvalidRigidSegmentIndex = 0xffffffff; -NBL_CONSTEXPR float InvalidStyleStretchValue = nbl::hlsl::numeric_limits::infinity; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidRigidSegmentIndex = 0xffffffff; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float InvalidStyleStretchValue = nbl::hlsl::numeric_limits::infinity; // TODO[Przemek]: we will need something similar to LineStyles but related to heigh shading settings which is user customizable (like stipple patterns) and requires upper_bound to figure out the color based on height value. 
@@ -547,27 +547,27 @@ inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) } #endif -NBL_CONSTEXPR uint32_t ImagesBindingArraySize = 128; -NBL_CONSTEXPR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture -NBL_CONSTEXPR uint32_t AlphaBits = 32u - MainObjectIdxBits; -NBL_CONSTEXPR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; -NBL_CONSTEXPR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; -NBL_CONSTEXPR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t ImagesBindingArraySize = 128; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t AlphaBits = 32u - MainObjectIdxBits; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; // Hatches -NBL_CONSTEXPR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; -NBL_CONSTEXPR MajorAxis SelectedMinorAxis = 
MajorAxis::MAJOR_X; //(MajorAxis) (1 - (uint32_t) SelectedMajorAxis); +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR MajorAxis SelectedMinorAxis = MajorAxis::MAJOR_X; //(MajorAxis) (1 - (uint32_t) SelectedMajorAxis); // Text or MSDF Hatches -NBL_CONSTEXPR float MSDFPixelRange = 4.0f; -NBL_CONSTEXPR float MSDFPixelRangeHalf = MSDFPixelRange / 2.0f; -NBL_CONSTEXPR float MSDFSize = 64.0f; -NBL_CONSTEXPR uint32_t MSDFMips = 4; -NBL_CONSTEXPR float HatchFillMSDFSceenSpaceSize = 8.0; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float MSDFPixelRange = 4.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float MSDFPixelRangeHalf = MSDFPixelRange / 2.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float MSDFSize = 64.0f; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t MSDFMips = 4; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float HatchFillMSDFSceenSpaceSize = 8.0; inline bool isInvalidGridDtmHeightValue(float value) { diff --git a/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl b/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl index 98875c42f..7f6d1dec1 100644 --- a/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl +++ b/64_EmulatedFloatTest/app_resources/benchmark/common.hlsl @@ -4,10 +4,10 @@ #include -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 128u; -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; -NBL_CONSTEXPR uint32_t BENCHMARK_WORKGROUP_COUNT = 1024u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_X = 128u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Y = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_DIMENSION_SIZE_Z = 1u; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t BENCHMARK_WORKGROUP_COUNT = 1024u; enum EF64_BENCHMARK_MODE { diff --git a/64_EmulatedFloatTest/app_resources/common.hlsl 
b/64_EmulatedFloatTest/app_resources/common.hlsl index aea1ce94d..0e8762c5a 100644 --- a/64_EmulatedFloatTest/app_resources/common.hlsl +++ b/64_EmulatedFloatTest/app_resources/common.hlsl @@ -8,7 +8,7 @@ #include #include -NBL_CONSTEXPR uint32_t WORKGROUP_SIZE = 1; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WORKGROUP_SIZE = 1; using namespace nbl; using namespace hlsl; diff --git a/66_HLSLBxDFTests/app_resources/tests.hlsl b/66_HLSLBxDFTests/app_resources/tests.hlsl index 256ed3ce9..6f67c359f 100644 --- a/66_HLSLBxDFTests/app_resources/tests.hlsl +++ b/66_HLSLBxDFTests/app_resources/tests.hlsl @@ -356,13 +356,13 @@ struct is_microfacet_bsdf : bool_constant< > {}; template -NBL_CONSTEXPR bool is_basic_brdf_v = is_basic_brdf::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_basic_brdf_v = is_basic_brdf::value; template -NBL_CONSTEXPR bool is_microfacet_brdf_v = is_microfacet_brdf::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_microfacet_brdf_v = is_microfacet_brdf::value; template -NBL_CONSTEXPR bool is_basic_bsdf_v = is_basic_bsdf::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_basic_bsdf_v = is_basic_bsdf::value; template -NBL_CONSTEXPR bool is_microfacet_bsdf_v = is_microfacet_bsdf::value; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR bool is_microfacet_bsdf_v = is_microfacet_bsdf::value; template diff --git a/67_RayQueryGeometry/app_resources/common.hlsl b/67_RayQueryGeometry/app_resources/common.hlsl index 68a353adc..ecac0f59d 100644 --- a/67_RayQueryGeometry/app_resources/common.hlsl +++ b/67_RayQueryGeometry/app_resources/common.hlsl @@ -3,7 +3,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" -NBL_CONSTEXPR uint32_t WorkgroupSize = 16; +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint32_t WorkgroupSize = 16; enum NormalType : uint32_t { From 43b8634502fd09e1405bf9a07f55bca21d613823 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 17:58:40 +0700 Subject: [PATCH 20/57] Add test for operator- --- 73_Mortons/CTester.h | 2 ++ 
73_Mortons/app_resources/common.hlsl | 1 + 73_Mortons/app_resources/testCommon.hlsl | 5 ++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/73_Mortons/CTester.h b/73_Mortons/CTester.h index c47e94376..fa29f3c9c 100644 --- a/73_Mortons/CTester.h +++ b/73_Mortons/CTester.h @@ -42,6 +42,7 @@ class CTester final : public ITester expected.emulatedNot = _static_cast(~generatedA); expected.emulatedPlus = _static_cast(generatedA + generatedB); expected.emulatedMinus = _static_cast(generatedA - generatedB); + expected.emulatedUnaryMinus = _static_cast(-generatedA); expected.emulatedLess = uint32_t(generatedA < generatedB); expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); expected.emulatedGreater = uint32_t(generatedA > generatedB); @@ -273,6 +274,7 @@ class CTester final : public ITester verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); + verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testType); // Morton Plus verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); diff --git a/73_Mortons/app_resources/common.hlsl b/73_Mortons/app_resources/common.hlsl index b058ad821..18cdc058f 100644 --- a/73_Mortons/app_resources/common.hlsl +++ b/73_Mortons/app_resources/common.hlsl @@ -61,6 +61,7 @@ struct TestValues emulated_uint64_t emulatedNot; emulated_uint64_t emulatedPlus; emulated_uint64_t emulatedMinus; + emulated_int64_t emulatedUnaryMinus; // These are bools but stored as uint because you can't store bools, causes a SPIR-V issue uint32_t emulatedLess; uint32_t 
emulatedLessEqual; diff --git a/73_Mortons/app_resources/testCommon.hlsl b/73_Mortons/app_resources/testCommon.hlsl index 9ff9a4fa8..4ca2b859d 100644 --- a/73_Mortons/app_resources/testCommon.hlsl +++ b/73_Mortons/app_resources/testCommon.hlsl @@ -4,6 +4,7 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa { emulated_uint64_t emulatedA = _static_cast(input.generatedA); emulated_uint64_t emulatedB = _static_cast(input.generatedB); + emulated_int64_t signedEmulatedA = _static_cast(input.generatedA); // Emulated int tests output.emulatedAnd = emulatedA & emulatedB; @@ -24,7 +25,9 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); arithmetic_right_shift_operator signedRightShift; - output.emulatedSignedRightShifted = signedRightShift(_static_cast(emulatedA), input.shift); + output.emulatedSignedRightShifted = signedRightShift(signedEmulatedA, input.shift); + + output.emulatedUnaryMinus = signedEmulatedA.operator-(); // Morton tests uint64_t2 Vec2A = { input.coordX, input.coordY }; From ba6641f6de9e107c923d3999cd7f52a8774797b7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 18:20:31 +0700 Subject: [PATCH 21/57] Use 1, 1, 1 workgroup dimension --- 73_Mortons/app_resources/test.comp.hlsl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/73_Mortons/app_resources/test.comp.hlsl b/73_Mortons/app_resources/test.comp.hlsl index d1010aeb0..60cdf94b1 100644 --- a/73_Mortons/app_resources/test.comp.hlsl +++ b/73_Mortons/app_resources/test.comp.hlsl @@ -3,14 +3,15 @@ //// For conditions of distribution and use, see copyright notice in nabla.h #include "testCommon.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" [[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; [[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; -[numthreads(256, 1, 1)] +[numthreads(1, 1, 1)] [shader("compute")] 
void main(uint3 invocationID : SV_DispatchThreadID) { - if (invocationID.x == 0) - fillTestValues(inputTestValues[0], outputTestValues[0]); + uint32_t testID = glsl::gl_GlobalInvocationID().x; + fillTestValues(inputTestValues[testID], outputTestValues[testID]); } From e830c3423b723e4efd9dbc2d2a981098e6830a56 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 1 Dec 2025 18:27:44 +0700 Subject: [PATCH 22/57] Enable previously failed test because of bug in glm --- 22_CppCompat/CIntrinsicsTester.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index fa35d1a68..f014bd1cb 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -250,9 +250,8 @@ class CIntrinsicsTester final : public ITester verifyTestValue("smoothStep", expectedTestValues.smoothStep, testValues.smoothStep, testType); verifyTestValue("addCarryResult", expectedTestValues.addCarry.result, testValues.addCarry.result, testType); verifyTestValue("addCarryCarry", expectedTestValues.addCarry.carry, testValues.addCarry.carry, testType); - // Disabled: current glm implementation is wrong - //verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testType); - //verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testType); + verifyTestValue("subBorrowResult", expectedTestValues.subBorrow.result, testValues.subBorrow.result, testType); + verifyTestValue("subBorrowBorrow", expectedTestValues.subBorrow.borrow, testValues.subBorrow.borrow, testType); verifyTestVector3dValue("normalize", expectedTestValues.normalize, testValues.normalize, testType); verifyTestVector3dValue("cross", expectedTestValues.cross, testValues.cross, testType); @@ -277,9 +276,8 @@ class CIntrinsicsTester final : public ITester verifyTestVector3dValue("refract", expectedTestValues.refract, testValues.refract, testType); 
verifyTestVector3dValue("addCarryVecResult", expectedTestValues.addCarryVec.result, testValues.addCarryVec.result, testType); verifyTestVector3dValue("addCarryVecCarry", expectedTestValues.addCarryVec.carry, testValues.addCarryVec.carry, testType); - // Disabled: current glm implementation is wrong - //verifyTestVector3dValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, testValues.subBorrowVec.result, testType); - //verifyTestVector3dValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, testValues.subBorrowVec.borrow, testType); + verifyTestVector3dValue("subBorrowVecResult", expectedTestValues.subBorrowVec.result, testValues.subBorrowVec.result, testType); + verifyTestVector3dValue("subBorrowVecBorrow", expectedTestValues.subBorrowVec.borrow, testValues.subBorrowVec.borrow, testType); verifyTestMatrix3x3Value("mul", expectedTestValues.mul, testValues.mul, testType); verifyTestMatrix3x3Value("transpose", expectedTestValues.transpose, testValues.transpose, testType); From f18160276e78f860f64c45111c874e3351b44ffb Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 3 Dec 2025 23:24:18 +0300 Subject: [PATCH 23/57] New example, copy of 61_UI, updated a lot, visualizer, still not "solid angle", rest should be shader work --- 72_SolidAngleVisualizer/CMakeLists.txt | 20 + 72_SolidAngleVisualizer/README.md | 0 .../hlsl/SolidAngleVis.frag.hlsl | 175 +++ .../app_resources/hlsl/common.hlsl | 14 + 72_SolidAngleVisualizer/config.json.template | 28 + 72_SolidAngleVisualizer/include/common.hpp | 20 + 72_SolidAngleVisualizer/include/transform.hpp | 172 +++ 72_SolidAngleVisualizer/main.cpp | 1105 +++++++++++++++++ 72_SolidAngleVisualizer/pipeline.groovy | 50 + 72_SolidAngleVisualizer/src/transform.cpp | 0 CMakeLists.txt | 1 + 11 files changed, 1585 insertions(+) create mode 100644 72_SolidAngleVisualizer/CMakeLists.txt create mode 100644 72_SolidAngleVisualizer/README.md create mode 100644 
72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl create mode 100644 72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl create mode 100644 72_SolidAngleVisualizer/config.json.template create mode 100644 72_SolidAngleVisualizer/include/common.hpp create mode 100644 72_SolidAngleVisualizer/include/transform.hpp create mode 100644 72_SolidAngleVisualizer/main.cpp create mode 100644 72_SolidAngleVisualizer/pipeline.groovy create mode 100644 72_SolidAngleVisualizer/src/transform.cpp diff --git a/72_SolidAngleVisualizer/CMakeLists.txt b/72_SolidAngleVisualizer/CMakeLists.txt new file mode 100644 index 000000000..5d0021f61 --- /dev/null +++ b/72_SolidAngleVisualizer/CMakeLists.txt @@ -0,0 +1,20 @@ +if(NBL_BUILD_IMGUI) + set(NBL_EXTRA_SOURCES + "${CMAKE_CURRENT_SOURCE_DIR}/src/transform.cpp" + ) + + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + imguizmo + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + # TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !? 
+ nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") + # TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet + # LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) +endif() \ No newline at end of file diff --git a/72_SolidAngleVisualizer/README.md b/72_SolidAngleVisualizer/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl new file mode 100644 index 000000000..d783a5b37 --- /dev/null +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -0,0 +1,175 @@ +#pragma wave shader_stage(fragment) + +#include "common.hlsl" + +#include + +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +[[vk::push_constant]] struct PushConstants pc; + +static const float CIRCLE_RADIUS = 0.45f; + +// --- Geometry Utils --- + +// Adjacency of edges to faces +static const int2 edgeToFaces[12] = { + {4,2}, {3,4}, {2,5}, {5,3}, + {2,0}, {0,3}, {1,2}, {3,1}, + {0,4}, {5,0}, {4,1}, {1,5} +}; + +static const float3 localNormals[6] = { + float3(0, 0, -1), // Face 0 (Z-) + float3(0, 0, 1), // Face 1 (Z+) + float3(-1, 0, 0), // Face 2 (X-) + float3(1, 0, 0), // Face 3 (X+) + float3(0, -1, 0), // Face 4 (Y-) + float3(0, 1, 0) // Face 5 (Y+) +}; + +static float3 corners[8]; +static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), + float3(0,0,0), float3(0,0,0), float3(0,0,0) }; +static float2 projCorners[8]; + + +// Converts UV into centered, aspect-corrected NDC circle space +float2 toCircleSpace(float2 uv) +{ + float aspect = pc.viewport.z / pc.viewport.w; + float2 centered = uv - 0.5f; + centered.x *= aspect; + return centered; +} + +// Distance to a 2D line segment +float sdSegment(float2 p, float2 a, float2 b) +{ + float2 pa = p - a; + float2 ba = b - a; + 
float h = clamp(dot(pa, ba) / dot(ba, ba), 0.0f, 1.0f); + return length(pa - ba * h); +} + +// TODO: Hemispherical Projection (Solid Angle / Orthographic/Lambertian Projection) +float2 project(float3 p) +{ + return normalize(p).xy; +} + +void computeCubeGeo() +{ + for (int i = 0; i < 8; i++) + { + float3 localPos = float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; + float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; + + corners[i] = worldPos; + + faceCenters[i/4] += worldPos / 4.0f; + faceCenters[2+i%2] += worldPos / 4.0f; + faceCenters[4+(i/2)%2] += worldPos / 4.0f; + + float3 viewPos = worldPos; + projCorners[i] = project(viewPos); + } +} + +int getVisibilityCount(int2 faces, float3 cameraPos) +{ + float3x3 rotMatrix = (float3x3)pc.modelMatrix; + float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); + float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); + + float3 viewVec_f1 = faceCenters[faces.x] - cameraPos; + float3 viewVec_f2 = faceCenters[faces.y] - cameraPos; + + // Face is visible if its outward normal points towards the origin (camera). + bool visible1 = dot(n_world_f1, viewVec_f1) < 0.0f; + bool visible2 = dot(n_world_f2, viewVec_f2) < 0.0f; + + // Determine Line Style: + bool isSilhouette = visible1 != visible2; // One face visible, the other hidden + bool isInner = visible1 && visible2; // Both faces visible + + int visibilityCount = 0; + if (isSilhouette) + { + visibilityCount = 1; + } + else if (isInner) + { + visibilityCount = 2; + } + + return visibilityCount; +} + +void drawLine(float2 p, int a, int b, int visibilityCount, inout float4 color, float aaWidth) +{ + if (visibilityCount > 0) + { + float3 A = corners[a]; + float3 B = corners[b]; + + float avgDepth = (length(A) + length(B)) * 0.5f; + float referenceDepth = 3.0f; + float depthScale = referenceDepth / avgDepth; + + float baseWidth = (visibilityCount == 1) ? 0.005f : 0.002f; + float intensity = (visibilityCount == 1) ? 
1.0f : 0.5f; + float4 edgeColor = (visibilityCount == 1) ? float4(0.0f, 0.5f, 1.0f, 1.0f) : float4(1.0f, 0.0f, 0.0f, 1.0f); // Blue vs Red + + float width = min(baseWidth * depthScale, 0.03f); + + float dist = sdSegment(p, projCorners[a], projCorners[b]); + + float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); + + color += edgeColor * alpha * intensity; + } +} + +void drawRing(float2 p, inout float4 color, float aaWidth) +{ + float positionLength = length(p); + + // Mask to cut off drawing outside the circle + // float circleMask = 1.0f - smoothstep(CIRCLE_RADIUS, CIRCLE_RADIUS + aaWidth, positionLength); + // color *= circleMask; + + // Add a white background circle ring + float ringWidth = 0.005f; + float ringDistance = abs(positionLength - CIRCLE_RADIUS); + float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); + + // Ring color is now white + color = max(color, float4(1.0, 1.0, 1.0, 1.0) * ringAlpha); +} + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 +{ + float3 cameraPos = float3(0, 0, 0); // Camera at origin + float2 p = toCircleSpace(vx.uv); + float4 color = float4(0, 0, 0, 0); + + computeCubeGeo(); + + float aaWidth = max(fwidth(p.x), fwidth(p.y)); + + for (int j = 0; j < 12; j++) + { + int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? 
j % 2 : 0); + int b = a + (4 >> (j / 4)); + + int2 faces = edgeToFaces[j]; + int visibilityCount = getVisibilityCount(faces, cameraPos); + drawLine(p, a, b, visibilityCount, color, aaWidth); + } + + drawRing(p, color, aaWidth); + + return color; +} \ No newline at end of file diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl new file mode 100644 index 000000000..80368d08f --- /dev/null +++ b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -0,0 +1,14 @@ +#ifndef _SOLID_ANGLE_VIS_COMMON_HLSL_ +#define _SOLID_ANGLE_VIS_COMMON_HLSL_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + + + +struct PushConstants +{ + nbl::hlsl::float32_t3x4 modelMatrix; + nbl::hlsl::float32_t4 viewport; +}; + + +#endif // _SOLID_ANGLE_VIS_COMMON_HLSL_ diff --git a/72_SolidAngleVisualizer/config.json.template b/72_SolidAngleVisualizer/config.json.template new file mode 100644 index 000000000..f961745c1 --- /dev/null +++ b/72_SolidAngleVisualizer/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/72_SolidAngleVisualizer/include/common.hpp b/72_SolidAngleVisualizer/include/common.hpp new file mode 100644 index 000000000..2e8e985dd --- /dev/null +++ b/72_SolidAngleVisualizer/include/common.hpp @@ -0,0 +1,20 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + + +#include "nbl/examples/examples.hpp" + +// the example's headers +#include "transform.hpp" 
+#include "nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl" + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp new file mode 100644 index 000000000..002a9d215 --- /dev/null +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -0,0 +1,172 @@ +#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ + + +#include "nbl/ui/ICursorControl.h" + +#include "nbl/ext/ImGui/ImGui.h" + +#include "imgui/imgui_internal.h" +#include "imguizmo/ImGuizmo.h" + + +struct TransformRequestParams +{ + float camDistance = 8.f; + uint8_t sceneTexDescIx = ~0; + bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false; +}; + +struct TransformReturnInfo +{ + nbl::hlsl::uint16_t2 sceneResolution = { 2048,1024 }; + bool isGizmoWindowHovered; + bool isGizmoBeingUsed; +}; + +TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +{ + static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); + static ImGuizmo::MODE mCurrentGizmoMode(ImGuizmo::LOCAL); + static bool useSnap = false; + static float snap[3] = { 1.f, 1.f, 1.f }; + static float bounds[] = { -0.5f, -0.5f, -0.5f, 0.5f, 0.5f, 0.5f }; + static float boundsSnap[] = { 0.1f, 0.1f, 0.1f }; + static bool boundSizing = false; + static bool boundSizingSnap = false; + + if (params.editTransformDecomposition) + { + if (ImGui::IsKeyPressed(ImGuiKey_T)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + if (ImGui::IsKeyPressed(ImGuiKey_R)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + if 
(ImGui::IsKeyPressed(ImGuiKey_S)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) + mCurrentGizmoOperation = ImGuizmo::TRANSLATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Rotate", mCurrentGizmoOperation == ImGuizmo::ROTATE)) + mCurrentGizmoOperation = ImGuizmo::ROTATE; + ImGui::SameLine(); + if (ImGui::RadioButton("Scale", mCurrentGizmoOperation == ImGuizmo::SCALE)) + mCurrentGizmoOperation = ImGuizmo::SCALE; + if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) + mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + float matrixTranslation[3], matrixRotation[3], matrixScale[3]; + ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); + ImGui::InputFloat3("Tr", matrixTranslation); + ImGui::InputFloat3("Rt", matrixRotation); + ImGui::InputFloat3("Sc", matrixScale); + ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); + + if (mCurrentGizmoOperation != ImGuizmo::SCALE) + { + if (ImGui::RadioButton("Local", mCurrentGizmoMode == ImGuizmo::LOCAL)) + mCurrentGizmoMode = ImGuizmo::LOCAL; + ImGui::SameLine(); + if (ImGui::RadioButton("World", mCurrentGizmoMode == ImGuizmo::WORLD)) + mCurrentGizmoMode = ImGuizmo::WORLD; + } + if (ImGui::IsKeyPressed(ImGuiKey_S) && ImGui::IsKeyPressed(ImGuiKey_LeftShift)) + useSnap = !useSnap; + ImGui::Checkbox("##UseSnap", &useSnap); + ImGui::SameLine(); + + switch (mCurrentGizmoOperation) + { + case ImGuizmo::TRANSLATE: + ImGui::InputFloat3("Snap", &snap[0]); + break; + case ImGuizmo::ROTATE: + ImGui::InputFloat("Angle Snap", &snap[0]); + break; + case ImGuizmo::SCALE: + ImGui::InputFloat("Scale Snap", &snap[0]); + break; + } + ImGui::Checkbox("Bound Sizing", &boundSizing); + if (boundSizing) + { + ImGui::PushID(3); + ImGui::Checkbox("##BoundSizing", &boundSizingSnap); + ImGui::SameLine(); + ImGui::InputFloat3("Snap", boundsSnap); + 
ImGui::PopID(); + } + } + + ImGuiIO& io = ImGui::GetIO(); + float viewManipulateRight = io.DisplaySize.x; + float viewManipulateTop = 0; + static ImGuiWindowFlags gizmoWindowFlags = 0; + + /* + for the "useWindow" case we just render to a gui area, + otherwise to fake full screen transparent window + + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions + */ +// TODO: this shouldn't be handled here I think + SImResourceInfo info; + info.textureID = params.sceneTexDescIx; + info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + + TransformReturnInfo retval; + if (params.useWindow) + { + ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(400, 20), ImGuiCond_Appearing); + ImGui::PushStyleColor(ImGuiCol_WindowBg, (ImVec4)ImColor(0.35f, 0.3f, 0.3f)); + ImGui::Begin("Gizmo", 0, gizmoWindowFlags); + ImGuizmo::SetDrawlist(); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 windowPos = ImGui::GetWindowPos(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval.sceneResolution = {contentRegionSize.x,contentRegionSize.y}; + retval.isGizmoWindowHovered = ImGui::IsWindowHovered(); + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + + ImGuiWindow* window = ImGui::GetCurrentWindow(); + gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? 
ImGuiWindowFlags_NoMove : 0); + } + else + { + ImGui::SetNextWindowPos(ImVec2(0, 0)); + ImGui::SetNextWindowSize(io.DisplaySize); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0, 0, 0, 0)); // fully transparent fake window + ImGui::Begin("FullScreenWindow", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoScrollbar | ImGuiWindowFlags_NoScrollWithMouse | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoBringToFrontOnFocus | ImGuiWindowFlags_NoBackground | ImGuiWindowFlags_NoInputs); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + + ImGui::Image(info, contentRegionSize); + ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval.sceneResolution = {contentRegionSize.x,contentRegionSize.y}; + retval.isGizmoWindowHovered = ImGui::IsWindowHovered(); + + viewManipulateRight = cursorPos.x + contentRegionSize.x; + viewManipulateTop = cursorPos.y; + } + + ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL); + retval.isGizmoBeingUsed = ImGuizmo::IsOver() || (ImGuizmo::IsUsing() && ImGui::IsMouseDown(ImGuiMouseButton_Left)); + + if(params.enableViewManipulate) + ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + + ImGui::End(); + ImGui::PopStyleColor(); + + return retval; +} + +#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ \ No newline at end of file diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp new file mode 100644 index 000000000..b6d723e70 --- /dev/null +++ b/72_SolidAngleVisualizer/main.cpp @@ -0,0 +1,1105 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + + +#include "common.hpp" +#include "app_resources/hlsl/common.hlsl" + +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +/* +Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window. + +Written with Nabla's UI extension and got integrated with ImGuizmo to handle scene's object translations. +*/ +class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + + inline static std::string SolidAngleVisShaderPath = "app_resources/hlsl/SolidAngleVis.frag.hlsl"; +public: + inline SolidAngleVisualizer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({ 2048,1024 }, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) { + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i,1 })) + return logFail("Couldn't create Command Buffer!"); + } + + const uint32_t addtionalBufferOwnershipFamilies[] = { getGraphicsQueue()->getFamilyIndex() }; + 
m_scene = CGeometryCreatorScene::create( + { + .transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies + }, + CSimpleDebugRenderer::DefaultPolygonGeometryPatch + ); + + // for the scene drawing pass + { + IGPURenderpass::SCreationParams params = {}; + const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { + {{ + { + .format = sceneRenderDepthFormat, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp =*/ {IGPURenderpass::LOAD_OP::CLEAR}, + /*.storeOp =*/ {IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout =*/ {IGPUImage::LAYOUT::UNDEFINED}, + /*.finalLayout =*/ {IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} + }}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd + }; + params.depthStencilAttachments = depthAttachments; + const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { + {{ + { + .format = finalSceneRenderFormat, + .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp =*/ IGPURenderpass::LOAD_OP::CLEAR, + /*.storeOp =*/ IGPURenderpass::STORE_OP::STORE, + /*.initialLayout =*/ IGPUImage::LAYOUT::UNDEFINED, + /*.finalLayout =*/ IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read + }}, + IGPURenderpass::SCreationParams::ColorAttachmentsEnd + }; + params.colorAttachments = colorAttachments; + IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { + {}, + IGPURenderpass::SCreationParams::SubpassesEnd + }; + subpasses[0].depthStencilAttachment = { {.render = {.attachmentIndex = 0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}} }; + subpasses[0].colorAttachments[0] = { .render = {.attachmentIndex = 0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} }; + params.subpasses = subpasses; + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition of 
Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + // while color is sampled by ImGUI + .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + // don't want any writes to be available, as we are clearing both attachments + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available, also won't be using depth so don't care about it being visible to anyone else + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT, + // the ImGUI will sample the color, then next frame we overwrite both attachments + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT | PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, + // but we only care about the availability-visibility chain between renderpass and imgui + .dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + params.dependencies = 
dependencies; + auto solidAngleRenderpassParams = params; + m_mainRenderpass = m_device->createRenderpass(std::move(params)); + if (!m_mainRenderpass) + return logFail("Failed to create Main Renderpass!"); + + m_solidAngleRenderpass = m_device->createRenderpass(std::move(solidAngleRenderpassParams)); + if (!m_solidAngleRenderpass) + return logFail("Failed to create Solid Angle Renderpass!"); + + } + + const auto& geometries = m_scene->getInitParams().geometries; + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), m_solidAngleRenderpass.get(), 0, { &geometries.front().get(),geometries.size() }); + // special case + { + const auto& pipelines = m_renderer->getInitParams().pipelines; + auto ix = 0u; + for (const auto& name : m_scene->getInitParams().geometryNames) + { + if (name == "Cone") + m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone]; + ix++; + } + } + // we'll only display one thing at a time + m_renderer->m_instances.resize(1); + + // Create graphics pipeline + { + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "") -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.workingDirectory = localInputCWD; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = smart_refctd_ptr_static_cast(assets[0]); + // The down-cast should not fail! 
+ assert(source); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_FRAGMENT; + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#endif + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + core::vector defines; + if (!defineMacro.empty()) + defines.push_back({ defineMacro, "" }); + + options.preprocessorOptions.extraDefines = defines; + + source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + + auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); + if (!shader) + { + m_logger->log("HLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileHLSLShader(SolidAngleVisShaderPath); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main" + }; + + const asset::SPushConstantRange ranges[] = 
{ { + .stageFlags = hlsl::ShaderStage::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstants) + } }; + + auto visualizationLayout = m_device->createPipelineLayout( + ranges, + nullptr, + nullptr, + nullptr, + nullptr + ); + m_visualizationPipeline = fsTriProtoPPln.createPipeline(fragSpec, visualizationLayout.get(), m_solidAngleRenderpass.get()); + if (!m_visualizationPipeline) + return logFail("Could not create Graphics Pipeline!"); + + } + + // Create ImGUI + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::imgui::UI::SCreationParameters params = {}; + params.resources.texturesInfo = { .setIx = 0u,.bindingIx = TexturesImGUIBindingIndex }; + params.resources.samplersInfo = { .setIx = 0u,.bindingIx = 1u }; + params.utilities = m_utils; + params.transfer = getTransferUpQueue(); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxImGUITextures); + params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); + params.subpassIx = 0u; + params.pipelineCache = nullptr; + interface.imGUI = ext::imgui::UI::create(std::move(params)); + if (!interface.imGUI) + return logFail("Failed to create `nbl::ext::imgui::UI` class"); + } + + // create rest of User Interface + { + auto* imgui = interface.imGUI.get(); + // create the suballocated descriptor set + { + // note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* layout = interface.imGUI->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT, { &layout,1 }); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); + interface.subAllocDS = make_smart_refctd_ptr(std::move(ds)); + 
if (!interface.subAllocDS) + return logFail("Failed to create the descriptor set"); + // make sure Texture Atlas slot is taken for eternity + { + auto dummy = SubAllocatedDescriptorSet::invalid_value; + interface.subAllocDS->multi_allocate(0, 1, &dummy); + assert(dummy == ext::imgui::UI::FontAtlasTexId); + } + // write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = smart_refctd_ptr(interface.imGUI->getFontAtlasView()); + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = { + .dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = 1, + .info = &info + }; + if (!m_device->updateDescriptorSets({ &write,1 }, {})) + return logFail("Failed to write the descriptor set"); + } + imgui->registerListener([this]() {interface(); }); + } + + interface.camera.mapKeysToWASD(); + + onAppInitializedFinish(); + return true; + } + + // + virtual inline bool onAppTerminated() + { + SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId; + IGPUDescriptorSet::SDropDescriptorSet dummy[1]; + interface.subAllocDS->multi_deallocate(dummy, TexturesImGUIBindingIndex, 1, &fontAtlasDescIx); + return device_base_t::onAppTerminated(); + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { + // CPU events + update(nextPresentationTimestamp); + + const auto& virtualWindowRes = interface.transformReturnInfo.sceneResolution; + // TODO: check main frame buffer too + if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualWindowRes[1]) + 
recreateFramebuffer(virtualWindowRes); + + // + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // clear to black for both things + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + if (m_solidAngleViewFramebuffer) + { + cb->beginDebugMarker("Draw Circle View Frame"); + { + const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = m_solidAngleViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0,0}, + .extent = {virtualWindowRes[0],virtualWindowRes[1]} + } + }; + beginRenderpass(cb, renderpassInfo); + } + // draw scene + { + PushConstants pc{ + .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), + .viewport = { 0.f,0.f,static_cast(virtualWindowRes[0]),static_cast(virtualWindowRes[1]) } + }; + auto pipeline = m_visualizationPipeline; + cb->bindGraphicsPipeline(pipeline.get()); + cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + //cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 3, 1, &ds); + ext::FullScreenTriangle::recordDrawCall(cb); + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + // draw main view + if (m_mainViewFramebuffer) + { + cb->beginDebugMarker("Main Scene Frame"); + { + const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = m_mainViewFramebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0,0}, + .extent = {virtualWindowRes[0],virtualWindowRes[1]} + 
} + }; + beginRenderpass(cb, renderpassInfo); + } + // draw scene + { + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + const auto& camera = interface.camera; + memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); + memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); + } + const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix); + + // tear down scene every frame + auto& instance = m_renderer->m_instances[0]; + auto transposed = hlsl::transpose(interface.m_OBBModelMatrix); + memcpy(&instance.world, &transposed, sizeof(instance.world)); + instance.packedGeo = m_renderer->getGeometries().data();// +interface.gcIndex; + m_renderer->render(cb, viewParams); // draw the cube/OBB + + + // TODO: a better way to get identity matrix + float32_t3x4 origin = { + 0.2f,0.0f,0.0f,0.0f, + 0.0f,0.2f,0.0f,0.0f, + 0.0f,0.0f,0.2f,0.0f + }; + memcpy(&instance.world, &origin, sizeof(instance.world)); + instance.packedGeo = m_renderer->getGeometries().data() + 3; // sphere + m_renderer->render(cb, viewParams); + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + { + cb->beginDebugMarker("SolidAngleVisualizer IMGUI Frame"); + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = + { + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + } + }; + beginRenderpass(cb, renderpassInfo); + } + // draw ImGUI + { + auto* imgui = interface.imGUI.get(); + auto* pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + // note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx + const auto* ds = 
interface.subAllocDS->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), imgui->getCreationParameters().resources.texturesInfo.setIx, 1u, &ds); + // a timepoint in the future to release streaming resources for geometry + const ISemaphore::SWaitInfo drawFinished = { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1u }; + if (!imgui->render(cb, drawFinished)) + { + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.", ILogger::ELL_ERROR); + return {}; + } + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + + m_window->setCaption("[Nabla Engine] UI App Test Demo"); + return retval; + } + +protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // don't want any writes to be available, we'll clear, only thing to worry about is the layout transition + { + .srcSubpass = 
IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway + .srcAccessMask = ACCESS_FLAGS::NONE, + // layout transition needs to finish before the color write + .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // want layout transition to begin after all color output is done + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; + } + +private: + inline void update(const std::chrono::microseconds nextPresentationTimestamp) + { + auto& camera = interface.camera; + camera.setMoveSpeed(interface.moveSpeed); + camera.setRotateSpeed(interface.rotateSpeed); + + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } uiEvents; + + // TODO: should be a member really + static std::chrono::microseconds previousEventTimestamp{}; + + // I think begin/end should always be called on camera, just events shouldn't be fed, why? 
+ // If you stop begin/end, whatever keys were up/down get their up/down values frozen leading to + // `perActionDt` becoming obnoxiously large the first time the even processing resumes due to + // `timeDiff` being computed since `lastVirtualUpTimeStamp` + camera.beginInputProcessing(nextPresentationTimestamp); + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (interface.move) + camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + uiEvents.mouse.emplace_back(e); + + //if (e.type == nbl::ui::SMouseEvent::EET_SCROLL && m_renderer) + //{ + // interface.gcIndex += int16_t(core::sign(e.scrollEvent.verticalScroll)); + // interface.gcIndex = core::clamp(interface.gcIndex, 0ull, m_renderer->getGeometries().size() - 1); + //} + } + }, + m_logger.get() + ); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + //if (interface.move) + camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + uiEvents.keyboard.emplace_back(e); + } + }, + m_logger.get() + ); + } + camera.endInputProcessing(nextPresentationTimestamp); + + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + + ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - float32_t2(m_window->getX(),m_window->getY()), + .displaySize = {m_window->getWidth(),m_window->getHeight()}, + .mouseEvents = uiEvents.mouse, + .keyboardEvents = uiEvents.keyboard + }; + + //interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex]; + interface.imGUI->update(params); + 
} + + void recreateFramebuffer(const uint16_t2 resolution) + { + auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr + { + auto image = m_device->createImage({ { + .type = IGPUImage::ET_2D, + .samples = IGPUImage::ESCF_1_BIT, + .format = format, + .extent = {resolution.x,resolution.y,1}, + .mipLevels = 1, + .arrayLayers = 1, + .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::EUF_SAMPLED_BIT + } }); + if (!m_device->allocate(image->getMemoryReqs(), image.get()).isValid()) + return nullptr; + IGPUImageView::SCreationParams params = { + .image = std::move(image), + .viewType = IGPUImageView::ET_2D, + .format = format + }; + params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT : IGPUImage::EAF_COLOR_BIT; + return m_device->createImageView(std::move(params)); + }; + + smart_refctd_ptr solidAngleView; + smart_refctd_ptr mainView; + // detect window minimization + if (resolution.x < 0x4000 && resolution.y < 0x4000) + { + solidAngleView = createImageAndView(finalSceneRenderFormat); + auto solidAngleDepthView = createImageAndView(sceneRenderDepthFormat); + m_solidAngleViewFramebuffer = m_device->createFramebuffer({ { + .renderpass = m_solidAngleRenderpass, + .depthStencilAttachments = &solidAngleDepthView.get(), + .colorAttachments = &solidAngleView.get(), + .width = resolution.x, + .height = resolution.y + } }); + + mainView = createImageAndView(finalSceneRenderFormat); + auto mainDepthView = createImageAndView(sceneRenderDepthFormat); + m_mainViewFramebuffer = m_device->createFramebuffer({ { + .renderpass = m_mainRenderpass, + .depthStencilAttachments = &mainDepthView.get(), + .colorAttachments = &mainView.get(), + .width = resolution.x, + .height = resolution.y + } }); + + } + else + { + m_solidAngleViewFramebuffer = nullptr; + m_mainViewFramebuffer = nullptr; + } + + // release previous slot and its image + interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), 
interface.renderColorViewDescIndices, { .semaphore = m_semaphore.get(),.value = m_realFrameIx }); + // + if (solidAngleView) + { + interface.subAllocDS->multi_allocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices); + // update descriptor set + IGPUDescriptorSet::SDescriptorInfo infos[static_cast(CInterface::Count)] = {}; + infos[0].desc = solidAngleView; + infos[0].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[1].desc = mainView; + infos[1].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write[static_cast(CInterface::Count)] = { + {.dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], + .count = 1, + .info = &infos[static_cast(CInterface::ERV_MAIN_VIEW)] + }, + { + .dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], + .count = 1, + .info = &infos[1] + } + }; + m_device->updateDescriptorSets({ write, static_cast(CInterface::Count) }, {}); + } + interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndices[CInterface::ERV_MAIN_VIEW]; + } + + inline void beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& info) + { + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cb->setScissor(0, 1, &info.renderArea); + const SViewport viewport = { + .x = 0, + .y = 0, + .width = static_cast(info.renderArea.extent.width), + .height = static_cast(info.renderArea.extent.height) + }; + cb->setViewport(0u, 1u, &viewport); + } + + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + constexpr static 
inline auto sceneRenderDepthFormat = EF_D32_SFLOAT; + constexpr static inline auto finalSceneRenderFormat = EF_R8G8B8A8_SRGB; + constexpr static inline auto TexturesImGUIBindingIndex = 0u; + // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes + constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; + + // + smart_refctd_ptr m_scene; + smart_refctd_ptr m_solidAngleRenderpass; + smart_refctd_ptr m_mainRenderpass; + smart_refctd_ptr m_renderer; + smart_refctd_ptr m_solidAngleViewFramebuffer; + smart_refctd_ptr m_mainViewFramebuffer; + smart_refctd_ptr m_visualizationPipeline; + // + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + // UI stuff + struct CInterface + { + void cameraToHome() + { + core::vectorSIMDf cameraPosition(-3.0f, 3.0f, 6.0f); + core::vectorSIMDf cameraTarget(0.f, 0.f, 6.f); + const static core::vectorSIMDf up(0.f, 1.f, 0.f); + + camera.setPosition(cameraPosition); + camera.setTarget(cameraTarget); + camera.setBackupUpVector(up); + + camera.recomputeViewMatrix(); + } + + void operator()() + { + ImGuiIO& io = ImGui::GetIO(); + + // TODO: why is this a lambda and not just an assignment in a scope ? 
+ camera.setProjectionMatrix([&]() + { + matrix4SIMD projection; + + if (isPerspective) + if (isLH) + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + else + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + else + { + float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; + + if (isLH) + projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); + else + projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); + } + + return projection; + }()); + + ImGuizmo::SetOrthographic(false); + ImGuizmo::BeginFrame(); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Editor"); + + //if (ImGui::RadioButton("Full view", !transformParams.useWindow)) + // transformParams.useWindow = false; + + //ImGui::SameLine(); + + //if (ImGui::RadioButton("Window", transformParams.useWindow)) + // transformParams.useWindow = true; + + ImGui::Text("Camera"); + bool viewDirty = false; + + if (ImGui::RadioButton("LH", isLH)) + isLH = true; + + ImGui::SameLine(); + + if (ImGui::RadioButton("RH", !isLH)) + isLH = false; + + if (ImGui::RadioButton("Perspective", isPerspective)) + isPerspective = true; + + ImGui::SameLine(); + + if (ImGui::RadioButton("Orthographic", !isPerspective)) + isPerspective = false; + + ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate); + //ImGui::Checkbox("Enable camera movement", &move); + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); 
+ + // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case + + if (isPerspective) + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + else + ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20); + + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + + viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f); + + if (viewDirty || firstFrame) + { + cameraToHome(); + } + firstFrame = false; + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + if (ImGuizmo::IsUsing()) + { + ImGui::Text("Using gizmo"); + } + else + { + ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? 
"Over scale gizmo" : ""); + } + ImGui::Separator(); + + /* + * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout + * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection + + - VIEW: + + ImGuizmo + + | X[0] Y[0] Z[0] 0.0f | + | X[1] Y[1] Z[1] 0.0f | + | X[2] Y[2] Z[2] 0.0f | + | -Dot(X, eye) -Dot(Y, eye) -Dot(Z, eye) 1.0f | + + Nabla + + | X[0] X[1] X[2] -Dot(X, eye) | + | Y[0] Y[1] Y[2] -Dot(Y, eye) | + | Z[0] Z[1] Z[2] -Dot(Z, eye) | + + = transpose(nbl::core::matrix4SIMD()) + + - PERSPECTIVE [PROJECTION CASE]: + + ImGuizmo + + | (temp / temp2) (0.0) (0.0) (0.0) | + | (0.0) (temp / temp3) (0.0) (0.0) | + | ((right + left) / temp2) ((top + bottom) / temp3) ((-zfar - znear) / temp4) (-1.0f) | + | (0.0) (0.0) ((-temp * zfar) / temp4) (0.0) | + + Nabla + + | w (0.0) (0.0) (0.0) | + | (0.0) -h (0.0) (0.0) | + | (0.0) (0.0) (-zFar/(zFar-zNear)) (-zNear*zFar/(zFar-zNear)) | + | (0.0) (0.0) (-1.0) (0.0) | + + = transpose() + + * + * the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object, + * note it also modifies input view matrix but projection matrix is immutable + */ + + if (ImGui::IsKeyPressed(ImGuiKey_Home)) + { + cameraToHome(); + } + + if (ImGui::IsKeyPressed(ImGuiKey_End)) + { + m_OBBModelMatrix = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 12.0f, 1.0f + }; + } + + static struct + { + float32_t4x4 view, projection, model; + } imguizmoM16InOut; + + ImGuizmo::SetID(0u); + + // TODO: camera will return hlsl::float32_tMxN + auto view = *reinterpret_cast(camera.getViewMatrix().pointer()); + imguizmoM16InOut.view = hlsl::transpose(getMatrix3x4As4x4(view)); + + // TODO: camera will return hlsl::float32_tMxN + imguizmoM16InOut.projection = hlsl::transpose(*reinterpret_cast(camera.getProjectionMatrix().pointer())); + imguizmoM16InOut.model = m_OBBModelMatrix; + + { + if (flipGizmoY) // note we allow to flip gizmo 
just to match our coordinates + imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + + transformParams.editTransformDecomposition = true; + transformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + + // TODO: camera stops when cursor hovers gizmo, but we also want to stop when gizmo is being used + move = (ImGui::IsMouseDown(ImGuiMouseButton_Left) || transformReturnInfo.isGizmoWindowHovered) && (!transformReturnInfo.isGizmoBeingUsed); + } + + // to Nabla + update camera & model matrices + // TODO: make it more nicely, extract: + // - Position by computing inverse of the view matrix and grabbing its translation + // - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position + // But then set the view matrix this way anyway, because up-vector may not be compatible + //const auto& view = camera.getViewMatrix(); + //const_cast(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) + m_OBBModelMatrix = imguizmoM16InOut.model; + + // object meta display + //{ + // ImGui::Begin("Object"); + // ImGui::Text("type: \"%s\"", objectName.data()); + // ImGui::End(); + //} + + // solid angle view window + { + ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(1240, 20), ImGuiCond_Appearing); + static bool isOpen = true; + ImGui::Begin("Solid angle view", &isOpen, 0); + + ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + ImGui::Image({ renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW] }, contentRegionSize); + 
ImGui::End(); + } + + // view matrices editor + { + ImGui::Begin("Matrices"); + + auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true) + { + ImGui::Text(topText); + if (ImGui::BeginTable(tableName, columns)) + { + for (int y = 0; y < rows; ++y) + { + ImGui::TableNextRow(); + for (int x = 0; x < columns; ++x) + { + ImGui::TableSetColumnIndex(x); + ImGui::Text("%.3f", *(pointer + (y * columns) + x)); + } + } + ImGui::EndTable(); + } + + if (withSeparator) + ImGui::Separator(); + }; + + addMatrixTable("Model Matrix", "ModelMatrixTable", 4, 4, &m_OBBModelMatrix[0][0]); + addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, camera.getViewMatrix().pointer()); + addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, camera.getProjectionMatrix().pointer(), false); + + ImGui::End(); + } + + // Nabla Imgui backend MDI buffer info + // To be 100% accurate and not overly conservative we'd have to explicitly `cull_frees` and defragment each time, + // so unless you do that, don't use this basic info to optimize the size of your IMGUI buffer. 
+ { + auto* streaminingBuffer = imGUI->getStreamingBuffer(); + + const size_t total = streaminingBuffer->get_total_size(); // total memory range size for which allocation can be requested + const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size(); // max total free bloock memory size we can still allocate from total memory available + const size_t consumedMemory = total - freeSize; // memory currently consumed by streaming buffer + + float freePercentage = 100.0f * (float)(freeSize) / (float)total; + float allocatedPercentage = (float)(consumedMemory) / (float)total; + + ImVec2 barSize = ImVec2(400, 30); + float windowPadding = 10.0f; + float verticalPadding = ImGui::GetStyle().FramePadding.y; + + ImGui::SetNextWindowSize(ImVec2(barSize.x + 2 * windowPadding, 110 + verticalPadding), ImGuiCond_Always); + ImGui::Begin("Nabla Imgui MDI Buffer Info", nullptr, ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoScrollbar); + + ImGui::Text("Total Allocated Size: %zu bytes", total); + ImGui::Text("In use: %zu bytes", consumedMemory); + ImGui::Text("Buffer Usage:"); + + ImGui::SetCursorPosX(windowPadding); + + if (freePercentage > 70.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f)); // Green + else if (freePercentage > 30.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f)); // Yellow + else + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f)); // Red + + ImGui::ProgressBar(allocatedPercentage, barSize, ""); + + ImGui::PopStyleColor(); + + ImDrawList* drawList = ImGui::GetWindowDrawList(); + + ImVec2 progressBarPos = ImGui::GetItemRectMin(); + ImVec2 progressBarSize = ImGui::GetItemRectSize(); + + const char* text = "%.2f%% free"; + char textBuffer[64]; + snprintf(textBuffer, sizeof(textBuffer), text, freePercentage); + + ImVec2 textSize = ImGui::CalcTextSize(textBuffer); + ImVec2 textPos = ImVec2 + ( + progressBarPos.x + (progressBarSize.x - textSize.x) * 0.5f, 
+ progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f + ); + + ImVec4 bgColor = ImGui::GetStyleColorVec4(ImGuiCol_WindowBg); + drawList->AddRectFilled + ( + ImVec2(textPos.x - 5, textPos.y - 2), + ImVec2(textPos.x + textSize.x + 5, textPos.y + textSize.y + 2), + ImGui::GetColorU32(bgColor) + ); + + ImGui::SetCursorScreenPos(textPos); + ImGui::Text("%s", textBuffer); + + ImGui::Dummy(ImVec2(0.0f, verticalPadding)); + + ImGui::End(); + } + ImGui::End(); + } + + smart_refctd_ptr imGUI; + + // descriptor set + smart_refctd_ptr subAllocDS; + enum E_RENDER_VIEWS : uint8_t + { + ERV_MAIN_VIEW, + ERV_SOLID_ANGLE_VIEW, + Count + }; + SubAllocatedDescriptorSet::value_type renderColorViewDescIndices[E_RENDER_VIEWS::Count] = { SubAllocatedDescriptorSet::invalid_value, SubAllocatedDescriptorSet::invalid_value }; + // + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + // mutables + float32_t4x4 m_OBBModelMatrix{ + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 12.0f, 1.0f + }; + + //std::string_view objectName; + TransformRequestParams transformParams; + TransformReturnInfo transformReturnInfo; + + float fov = 90.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 90.f / 180.f * 3.14159f; + float camXAngle = 0.f / 180.f * 3.14159f; + //uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + bool isPerspective = true, isLH = true, flipGizmoY = true, move = true; + bool firstFrame = true; + } interface; +}; + +NBL_MAIN_FUNC(SolidAngleVisualizer) \ No newline at end of file diff --git a/72_SolidAngleVisualizer/pipeline.groovy b/72_SolidAngleVisualizer/pipeline.groovy new file mode 100644 index 000000000..7b7c9702a --- /dev/null +++ b/72_SolidAngleVisualizer/pipeline.groovy 
import org.DevshGraphicsProgramming.Agent
import org.DevshGraphicsProgramming.BuilderInfo
import org.DevshGraphicsProgramming.IBuilder

/**
 * CI builder for this example.
 *
 * Only the build stage does real work: it shells out to `cmake --build` for the
 * example's target. The prepare, test and install stages are no-ops that report success.
 */
class CUIBuilder extends IBuilder
{
	CUIBuilder(Agent _agent, _info)
	{
		super(_agent, _info)
	}

	/** Nothing to prepare; always reports success. */
	@Override
	boolean prepare(Map axisMapping)
	{
		true
	}

	/**
	 * Builds the example target for the configuration / build type selected in axisMapping.
	 * The outcome of the executed command is not inspected; the stage always reports success.
	 */
	@Override
	boolean build(Map axisMapping)
	{
		// typed locals keep Groovy's assignment coercion of the axis values
		IBuilder.BUILD_TYPE selectedBuildType = axisMapping.get("BUILD_TYPE")
		IBuilder.CONFIGURATION selectedConfiguration = axisMapping.get("CONFIGURATION")

		def nameOfConfig = getNameOfConfig(selectedConfiguration)
		def nameOfBuildDirectory = getNameOfBuildDirectory(selectedBuildType)

		def buildCommand = "cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v"
		agent.execute(buildCommand)

		true
	}

	/** No tests are run for this example; always reports success. */
	@Override
	boolean test(Map axisMapping)
	{
		true
	}

	/** Nothing is installed; always reports success. */
	@Override
	boolean install(Map axisMapping)
	{
		true
	}
}

// Factory entry point looked up by the pipeline when it loads this script.
def create(Agent _agent, _info)
{
	new CUIBuilder(_agent, _info)
}

return this
20 ++ .../app_resources/common.h | 19 ++ .../app_resources/present.frag.hlsl | 19 ++ .../config.json.template | 28 +++ .../include/nbl/this_example/common.hpp | 11 + 72_CooperativeBinarySearch/main.cpp | 232 ++++++++++++++++++ 72_CooperativeBinarySearch/pipeline.groovy | 50 ++++ CMakeLists.txt | 1 + 9 files changed, 404 insertions(+) create mode 100644 72_CooperativeBinarySearch/CMakeLists.txt create mode 100644 72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl create mode 100644 72_CooperativeBinarySearch/app_resources/common.h create mode 100644 72_CooperativeBinarySearch/app_resources/present.frag.hlsl create mode 100644 72_CooperativeBinarySearch/config.json.template create mode 100644 72_CooperativeBinarySearch/include/nbl/this_example/common.hpp create mode 100644 72_CooperativeBinarySearch/main.cpp create mode 100644 72_CooperativeBinarySearch/pipeline.groovy diff --git a/72_CooperativeBinarySearch/CMakeLists.txt b/72_CooperativeBinarySearch/CMakeLists.txt new file mode 100644 index 000000000..b7e52875d --- /dev/null +++ b/72_CooperativeBinarySearch/CMakeLists.txt @@ -0,0 +1,24 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl b/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl new file mode 100644 index 000000000..f44a35b21 --- /dev/null +++ b/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl @@ -0,0 +1,20 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(compute) + +#include "common.h" +using namespace nbl::hlsl; + +[[vk::push_constant]] ConstantBuffer Constants; +[[vk::binding(0)]] StructuredBuffer Histogram; +[[vk::binding(1)]] RWStructuredBuffer Output; + +static const uint32_t GroupsharedSize = 256; + +[numthreads(256, 1, 1)] +void main(const uint3 thread : SV_DispatchThreadID, const uint3 groupThread : SV_GroupThreadID, const uint3 group : SV_GroupID) +{ + +} \ No newline at end of file diff --git a/72_CooperativeBinarySearch/app_resources/common.h b/72_CooperativeBinarySearch/app_resources/common.h new file mode 100644 index 000000000..4a3cacaa4 --- /dev/null +++ b/72_CooperativeBinarySearch/app_resources/common.h @@ -0,0 +1,19 @@ +#ifndef _COOPERATIVE_BINARY_SEARCH_HLSL_INCLUDED_ +#define _COOPERATIVE_BINARY_SEARCH_HLSL_INCLUDED_ + +#include +#include + +using namespace nbl::hlsl; +namespace nbl { +namespace hlsl { + +struct PushConstants +{ + uint32_t EntityCount; +}; + +}; +}; + +#endif // _COOPERATIVE_BINARY_SEARCH_HLSL_INCLUDED_ diff --git a/72_CooperativeBinarySearch/app_resources/present.frag.hlsl b/72_CooperativeBinarySearch/app_resources/present.frag.hlsl new file mode 100644 index 000000000..22695657c --- /dev/null +++ b/72_CooperativeBinarySearch/app_resources/present.frag.hlsl @@ -0,0 +1,19 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +// binding 0 set 0 +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +{ + return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); +} \ No newline at end of file diff --git a/72_CooperativeBinarySearch/config.json.template b/72_CooperativeBinarySearch/config.json.template new file mode 100644 index 000000000..24adf54fb --- /dev/null +++ b/72_CooperativeBinarySearch/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} diff --git a/72_CooperativeBinarySearch/include/nbl/this_example/common.hpp b/72_CooperativeBinarySearch/include/nbl/this_example/common.hpp new file mode 100644 index 000000000..3745ca512 --- /dev/null +++ b/72_CooperativeBinarySearch/include/nbl/this_example/common.hpp @@ -0,0 +1,11 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + +#include "nbl/examples/examples.hpp" + +// example's own headers +#include "nbl/ui/ICursorControl.h" // TODO: why not in nabla.h ? 
+#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/72_CooperativeBinarySearch/main.cpp b/72_CooperativeBinarySearch/main.cpp new file mode 100644 index 000000000..fda1a63c1 --- /dev/null +++ b/72_CooperativeBinarySearch/main.cpp @@ -0,0 +1,232 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nbl/system/IApplicationFramework.h" +#include "app_resources/common.h" + +#include +#include +#include + + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +//using namespace glm; + +void cpu_tests(); + +class CooperativeBinarySearch final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication +{ + using device_base_t = application_templates::MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; +public: + CooperativeBinarySearch(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! 
+ if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_queue = m_device->getQueue(0, 0); + m_commandPool = m_device->createCommandPool(m_queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + m_commandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &m_cmdbuf,1 }, smart_refctd_ptr(m_logger)); + + smart_refctd_ptr shader; + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset("app_resources/binarySearch.comp.hlsl", lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return logFail("Could not load shader!"); + + auto source = IAsset::castDown(assets[0]); + // The down-cast should not fail! + assert(source); + + // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple + shader = m_device->compileShader({ source.get() }); + if (!shader) + return logFail("Creation of a GPU Shader to from CPU Shader source failed!"); + } + + const uint32_t bindingCount = 2u; + IGPUDescriptorSetLayout::SBinding bindings[bindingCount] = {}; + bindings[0].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; // [[vk::binding(0)]] StructuredBuffer Histogram; + bindings[1].type = IDescriptor::E_TYPE::ET_STORAGE_BUFFER; // [[vk::binding(1)]] RWStructuredBuffer Output; + + for(int i = 0; i < bindingCount; ++i) + { + bindings[i].stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; + bindings[i].count = 1; + bindings[i].binding = i; + } + m_descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + { + SPushConstantRange pcRange = {}; + pcRange.stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; + pcRange.offset = 0u; + pcRange.size = 2 * sizeof(uint32_t); + auto layout = m_device->createPipelineLayout({ &pcRange,1 }, 
smart_refctd_ptr(m_descriptorSetLayout)); + IGPUComputePipeline::SCreationParams params = {}; + params.layout = layout.get(); + params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) + return logFail("Failed to create compute pipeline!\n"); + } + + for (uint32_t i = 0; i < bindingCount; i++) + { + m_buffers[i] = m_device->createBuffer(IGPUBuffer::SCreationParams { + {.size = 500000, .usage = + IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | + IGPUBuffer::E_USAGE_FLAGS::EUF_STORAGE_BUFFER_BIT, + } + }); + + auto reqs = m_buffers[i]->getMemoryReqs(); + reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getHostVisibleMemoryTypeBits(); + m_device->allocate(reqs, m_buffers[i].get()); + } + + smart_refctd_ptr descriptorPool = nullptr; + { + IDescriptorPool::SCreateInfo createInfo = {}; + createInfo.maxSets = 1; + createInfo.maxDescriptorCount[static_cast(IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = 1; + descriptorPool = m_device->createDescriptorPool(std::move(createInfo)); + } + + m_descriptorSet = descriptorPool->createDescriptorSet(smart_refctd_ptr(m_descriptorSetLayout)); + + IGPUDescriptorSet::SDescriptorInfo descriptorInfos[bindingCount] = {}; + IGPUDescriptorSet::SWriteDescriptorSet writeDescriptorSets[bindingCount] = {}; + + for(int i = 0; i < bindingCount; ++i) + { + writeDescriptorSets[i].info = &descriptorInfos[i]; + writeDescriptorSets[i].dstSet = m_descriptorSet.get(); + writeDescriptorSets[i].binding = i; + writeDescriptorSets[i].count = bindings[i].count; + + descriptorInfos[i].desc = m_buffers[i]; + descriptorInfos[i].info.buffer.size = ~0ull; + } + + m_device->updateDescriptorSets(bindingCount, writeDescriptorSets, 0u, nullptr); + + // In contrast to fences, we just need one semaphore to rule all dispatches + return true; + } + + void onAppTerminated_impl() override + { + m_device->waitIdle(); + } + + void 
workLoopBody() override + { + cpu_tests(); + + constexpr auto StartedValue = 0; + + smart_refctd_ptr progress = m_device->createSemaphore(StartedValue); + + m_cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + m_cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t layoutBufferBarrier[1] = { { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::HOST_BIT, + .srcAccessMask = ACCESS_FLAGS::HOST_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + // whole buffer because we transferred the contents into it + .range = {.offset = 0,.size = m_buffers[1]->getCreationParams().size,.buffer = m_buffers[1]} + } }; + + const IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo = { .bufBarriers = layoutBufferBarrier }; + m_cmdbuf->pipelineBarrier(EDF_NONE, depInfo); + + + const uint32_t pushConstants[2] = { 1920, 1080 }; + const IGPUDescriptorSet* set = m_descriptorSet.get(); + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0u, 1u, &set); + m_cmdbuf->dispatch(240, 135, 1u); + + layoutBufferBarrier[0].barrier.dep = layoutBufferBarrier[0].barrier.dep.nextBarrier(PIPELINE_STAGE_FLAGS::COPY_BIT,ACCESS_FLAGS::TRANSFER_READ_BIT); + m_cmdbuf->pipelineBarrier(EDF_NONE,depInfo); + + m_cmdbuf->end(); + + { + constexpr auto FinishedValue = 69; + IQueue::SSubmitInfo submitInfos[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = m_cmdbuf.get()} }; + submitInfos[0].commandBuffers = cmdbufs; + const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = progress.get(),.value = FinishedValue,.stageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT} }; + submitInfos[0].signalSemaphores = signals; + m_api->startCapture(); + m_queue->submit(submitInfos); + m_api->endCapture(); + const 
ISemaphore::SWaitInfo waitInfos[] = { { + .semaphore = progress.get(), + .value = FinishedValue + } }; + m_device->blockForSemaphores(waitInfos); + } + + auto mem = m_buffers[1]->getBoundMemory(); + assert(mem.memory->isMappable()); + auto* ptr = mem.memory->map({ .offset = 0, .length = mem.memory->getAllocationSize() }); + printf("readback ptr %p\n", ptr); + + m_keepRunning = false; + } + + bool keepRunning() override + { + return m_keepRunning; + } + + +private: + smart_refctd_ptr m_pipeline = nullptr; + smart_refctd_ptr m_descriptorSetLayout; + smart_refctd_ptr m_descriptorSet; + + smart_refctd_ptr m_buffers[2]; + smart_refctd_ptr m_cmdbuf = nullptr; + IQueue* m_queue; + smart_refctd_ptr m_commandPool; + uint64_t m_iteration = 0; + constexpr static inline uint64_t MaxIterations = 200; + + bool m_keepRunning = true; +}; + +NBL_MAIN_FUNC(CooperativeBinarySearch) + +void cpu_tests() +{ +} diff --git a/72_CooperativeBinarySearch/pipeline.groovy b/72_CooperativeBinarySearch/pipeline.groovy new file mode 100644 index 000000000..eb20d0c5a --- /dev/null +++ b/72_CooperativeBinarySearch/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CComputeShaderPathTracerBuilder extends IBuilder +{ + public CComputeShaderPathTracerBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + @Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return 
#pragma wave shader_stage(compute)

#include "common.h"
#include "nbl/builtin/hlsl/glsl_compat/subgroup_ballot.hlsl"
using namespace nbl::hlsl;

// NOTE(review): element types of the two buffers were reconstructed as `uint` from how the
// values are consumed below - confirm against the original shader.
[[vk::push_constant]] PushConstants Constants;
[[vk::binding(0)]] StructuredBuffer<uint> Histogram;
[[vk::binding(1)]] RWStructuredBuffer<uint> Output;

// Number of threads per workgroup and capacity of the groupshared staging array.
static const uint32_t GroupsharedSize = 256;

// Smallest power of two that is >= number.
// Values 0 and 1 both map to 1: `firstbithigh(0)` is -1, which must not be used as a shift amount.
uint getNextPowerOfTwo(uint number) {
    if (number <= 1)
        return 1;
    return 2u << firstbithigh(number - 1);
}

// Index of the lowest lane in the wave for which `condition` is true,
// or WaveGetLaneCount() when it is false on every lane.
uint getLaneWithFirstBitSet(bool condition) {
    uint4 ballot = WaveActiveBallot(condition);
    if (all(ballot == 0)) {
        return WaveGetLaneCount();
    }
    return nbl::hlsl::glsl::subgroupBallotFindLSB(ballot);
}

// Wave-cooperative search: returns the index just before the first element of
// `searchBuffer` that is greater than `findValue`.
//
// findValue must be the same across the entire wave
// Could use something like WaveReadFirstLane to be fully sure
//
// Preconditions (not checked): searchBufferSize >= 1, and the first element is not
// greater than findValue - otherwise the `- 1` in the return statement wraps around.
uint binarySearchLowerBoundFindValue(uint findValue, StructuredBuffer<uint> searchBuffer, uint searchBufferSize) {
    uint lane = WaveGetLaneIndex();

    uint left = 0;
    uint right = searchBufferSize - 1;

    uint32_t range = getNextPowerOfTwo(right - left);
    // do pivots as long as we can't coalesced load
    while (range > WaveGetLaneCount())
    {
        // there must be at least 1 gap between subsequent pivots
        const uint32_t step = range / WaveGetLaneCount();
        const uint32_t halfStep = step >> 1;
        const uint32_t pivotOffset = lane * step + halfStep;
        const uint32_t pivotIndex = left + pivotOffset;

        // every lane tests one pivot; the ballot counts how many pivots are <= findValue
        uint4 notGreaterPivots = WaveActiveBallot(pivotIndex < right && !(findValue < searchBuffer[pivotIndex]));
        uint partition = nbl::hlsl::glsl::subgroupBallotBitCount(notGreaterPivots);
        // only move left if needed
        if (partition != 0)
            left += partition * step - halfStep;
        // if we go into final half partition, the range becomes less too
        range = partition != WaveGetLaneCount() ? step : halfStep;
    }

    // final stage: each lane loads one candidate; out-of-range lanes act as "greater than anything"
    uint threadSearchIndex = left + lane;
    bool laneValid = threadSearchIndex < searchBufferSize;
    uint histAtIndex = laneValid ? searchBuffer[threadSearchIndex] : -1;
    uint firstLaneGreaterThan = getLaneWithFirstBitSet(histAtIndex > findValue);

    return left + firstLaneGreaterThan - 1;
}

groupshared uint shared_groupSearchBufferMinIndex;
groupshared uint shared_groupSearchBufferMaxIndex;
groupshared uint shared_groupSearchValues[GroupsharedSize];

// Binary search using the entire workgroup, making it log32 or log64 (every iteration, the possible set of
// values is divided by the number of lanes in a wave)
//
// Each thread looks up the value `groupIndex * GroupsharedSize + groupThread` and returns the
// index found for it in `searchBuffer`.
uint binarySearchLowerBoundCooperative(uint groupIndex, uint groupThread, StructuredBuffer<uint> searchBuffer, uint searchBufferSize) {
    uint minSearchValue = groupIndex * GroupsharedSize;
    uint maxSearchValue = ((groupIndex + 1) * GroupsharedSize) - 1;

    // On each workgroup, two subgroups do the search
    // - One searches for the minimum, the other searches for the maximum
    // - Store the minimum and maximum on groupshared memory, then do a barrier
    uint wave = groupThread / WaveGetLaneCount();
    if (wave < 2) {
        uint search = wave == 0 ? minSearchValue : maxSearchValue;
        uint searchResult = binarySearchLowerBoundFindValue(search, searchBuffer, searchBufferSize);
        if (WaveIsFirstLane()) {
            if (wave == 0) shared_groupSearchBufferMinIndex = searchResult;
            else shared_groupSearchBufferMaxIndex = searchResult;
        }
    }
    GroupMemoryBarrierWithGroupSync();

    // Since every instance has at least one triangle, we know that having workgroup values
    // for each value in the range of minimum to maximum will suffice.

    // Write every value in the range to groupshared memory and barrier.
    uint idx = shared_groupSearchBufferMinIndex + groupThread;
    if (idx <= shared_groupSearchBufferMaxIndex) {
        shared_groupSearchValues[groupThread] = searchBuffer[idx];
    }
    GroupMemoryBarrierWithGroupSync();

    // Only GroupsharedSize entries were staged above, so never scan past them even if the
    // [min, max] index range turns out wider than the workgroup (or max < min wrapped around).
    uint maxValueIndex = min(shared_groupSearchBufferMaxIndex - shared_groupSearchBufferMinIndex, GroupsharedSize - 1);

    // linear scan of the staged values for the first one greater than this thread's value
    uint searchValue = minSearchValue + groupThread;
    uint laneValue = shared_groupSearchBufferMaxIndex;
    for (uint currentSearchValueIndex = 0; currentSearchValueIndex <= maxValueIndex; currentSearchValueIndex++) {
        uint curValue = shared_groupSearchValues[currentSearchValueIndex];
        if (curValue > searchValue) {
            laneValue = shared_groupSearchBufferMinIndex + currentSearchValueIndex - 1;
            break;
        }
    }

    return laneValue;
}

[numthreads(256, 1, 1)]
void main(const uint3 thread : SV_DispatchThreadID, const uint3 groupThread : SV_GroupThreadID, const uint3 group : SV_GroupID)
{
    Output[thread.x] = binarySearchLowerBoundCooperative(group.x, groupThread.x, Histogram, Constants.EntityCount);
}
IDeviceMemoryAllocation::EMCAF_WRITE : IDeviceMemoryAllocation::EMCAF_READ; + auto mapResult = m_allocations[i].memory->map({ 0ull,m_allocations[i].memory->getAllocationSize() }, allocationType); + assert(mapResult); } smart_refctd_ptr descriptorPool = nullptr; { IDescriptorPool::SCreateInfo createInfo = {}; createInfo.maxSets = 1; - createInfo.maxDescriptorCount[static_cast(IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = 1; + createInfo.maxDescriptorCount[static_cast(IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = bindingCount; descriptorPool = m_device->createDescriptorPool(std::move(createInfo)); } @@ -130,6 +140,14 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp m_device->updateDescriptorSets(bindingCount, writeDescriptorSets, 0u, nullptr); + // Write test data to the m_buffers[0] + auto outPtr = m_allocations[0].memory->getMappedPointer(); + assert(outPtr); + memcpy( + reinterpret_cast(outPtr), + reinterpret_cast(&TestCaseIndices[0]), + sizeof(TestCaseIndices)); + // In contrast to fences, we just need one semaphore to rule all dispatches return true; } @@ -196,9 +214,8 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp m_device->blockForSemaphores(waitInfos); } - auto mem = m_buffers[1]->getBoundMemory(); - assert(mem.memory->isMappable()); - auto* ptr = mem.memory->map({ .offset = 0, .length = mem.memory->getAllocationSize() }); + auto ptr = m_allocations[1].memory->getMappedPointer(); + assert(ptr); printf("readback ptr %p\n", ptr); m_keepRunning = false; @@ -216,6 +233,7 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp smart_refctd_ptr m_descriptorSet; smart_refctd_ptr m_buffers[2]; + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocations[2] = {}; smart_refctd_ptr m_cmdbuf = nullptr; IQueue* m_queue; smart_refctd_ptr m_commandPool; diff --git a/72_CooperativeBinarySearch/testCaseData.h b/72_CooperativeBinarySearch/testCaseData.h new file mode 100644 index 
000000000..16153780e --- /dev/null +++ b/72_CooperativeBinarySearch/testCaseData.h @@ -0,0 +1,1192 @@ +0, +298, +554, +582, +912, +1074, +1076, +1078, +1170, +1188, +2140, +2414, +2736, +2738, +3980, +4800, +5898, +5900, +6936, +8106, +8152, +8650, +8844, +8930, +9504, +10244, +10826, +10828, +11126, +11430, +12206, +13764, +14010, +15302, +15624, +15656, +16414, +16494, +17368, +17432, +18312, +18948, +19376, +19818, +20146, +20604, +21240, +22446, +23482, +24914, +25042, +25538, +26764, +27564, +27566, +28472, +29450, +30202, +31474, +32160, +32676, +33792, +33794, +34704, +36540, +37456, +37950, +38364, +39274, +40442, +40518, +41412, +41590, +41950, +42022, +42714, +43464, +43790, +43792, +44876, +44878, +46188, +46572, +47352, +47650, +48242, +49856, +49858, +50506, +50968, +50970, +51152, +51154, +52870, +52884, +53332, +53334, +53904, +53964, +53966, +53968, +53970, +53972, +53974, +53976, +53978, +53980, +54514, +54516, +54518, +54520, +54762, +55866, +56462, +56478, +56480, +56482, +57510, +57568, +57570, +57572, +57846, +57848, +58760, +59408, +59438, +60198, +60200, +60202, +60204, +60284, +60938, +61274, +61720, +62296, +63116, +63378, +63380, +63382, +63384, +63386, +63388, +63904, +64572, +65142, +65144, +65146, +65554, +65738, +66052, +67016, +67424, +67566, +68270, +68272, +68610, +69240, +69870, +70988, +72622, +73258, +73260, +73580, +74524, +74880, +74958, +74960, +74962, +75114, +75116, +75622, +77144, +77798, +77800, +78314, +79566, +79568, +79570, +79572, +79850, +79852, +81576, +81684, +81686, +82492, +82494, +82496, +82498, +83990, +84860, +84988, +84990, +85138, +85772, +86120, +86122, +86564, +87402, +87404, +87602, +88676, +88714, +88780, +89560, +89732, +90786, +91128, +91130, +91272, +91522, +91804, +92588, +92590, +92834, +93268, +93736, +94448, +94704, +94706, +95074, +95076, +96706, +97040, +97770, +98000, +98676, +99968, +100074, +100318, +100602, +100914, +101020, +101872, +101878, +103078, +104246, +104266, +105436, +106332, 
+106954, +107856, +108954, +110320, +110780, +111588, +111882, +112502, +112676, +113496, +114070, +115204, +115422, +115424, +115858, +116420, +117426, +118504, +118870, +119296, +119618, +119650, +120408, +120488, +121362, +121426, +122306, +122942, +123370, +123812, +124140, +124598, +125234, +126440, +127476, +128908, +129036, +129532, +130758, +131558, +131560, +132466, +133444, +134196, +135468, +136154, +136670, +137786, +137788, +138698, +140534, +140832, +141608, +142422, +143220, +143468, +143714, +144504, +145078, +145670, +146224, +146874, +147726, +148692, +149536, +151032, +151126, +153382, +154128, +155190, +155212, +156324, +156484, +156526, +157026, +158242, +158446, +158448, +158594, +159256, +160350, +160444, +161040, +161624, +162418, +162524, +162768, +163052, +163364, +163470, +164322, +164328, +165528, +166696, +166716, +167886, +168782, +169404, +170306, +171404, +172770, +173230, +174038, +174332, +174952, +175126, +175946, +176520, +177654, +177872, +177874, +178308, +178870, +179876, +180954, +181320, +181746, +182160, +183070, +184238, +184314, +185208, +185386, +185746, +185818, +186510, +187260, +187586, +187588, +188672, +188674, +189984, +190368, +191148, +191446, +192038, +193652, +193654, +194302, +194764, +194766, +194948, +194950, +196666, +196680, +197128, +197130, +197700, +198048, +198824, +199638, +200436, +200684, +200930, +201720, +202294, +202886, +203440, +204090, +204942, +205908, +206752, +208248, +208342, +210598, +211344, +212406, +212428, +213540, +213700, +213742, +214242, +215458, +215662, +215664, +215810, +216472, +217566, +217660, +218256, +218316, +218318, +218320, +218322, +218324, +218326, +218328, +218330, +218332, +218866, +218868, +218870, +218872, +219114, +220218, +220814, +220830, +220832, +220834, +221862, +221920, +221922, +221924, +222198, +222200, +223112, +223760, +223790, +224550, +224552, +224554, +224556, +225140, +225794, +226130, +226576, +227152, +227972, +228234, +228236, +228238, +228240, 
+228242, +228244, +228760, +229428, +229998, +230000, +230002, +230410, +230594, +230908, +231872, +232280, +232422, +233126, +233128, +233466, +234096, +234726, +235844, +237478, +238114, +238116, +238512, +239256, +239812, +240660, +241950, +243244, +243366, +244346, +244412, +244710, +245202, +246504, +246728, +246988, +247592, +248630, +249562, +250962, +251964, +252562, +253140, +253412, +254672, +255276, +256084, +256160, +256378, +257104, +257602, +257776, +258240, +258556, +258614, +259208, +260496, +261202, +261398, +262284, +262610, +262976, +263578, +264622, +265558, +266692, +266756, +268110, +268994, +269158, +269718, +270388, +270768, +271098, +271786, +272398, +272996, +273140, +273612, +274226, +274660, +275070, +275416, +275634, +275680, +276088, +276408, +276410, +276852, +277690, +277692, +277890, +278964, +279002, +279068, +279848, +280020, +281074, +281416, +281418, +281560, +281810, +282092, +282876, +282878, +283122, +283556, +284024, +284736, +284992, +284994, +285362, +285364, +286994, +287328, +288058, +288288, +288964, +289708, +289746, +290266, +291136, +292152, +292740, +292834, +293708, +293768, +293936, +294846, +295028, +295040, +295130, +295372, +296154, +296736, +297250, +297606, +298068, +298310, +299420, +300362, +301176, +301502, +301878, +302702, +303576, +303896, +305170, +305928, +306070, +306150, +307094, +307450, +307528, +307530, +307532, +307684, +307686, +308192, +309714, +310368, +310370, +310884, +312136, +312138, +312140, +312142, +312420, +312422, +314146, +314254, +314256, +315062, +315064, +315066, +315068, +316560, +317430, +317558, +317560, +317708, +318342, +319182, +319992, +320612, +320956, +321068, +321076, +322784, +322914, +323106, +324036, +324708, +326092, +326994, +327332, +328080, +328444, +329022, +329256, +330454, +331304, +331610, +332432, +332440, +333298, +334300, +334478, +334622, +335370, +335818, +336456, +336618, +337930, +338932, +339158, +339258, +339746, +340226, +340254, +340256, +340988, 
+341638, +342674, +343168, +343440, +344024, +344026, +344106, +345118, +346124, +347350, +348560, +348878, +349066, +350192, +350840, +351388, +353610, +354562, +355208, +356084, +356966, +358222, +359304, +359470, +360054, +360710, +360920, +361896, +362930, +362962, +363128, +363234, +363272, +363284, +363456, +363732, +364418, +364926, +365096, +365170, +365920, +366796, +367838, +368232, +368940, +369508, +369530, +370886, +371156, +371348, +372384, +372680, +372690, +373252, +373676, +374168, +374424, +374452, +374782, +374944, +374946, +374948, +375040, +375058, +376010, +376284, +376606, +376608, +377850, +378670, +379768, +379770, +380806, +381976, +382022, +382520, +382714, +382800, +383374, +384114, +384696, +384698, +384996, +385300, +386076, +387634, +387880, +388796, +389290, +389302, +389314, +389338, +389406, +389434, +389470, +389840, +389952, +390908, +391076, +391188, +392118, +392458, +392472, +392622, +392766, +393448, +394586, +394816, +394824, +395486, +396218, +396880, +396910, +397066, +397076, +397124, +397678, +398050, +399160, +400080, +401696, +401762, +402400, +402500, +402512, +403152, +404038, +404444, +404648, +404740, +405322, +406252, +407076, +408252, +408634, +409354, +410112, +411138, +411672, +411880, +412232, +412926, +412956, +413864, +414624, +415770, +415978, +417234, +417256, +417264, +418562, +418812, +418824, +418836, +418860, +418928, +418956, +418992, +419362, +419474, +420430, +420598, +420710, +421640, +421980, +421994, +422144, +422288, +422970, +424108, +424338, +424346, +425008, +425740, +426402, +426432, +426588, +426598, +426646, +427200, +427572, +428682, +429602, +430346, +430412, +431050, +431150, +431162, +431802, +432688, +433094, +433298, +433390, +433972, +434902, +435726, +436902, +437284, +438004, +438762, +439788, +440322, +440530, +440882, +441576, +441606, +442514, +443274, +444420, +444628, +445884, +445906, +445914, +447212, +447462, +448464, +448690, +448790, +449278, +449758, +449786, +449788, 
+450520, +451170, +452206, +452700, +452972, +453556, +453558, +453638, +454650, +455656, +456882, +458092, +458410, +458598, +459724, +460372, +460920, +463142, +464094, +464740, +465616, +466498, +467754, +468836, +469002, +469586, +470180, +471468, +472174, +472370, +473256, +473582, +473948, +474550, +475594, +476530, +477664, +477728, +479082, +479966, +480130, +480690, +481360, +481740, +482070, +482758, +483370, +483968, +484112, +484584, +485198, +485632, +486042, +486388, +486606, +486652, +487060, +488676, +489420, +489976, +490824, +492114, +493408, +493530, +494510, +494576, +494874, +495366, +496668, +496892, +497152, +497756, +498794, +499726, +501126, +502128, +502726, +503304, +503576, +504836, +505440, +506248, +506324, +506542, +507268, +507766, +507940, +508404, +508720, +509514, +510170, +510380, +511356, +512390, +512422, +512588, +512694, +512732, +512744, +512916, +513192, +513878, +514386, +514556, +514630, +515380, +516256, +517298, +517692, +518400, +518968, +518990, +520346, +520616, +520808, +521844, +522140, +522150, +522712, +523136, +523628, +524468, +525278, +525898, +526242, +526354, +526362, +528070, +528200, +528392, +529322, +529994, +531378, +532280, +532618, +533366, +533730, +534308, +534542, +535740, +536590, +536896, +537718, +537726, +538584, +539586, +539764, +539908, +540656, +541104, +541742, +541904, +543216, +543612, +543650, +544170, +545040, +546056, +546644, +546738, +547612, +547672, +547840, +548750, +548932, +548944, +549034, +549276, +550058, +550640, +551154, +551510, +551972, +552214, +553324, +554266, +555080, +555406, +555782, +556606, +557480, +557800, +559074, +559832, +559974, +550468, +551276, +552568, +552866, +553798, +554120, +554294, +555554, +556448, +556874, +557328, +557680, +558532, +559844, +560774, +561050, +561458, +562684, +563910, +564026, +564542, +565294, +565434, +566278, +567580, +568006, +568328, +569626, +570350, +570998, +572812, +573008, +573500, +573828, +573840, +573842, +574798, 
+576066, +576774, +577182, +577184, +577522, +577524, +578734, +579854, +579856, +581128, +581278, +582296, +583496, +583944, +584160, +584844, +584954, +584968, +585486, +586592, +586594, +587158, +587320, +588006, +589012, +590302, +590366, +590444, +590944, +581786, +582234, +582920, +582922, +564780, +565486, +565684, +566570, +566896, +567262, +567864, +568958, +570268, +570844, +572014, +573368, +574252, +574416, +574976, +575646, +576026, +576356, +577044, +577046, +577644, +577788, +578260, +578874, +579308, +579718, +580288, +580942, +581534, +581536, +576350, +576352 \ No newline at end of file From 5886b3024d761b087232da0e52aef4877481ef36 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Wed, 3 Dec 2025 20:51:18 -0300 Subject: [PATCH 26/57] Fix test --- 72_CooperativeBinarySearch/main.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/72_CooperativeBinarySearch/main.cpp b/72_CooperativeBinarySearch/main.cpp index e2611dea7..828adf34f 100644 --- a/72_CooperativeBinarySearch/main.cpp +++ b/72_CooperativeBinarySearch/main.cpp @@ -85,7 +85,7 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp SPushConstantRange pcRange = {}; pcRange.stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; pcRange.offset = 0u; - pcRange.size = 2 * sizeof(uint32_t); + pcRange.size = sizeof(nbl::hlsl::PushConstants); auto layout = m_device->createPipelineLayout({ &pcRange,1 }, smart_refctd_ptr(m_descriptorSetLayout)); IGPUComputePipeline::SCreationParams params = {}; params.layout = layout.get(); @@ -186,11 +186,18 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp m_cmdbuf->pipelineBarrier(EDF_NONE, depInfo); - const uint32_t pushConstants[2] = { 1920, 1080 }; const IGPUDescriptorSet* set = m_descriptorSet.get(); + const uint32_t numIndices = sizeof(TestCaseIndices) / sizeof(TestCaseIndices[0]); + const uint32_t lastValue = TestCaseIndices[numIndices - 1]; + const uint32_t 
totalValues = lastValue + 100; + nbl::hlsl::PushConstants coopBinarySearchPC = { + .EntityCount = numIndices, + }; + m_cmdbuf->bindComputePipeline(m_pipeline.get()); m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0u, 1u, &set); - m_cmdbuf->dispatch(240, 135, 1u); + m_cmdbuf->pushConstants(m_pipeline->getLayout(), nbl::hlsl::ShaderStage::ESS_COMPUTE, 0u, sizeof(nbl::hlsl::PushConstants), &coopBinarySearchPC); + m_cmdbuf->dispatch((totalValues + 255u) / 256u, 1u, 1u); layoutBufferBarrier[0].barrier.dep = layoutBufferBarrier[0].barrier.dep.nextBarrier(PIPELINE_STAGE_FLAGS::COPY_BIT,ACCESS_FLAGS::TRANSFER_READ_BIT); m_cmdbuf->pipelineBarrier(EDF_NONE,depInfo); @@ -216,7 +223,14 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp auto ptr = m_allocations[1].memory->getMappedPointer(); assert(ptr); - printf("readback ptr %p\n", ptr); + + uint32_t* valuesPtr = reinterpret_cast(ptr); + for (uint32_t i = 0; i < totalValues; i++) { + uint32_t value = valuesPtr[i]; + const uint32_t* binarySearchResult = std::upper_bound(TestCaseIndices, TestCaseIndices + numIndices, i); + uint32_t lowerBoundIndex = std::distance(TestCaseIndices, binarySearchResult) - 1; + assert(value == lowerBoundIndex); + } m_keepRunning = false; } From 795066393d9b7918991800b4dda5b482cc9085b3 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Wed, 3 Dec 2025 20:58:08 -0300 Subject: [PATCH 27/57] Remove unecessary leftover file --- 72_CooperativeBinarySearch/pipeline.groovy | 50 ---------------------- 1 file changed, 50 deletions(-) delete mode 100644 72_CooperativeBinarySearch/pipeline.groovy diff --git a/72_CooperativeBinarySearch/pipeline.groovy b/72_CooperativeBinarySearch/pipeline.groovy deleted file mode 100644 index eb20d0c5a..000000000 --- a/72_CooperativeBinarySearch/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder 
- -class CComputeShaderPathTracerBuilder extends IBuilder -{ - public CComputeShaderPathTracerBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CComputeShaderPathTracerBuilder(_agent, _info) -} - -return this From eb7d4fe788fb5e88b8b475c979586e050e202b00 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 5 Dec 2025 12:58:59 +0100 Subject: [PATCH 28/57] Removed forced -O3 optimization --- 05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt | 1 - 07_StagingAndMultipleQueues/CMakeLists.txt | 1 - 10_CountingSort/CMakeLists.txt | 1 - 11_FFT/CMakeLists.txt | 1 - 24_ColorSpaceTest/CMakeLists.txt | 1 - 62_CAD/CMakeLists.txt | 1 - 64_EmulatedFloatTest/CMakeLists.txt | 1 - 67_RayQueryGeometry/CMakeLists.txt | 1 - 70_FLIPFluids/CMakeLists.txt | 1 - 71_RayTracingPipeline/CMakeLists.txt | 1 - 10 files changed, 10 deletions(-) diff --git a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt index a342ac3d5..55ebaf41d 100644 --- a/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt +++ b/05_StreamingAndBufferDeviceAddressApp/CMakeLists.txt @@ -44,7 +44,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I 
"${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) diff --git a/07_StagingAndMultipleQueues/CMakeLists.txt b/07_StagingAndMultipleQueues/CMakeLists.txt index 19515454d..fe063be7c 100644 --- a/07_StagingAndMultipleQueues/CMakeLists.txt +++ b/07_StagingAndMultipleQueues/CMakeLists.txt @@ -44,7 +44,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) diff --git a/10_CountingSort/CMakeLists.txt b/10_CountingSort/CMakeLists.txt index 3acc73022..14bde428d 100644 --- a/10_CountingSort/CMakeLists.txt +++ b/10_CountingSort/CMakeLists.txt @@ -66,7 +66,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) diff --git a/11_FFT/CMakeLists.txt b/11_FFT/CMakeLists.txt index 9a2ee5a21..ca9fe8428 100644 --- a/11_FFT/CMakeLists.txt +++ b/11_FFT/CMakeLists.txt @@ -44,7 +44,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) diff --git a/24_ColorSpaceTest/CMakeLists.txt b/24_ColorSpaceTest/CMakeLists.txt index a2c5e752b..a2feb2cb8 100644 --- a/24_ColorSpaceTest/CMakeLists.txt +++ b/24_ColorSpaceTest/CMakeLists.txt @@ -55,7 +55,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) diff --git a/62_CAD/CMakeLists.txt b/62_CAD/CMakeLists.txt index dd181ff87..0928d3b61 100644 --- a/62_CAD/CMakeLists.txt +++ b/62_CAD/CMakeLists.txt @@ -107,7 +107,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) diff --git a/64_EmulatedFloatTest/CMakeLists.txt b/64_EmulatedFloatTest/CMakeLists.txt index 6470cdc74..af46da896 100644 --- a/64_EmulatedFloatTest/CMakeLists.txt +++ b/64_EmulatedFloatTest/CMakeLists.txt @@ -56,7 +56,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) diff --git a/67_RayQueryGeometry/CMakeLists.txt 
b/67_RayQueryGeometry/CMakeLists.txt index 503c5a31a..1fdfc03ce 100644 --- a/67_RayQueryGeometry/CMakeLists.txt +++ b/67_RayQueryGeometry/CMakeLists.txt @@ -48,7 +48,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) diff --git a/70_FLIPFluids/CMakeLists.txt b/70_FLIPFluids/CMakeLists.txt index 19a561f78..842492167 100644 --- a/70_FLIPFluids/CMakeLists.txt +++ b/70_FLIPFluids/CMakeLists.txt @@ -95,7 +95,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) diff --git a/71_RayTracingPipeline/CMakeLists.txt b/71_RayTracingPipeline/CMakeLists.txt index 5c853040e..d7bb13671 100644 --- a/71_RayTracingPipeline/CMakeLists.txt +++ b/71_RayTracingPipeline/CMakeLists.txt @@ -110,7 +110,6 @@ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS -I "${CMAKE_CURRENT_SOURCE_DIR}" - -O3 -T lib_${SM} ) From e35e61dbb9b8ea91bbc42540cf58e9e92548dd27 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 5 Dec 2025 22:26:15 +0700 Subject: [PATCH 29/57] Example 73 to 15 and fix compile error --- {73_Mortons => 14_Mortons}/CMakeLists.txt | 0 14_Mortons/CTester.h | 405 ++++++++++++++++++ {73_Mortons => 14_Mortons}/ITester.h | 0 .../app_resources/common.hlsl | 41 ++ .../app_resources/test.comp.hlsl | 0 14_Mortons/app_resources/testCommon.hlsl | 253 +++++++++++ .../config.json.template | 0 {73_Mortons => 14_Mortons}/main.cpp | 0 {73_Mortons => 14_Mortons}/pipeline.groovy | 0 73_Mortons/CTester.h | 84 ++-- 73_Mortons/app_resources/testCommon.hlsl | 89 ++-- CMakeLists.txt | 2 +- 12 files changed, 793 insertions(+), 81 deletions(-) rename {73_Mortons => 14_Mortons}/CMakeLists.txt (100%) create mode 100644 14_Mortons/CTester.h rename {73_Mortons => 14_Mortons}/ITester.h (100%) rename {73_Mortons => 14_Mortons}/app_resources/common.hlsl (90%) rename {73_Mortons => 14_Mortons}/app_resources/test.comp.hlsl (100%) create mode 100644 14_Mortons/app_resources/testCommon.hlsl rename 
{73_Mortons => 14_Mortons}/config.json.template (100%) rename {73_Mortons => 14_Mortons}/main.cpp (100%) rename {73_Mortons => 14_Mortons}/pipeline.groovy (100%) diff --git a/73_Mortons/CMakeLists.txt b/14_Mortons/CMakeLists.txt similarity index 100% rename from 73_Mortons/CMakeLists.txt rename to 14_Mortons/CMakeLists.txt diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h new file mode 100644 index 000000000..4c8b4276e --- /dev/null +++ b/14_Mortons/CTester.h @@ -0,0 +1,405 @@ +#ifndef _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ +#define _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ + +#include +#include "app_resources/testCommon.hlsl" +#include "ITester.h" + +using namespace nbl; + +class CTester final : public ITester +{ +public: + void performTests() + { + std::random_device rd; + std::mt19937 mt(rd()); + + std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < Iterations; ++i) + { + // Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + testInput.shift = generatedShift; + { + uint64_t generatedA = longDistribution(mt); + uint64_t generatedB = longDistribution(mt); + + testInput.generatedA = generatedA; + testInput.generatedB = generatedB; + + expected.emulatedAnd = _static_cast(generatedA & generatedB); + expected.emulatedOr = _static_cast(generatedA | generatedB); + expected.emulatedXor = _static_cast(generatedA ^ generatedB); + expected.emulatedNot = 
_static_cast(~generatedA); + expected.emulatedPlus = _static_cast(generatedA + generatedB); + expected.emulatedMinus = _static_cast(generatedA - generatedB); + expected.emulatedUnaryMinus = _static_cast(-generatedA); + expected.emulatedLess = uint32_t(generatedA < generatedB); + expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); + expected.emulatedGreater = uint32_t(generatedA > generatedB); + expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); + + expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); + expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); + expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); + } + { + testInput.coordX = longDistribution(mt); + testInput.coordY = longDistribution(mt); + testInput.coordZ = longDistribution(mt); + testInput.coordW = longDistribution(mt); + + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint16_t2 Vec2ASmall = uint16_t2(Vec2A & smallBitsMask_2 ); + uint16_t2 Vec2BSmall = uint16_t2(Vec2B & smallBitsMask_2 ); + uint16_t2 Vec2AMedium = uint16_t2(Vec2A & mediumBitsMask_2); + uint16_t2 Vec2BMedium = uint16_t2(Vec2B & mediumBitsMask_2); + uint32_t2 Vec2AFull = uint32_t2(Vec2A & fullBitsMask_2); + uint32_t2 Vec2BFull = uint32_t2(Vec2B & fullBitsMask_2); + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint16_t3 Vec3ASmall = uint16_t3(Vec3A & smallBitsMask_3); + uint16_t3 Vec3BSmall = uint16_t3(Vec3B & smallBitsMask_3); + uint16_t3 Vec3AMedium = uint16_t3(Vec3A & mediumBitsMask_3); + uint16_t3 Vec3BMedium = uint16_t3(Vec3B & mediumBitsMask_3); + uint32_t3 Vec3AFull = uint32_t3(Vec3A & fullBitsMask_3); + uint32_t3 Vec3BFull = uint32_t3(Vec3B & fullBitsMask_3); + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, 
testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4ASmall = uint16_t4(Vec4A & smallBitsMask_4); + uint16_t4 Vec4BSmall = uint16_t4(Vec4B & smallBitsMask_4); + uint16_t4 Vec4AMedium = uint16_t4(Vec4A & mediumBitsMask_4); + uint16_t4 Vec4BMedium = uint16_t4(Vec4B & mediumBitsMask_4); + uint16_t4 Vec4AFull = uint16_t4(Vec4A & fullBitsMask_4); + uint16_t4 Vec4BFull = uint16_t4(Vec4B & fullBitsMask_4); + + // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them + // so their highest bits are all 0s or 1s depending on the sign of the number they encode + + int16_t2 Vec2ASignedSmall = int16_t2(Vec2ASmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); + int16_t2 Vec2BSignedSmall = int16_t2(Vec2BSmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); + int16_t2 Vec2ASignedMedium = int16_t2(Vec2AMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); + int16_t2 Vec2BSignedMedium = int16_t2(Vec2BMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); + int32_t2 Vec2ASignedFull = int32_t2(Vec2AFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); + int32_t2 Vec2BSignedFull = int32_t2(Vec2BFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); + + int16_t3 Vec3ASignedSmall = int16_t3(Vec3ASmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); + int16_t3 Vec3BSignedSmall = int16_t3(Vec3BSmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); + int16_t3 Vec3ASignedMedium = int16_t3(Vec3AMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); + int16_t3 Vec3BSignedMedium = int16_t3(Vec3BMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); + int32_t3 Vec3ASignedFull = int32_t3(Vec3AFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); + int32_t3 Vec3BSignedFull = 
int32_t3(Vec3BFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); + + int16_t4 Vec4ASignedSmall = int16_t4(Vec4ASmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); + int16_t4 Vec4BSignedSmall = int16_t4(Vec4BSmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); + int16_t4 Vec4ASignedMedium = int16_t4(Vec4AMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); + int16_t4 Vec4BSignedMedium = int16_t4(Vec4BMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); + int16_t4 Vec4ASignedFull = int16_t4(Vec4AFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); + int16_t4 Vec4BSignedFull = int16_t4(Vec4BFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); + + // Plus + expected.mortonPlus_small_2 = createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); + expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); + expected.mortonPlus_full_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + expected.mortonPlus_emulated_2 = createMortonFromU64Vec(Vec2AFull + Vec2BFull); + + expected.mortonPlus_small_3 = createMortonFromU64Vec(Vec3ASmall + Vec3BSmall); + expected.mortonPlus_medium_3 = createMortonFromU64Vec(Vec3AMedium + Vec3BMedium); + expected.mortonPlus_full_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + expected.mortonPlus_emulated_3 = createMortonFromU64Vec(Vec3AFull + Vec3BFull); + + expected.mortonPlus_small_4 = createMortonFromU64Vec(Vec4ASmall + Vec4BSmall); + expected.mortonPlus_medium_4 = createMortonFromU64Vec(Vec4AMedium + Vec4BMedium); + expected.mortonPlus_full_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + expected.mortonPlus_emulated_4 = createMortonFromU64Vec(Vec4AFull + Vec4BFull); + + // Minus + expected.mortonMinus_small_2 = createMortonFromU64Vec(Vec2ASmall - Vec2BSmall); + expected.mortonMinus_medium_2 = createMortonFromU64Vec(Vec2AMedium - Vec2BMedium); + expected.mortonMinus_full_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + 
expected.mortonMinus_emulated_2 = createMortonFromU64Vec(Vec2AFull - Vec2BFull); + + expected.mortonMinus_small_3 = createMortonFromU64Vec(Vec3ASmall - Vec3BSmall); + expected.mortonMinus_medium_3 = createMortonFromU64Vec(Vec3AMedium - Vec3BMedium); + expected.mortonMinus_full_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + expected.mortonMinus_emulated_3 = createMortonFromU64Vec(Vec3AFull - Vec3BFull); + + expected.mortonMinus_small_4 = createMortonFromU64Vec(Vec4ASmall - Vec4BSmall); + expected.mortonMinus_medium_4 = createMortonFromU64Vec(Vec4AMedium - Vec4BMedium); + expected.mortonMinus_full_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + expected.mortonMinus_emulated_4 = createMortonFromU64Vec(Vec4AFull - Vec4BFull); + + // Coordinate-wise equality + expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); + expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); + expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); + + expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); + expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); + expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); + + expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); + expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); + expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + + // Coordinate-wise unsigned inequality (just testing with less) + expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); + expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); + expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); 
+ expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); + + expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); + expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); + expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); + + expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); + expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); + expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + // Coordinate-wise signed inequality + expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); + expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); + expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + + expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); + expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); + expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + + expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); + expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); + expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t castedShift = uint16_t(generatedShift); + // Left-shift + expected.mortonLeftShift_small_2 = morton::code::create((Vec2ASmall << uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); + expected.mortonLeftShift_medium_2 = morton::code::create((Vec2AMedium << uint16_t(castedShift % mediumBits_2)) & 
uint16_t(mediumBitsMask_2)); + expected.mortonLeftShift_full_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + expected.mortonLeftShift_emulated_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + + expected.mortonLeftShift_small_3 = morton::code::create((Vec3ASmall << uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); + expected.mortonLeftShift_medium_3 = morton::code::create((Vec3AMedium << uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); + expected.mortonLeftShift_full_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + expected.mortonLeftShift_emulated_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + + expected.mortonLeftShift_small_4 = morton::code::create((Vec4ASmall << uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); + expected.mortonLeftShift_medium_4 = morton::code::create((Vec4AMedium << uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); + expected.mortonLeftShift_full_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + expected.mortonLeftShift_emulated_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + + // Unsigned right-shift + expected.mortonUnsignedRightShift_small_2 = morton::code::create((Vec2ASmall >> uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); + expected.mortonUnsignedRightShift_medium_2 = morton::code::create((Vec2AMedium >> uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); + expected.mortonUnsignedRightShift_full_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); + expected.mortonUnsignedRightShift_emulated_2 = morton::code::create((Vec2AFull >> 
uint32_t(castedShift % fullBits_2))& uint32_t(fullBitsMask_2)); + + expected.mortonUnsignedRightShift_small_3 = morton::code::create((Vec3ASmall >> uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); + expected.mortonUnsignedRightShift_medium_3 = morton::code::create((Vec3AMedium >> uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); + expected.mortonUnsignedRightShift_full_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); + expected.mortonUnsignedRightShift_emulated_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3))& uint32_t(fullBitsMask_3)); + + expected.mortonUnsignedRightShift_small_4 = morton::code::create((Vec4ASmall >> uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); + expected.mortonUnsignedRightShift_medium_4 = morton::code::create((Vec4AMedium >> uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); + expected.mortonUnsignedRightShift_full_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); + expected.mortonUnsignedRightShift_emulated_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4))& uint16_t(fullBitsMask_4)); + + // Signed right-shift + expected.mortonSignedRightShift_small_2 = morton::code::create((Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)) & int16_t(smallBitsMask_2)); + expected.mortonSignedRightShift_medium_2 = morton::code::create((Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)) & int16_t(mediumBitsMask_2)); + expected.mortonSignedRightShift_full_2 = morton::code::create((Vec2ASignedFull >> int32_t(castedShift % fullBits_2)) & int32_t(fullBitsMask_2)); + + expected.mortonSignedRightShift_small_3 = morton::code::create((Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)) & int16_t(smallBitsMask_3)); + expected.mortonSignedRightShift_medium_3 = morton::code::create((Vec3ASignedMedium >> int16_t(castedShift % 
mediumBits_3)) & int16_t(mediumBitsMask_3)); + expected.mortonSignedRightShift_full_3 = morton::code::create((Vec3ASignedFull >> int32_t(castedShift % fullBits_3)) & int32_t(fullBitsMask_3)); + + expected.mortonSignedRightShift_small_4 = morton::code::create((Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)) & int16_t(smallBitsMask_4)); + expected.mortonSignedRightShift_medium_4 = morton::code::create((Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)) & int16_t(mediumBitsMask_4)); + expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); + } + + performCpuTests(testInput, expected); + performGpuTests(testInput, expected); + } + m_logger->log("FIRST TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + } + +private: + inline static constexpr int Iterations = 100u; + + void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues cpuTestValues; + + fillTestValues(commonTestInputValues, cpuTestValues); + verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + + } + + void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues gpuTestValues; + gpuTestValues = dispatch(commonTestInputValues); + verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + } + + void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + { + verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); + verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); + verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); + verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); + verifyTestValue("emulatedPlus", 
expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); + verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testType); + verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); + verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); + verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); + verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); + verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); + verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); + verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); + verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testType); + + // // Morton Plus + verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); + verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testType); + verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testType); + verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testType); + + verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testType); + verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testType); + verifyTestValue("mortonPlus_full_3", 
expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testType); + verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testType); + + verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testType); + verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testType); + verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testType); + verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testType); + + // // Morton Minus + verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testType); + verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testType); + verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testType); + verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testType); + + verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testType); + verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testType); + verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testType); + verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testType); + + verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testType); + verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testType); + 
verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testType); + verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testType); + + // // Morton coordinate-wise equality + verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testType); + verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testType); + verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testType); + verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testType); + + verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testType); + verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, testValues.mortonEqual_medium_3, testType); + verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testType); + verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testType); + + verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testType); + verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testType); + verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testType); + verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testType); + + // // Morton coordinate-wise unsigned inequality + verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, 
testType); + verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testType); + verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testType); + verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testType); + + verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testType); + verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testType); + verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testType); + verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testType); + + verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testType); + verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); + verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); + + // // Morton coordinate-wise signed inequality + verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); + verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); + verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); + verifyTestValue("mortonSignedLess_emulated_2", 
expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); + + verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); + verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); + verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testType); + verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); + + verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testType); + verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); + verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); + verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); + + // // Morton left-shift + verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); + verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testType); + verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testType); + verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testType); + + verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testType); + verifyTestValue("mortonLeftShift_medium_3", 
expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testType); + verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testType); + verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testType); + + verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testType); + verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testType); + verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testType); + verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testType); + + // // Morton unsigned right-shift + verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testType); + verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testType); + verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, testValues.mortonUnsignedRightShift_emulated_2, testType); + + verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testType); + verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testType); + 
verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testType); + + verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testType); + verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testType); + verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testType); + verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testType); + + // // Morton signed right-shift + verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); + verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); + verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); + + verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testType); + verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testType); + verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testType); + + 
verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testType); + verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testType); + verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testType); + } +}; + +#endif \ No newline at end of file diff --git a/73_Mortons/ITester.h b/14_Mortons/ITester.h similarity index 100% rename from 73_Mortons/ITester.h rename to 14_Mortons/ITester.h diff --git a/73_Mortons/app_resources/common.hlsl b/14_Mortons/app_resources/common.hlsl similarity index 90% rename from 73_Mortons/app_resources/common.hlsl rename to 14_Mortons/app_resources/common.hlsl index 18cdc058f..237e3260e 100644 --- a/73_Mortons/app_resources/common.hlsl +++ b/14_Mortons/app_resources/common.hlsl @@ -19,6 +19,10 @@ NBL_CONSTEXPR uint16_t smallBits_4 = 4; NBL_CONSTEXPR uint16_t mediumBits_4 = 8; NBL_CONSTEXPR uint16_t fullBits_4 = 16; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T bitMask = (uint64_t(1) << Bits) - 1; + + #ifndef __HLSL_VERSION constexpr uint64_t smallBitsMask_2 = (uint64_t(1) << smallBits_2) - 1; @@ -36,6 +40,42 @@ constexpr uint64_t fullBitsMask_4 = (uint64_t(1) << fullBits_4) - 1; #endif using namespace nbl::hlsl; +template +T createAnyBitIntegerFromU64(uint64_t val) +{ + if(Signed && (_static_cast(val) < 0)) + { + // fill excess bit with one + return T(val) | ~bitMask; + } else + { + return T(val) & bitMask; + + } +} + +template +vector createAnyBitIntegerVecFromU64Vec(vector val) +{ + array_get, T> getter; + array_set, T> setter; + vector output; + NBL_UNROLL + for (uint16_t i = 0; i < D; i++) + { + setter(output, i, createAnyBitIntegerFromU64(getter(val, i))); + } + return output; +} + +template +morton::code createMortonFromU64Vec(const vector vec) +{ + using 
morton_code_t = morton::code; + using decode_component_t = typename morton_code_t::decode_component_t; + return morton_code_t::create(createAnyBitIntegerVecFromU64Vec(vec)); +} + struct InputTestValues { // Both tests @@ -203,6 +243,7 @@ struct TestValues morton::code mortonSignedRightShift_full_4; morton::code mortonSignedRightShift_emulated_4; + /* void fillSecondTestValues(NBL_CONST_REF_ARG(InputTestValues) input) { diff --git a/73_Mortons/app_resources/test.comp.hlsl b/14_Mortons/app_resources/test.comp.hlsl similarity index 100% rename from 73_Mortons/app_resources/test.comp.hlsl rename to 14_Mortons/app_resources/test.comp.hlsl diff --git a/14_Mortons/app_resources/testCommon.hlsl b/14_Mortons/app_resources/testCommon.hlsl new file mode 100644 index 000000000..dbe6ddbd2 --- /dev/null +++ b/14_Mortons/app_resources/testCommon.hlsl @@ -0,0 +1,253 @@ +#include "common.hlsl" + + +void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +{ + emulated_uint64_t emulatedA = _static_cast(input.generatedA); + emulated_uint64_t emulatedB = _static_cast(input.generatedB); + emulated_int64_t signedEmulatedA = _static_cast(input.generatedA); + + // Emulated int tests + output.emulatedAnd = emulatedA & emulatedB; + output.emulatedOr = emulatedA | emulatedB; + output.emulatedXor = emulatedA ^ emulatedB; + output.emulatedNot = emulatedA.operator~(); + output.emulatedPlus = emulatedA + emulatedB; + output.emulatedMinus = emulatedA - emulatedB; + output.emulatedLess = uint32_t(emulatedA < emulatedB); + output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); + output.emulatedGreater = uint32_t(emulatedA > emulatedB); + output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); + + left_shift_operator leftShift; + output.emulatedLeftShifted = leftShift(emulatedA, input.shift); + + arithmetic_right_shift_operator unsignedRightShift; + output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); + + 
arithmetic_right_shift_operator signedRightShift; + output.emulatedSignedRightShifted = signedRightShift(signedEmulatedA, input.shift); + + output.emulatedUnaryMinus = signedEmulatedA.operator-(); + + // Morton tests + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + morton::code morton_small_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_2A = createMortonFromU64Vec(Vec2A); + morton::code morton_small_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_medium_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_full_2B = createMortonFromU64Vec(Vec2B); + morton::code morton_emulated_2B = createMortonFromU64Vec(Vec2B); + + morton::code morton_small_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_medium_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3A = createMortonFromU64Vec(Vec3A); + morton::code morton_small_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_medium_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_full_3B = createMortonFromU64Vec(Vec3B); + morton::code morton_emulated_3B = createMortonFromU64Vec(Vec3B); + + morton::code morton_small_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_small_4B = createMortonFromU64Vec(Vec4B); + morton::code 
morton_medium_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_full_4B = createMortonFromU64Vec(Vec4B); + morton::code morton_emulated_4B = createMortonFromU64Vec(Vec4B); + + morton::code morton_small_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_medium_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_full_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + + morton::code morton_small_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_medium_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_full_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + + morton::code morton_small_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_medium_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_full_4_signed = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + // Plus + output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; + output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; + output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; + output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; + + output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; + output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; + output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; + output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; + + output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; + output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; + output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; + output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; + + // // Minus + output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; + output.mortonMinus_medium_2 = morton_medium_2A - 
morton_medium_2B; + output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; + output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; + + output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; + output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; + output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; + output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; + + output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; + output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; + output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; + output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; + + // // Coordinate-wise equality + output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(uint16_t2(Vec2B))); + output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(uint16_t2(Vec2B))); + output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(uint32_t2(Vec2B))); + output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(uint32_t2(Vec2B))); + + output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(uint16_t3(Vec3B))); + output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(uint16_t3(Vec3B))); + output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(uint32_t3(Vec3B))); + output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(uint32_t3(Vec3B))); + + output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); + output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); + output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); + // output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); + + // Coordinate-wise unsigned inequality (just testing with less) + output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); + output.mortonUnsignedLess_medium_2 = 
uint32_t2(morton_medium_2A.lessThan(uint16_t2(Vec2B))); + output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(uint32_t2(Vec2B))); + output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(uint32_t2(Vec2B))); + + output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(uint16_t3(Vec3B))); + output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(uint16_t3(Vec3B))); + output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(uint32_t3(Vec3B))); + // output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); + + output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(uint16_t4(Vec4B))); + output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(uint16_t4(Vec4B))); + output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(uint16_t4(Vec4B))); + // output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(uint16_t4(Vec4B))); + // less(Vec4A, Vec4B); + + // Coordinate-wise signed inequality + output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(int16_t2(Vec2B))); + output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(int16_t2(Vec2B))); + output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(int32_t2(Vec2B))); + // output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(int32_t2(Vec2B))); + + output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(int16_t3(Vec3B))); + output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(int16_t3(Vec3B))); + output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(int32_t3(Vec3B))); + output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(int32_t3(Vec3B))); + + output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(int16_t4(Vec4B))); + output.mortonSignedLess_medium_4 = 
uint32_t4(morton_medium_4_signed.lessThan(int16_t4(Vec4B))); + output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(int16_t4(Vec4B))); + // output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(int16_t4(Vec4B))); + + // // Cast to uint16_t which is what left shift for Mortons expect + uint16_t castedShift = uint16_t(input.shift); + // // Each left shift clamps to correct bits so the result kinda makes sense + // // Left-shift + left_shift_operator > leftShiftSmall2; + output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); + left_shift_operator > leftShiftMedium2; + output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + left_shift_operator > leftShiftFull2; + output.mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift % fullBits_2); + left_shift_operator > leftShiftEmulated2; + output.mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + left_shift_operator > leftShiftSmall3; + output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); + left_shift_operator > leftShiftMedium3; + output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + left_shift_operator > leftShiftFull3; + output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); + left_shift_operator > leftShiftEmulated3; + output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + left_shift_operator > leftShiftSmall4; + output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); + left_shift_operator > leftShiftMedium4; + output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + left_shift_operator > leftShiftFull4; + output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % 
fullBits_4); + left_shift_operator > leftShiftEmulated4; + output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // // Unsigned right-shift + arithmetic_right_shift_operator > rightShiftSmall2; + output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftMedium2; + output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftFull2; + output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftEmulated2; + output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSmall3; + output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftMedium3; + output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftFull3; + output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftEmulated3; + output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSmall4; + output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftMedium4; + output.mortonUnsignedRightShift_medium_4 = rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftFull4; + output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); + 
arithmetic_right_shift_operator > rightShiftEmulated4; + output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); + + // // Signed right-shift + arithmetic_right_shift_operator > rightShiftSignedSmall2; + output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); + arithmetic_right_shift_operator > rightShiftSignedMedium2; + output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); + arithmetic_right_shift_operator > rightShiftSignedFull2; + output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); + + arithmetic_right_shift_operator > rightShiftSignedSmall3; + output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); + arithmetic_right_shift_operator > rightShiftSignedMedium3; + output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); + arithmetic_right_shift_operator > rightShiftSignedFull3; + output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); + + arithmetic_right_shift_operator > rightShiftSignedSmall4; + output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); + arithmetic_right_shift_operator > rightShiftSignedMedium4; + output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); + arithmetic_right_shift_operator > rightShiftSignedFull4; + output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); + + // arithmetic_right_shift_operator > rightShiftSignedEmulated2; + // output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift); + // 
arithmetic_right_shift_operator > rightShiftSignedEmulated3; + // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift); + // arithmetic_right_shift_operator > rightShiftSignedEmulated4; + // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift); +} \ No newline at end of file diff --git a/73_Mortons/config.json.template b/14_Mortons/config.json.template similarity index 100% rename from 73_Mortons/config.json.template rename to 14_Mortons/config.json.template diff --git a/73_Mortons/main.cpp b/14_Mortons/main.cpp similarity index 100% rename from 73_Mortons/main.cpp rename to 14_Mortons/main.cpp diff --git a/73_Mortons/pipeline.groovy b/14_Mortons/pipeline.groovy similarity index 100% rename from 73_Mortons/pipeline.groovy rename to 14_Mortons/pipeline.groovy diff --git a/73_Mortons/CTester.h b/73_Mortons/CTester.h index fa29f3c9c..b4097dad6 100644 --- a/73_Mortons/CTester.h +++ b/73_Mortons/CTester.h @@ -113,37 +113,37 @@ class CTester final : public ITester int16_t4 Vec4BSignedFull = int16_t4(Vec4BFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); // Plus - expected.mortonPlus_small_2 = morton::code::create(Vec2ASmall + Vec2BSmall); - expected.mortonPlus_medium_2 = morton::code::create(Vec2AMedium + Vec2BMedium); - expected.mortonPlus_full_2 = morton::code::create(Vec2AFull + Vec2BFull); - expected.mortonPlus_emulated_2 = morton::code::create(Vec2AFull + Vec2BFull); - - expected.mortonPlus_small_3 = morton::code::create(Vec3ASmall + Vec3BSmall); - expected.mortonPlus_medium_3 = morton::code::create(Vec3AMedium + Vec3BMedium); - expected.mortonPlus_full_3 = morton::code::create(Vec3AFull + Vec3BFull); - expected.mortonPlus_emulated_3 = morton::code::create(Vec3AFull + Vec3BFull); - - expected.mortonPlus_small_4 = morton::code::create(Vec4ASmall + Vec4BSmall); - expected.mortonPlus_medium_4 = morton::code::create(Vec4AMedium + Vec4BMedium); - 
expected.mortonPlus_full_4 = morton::code::create(Vec4AFull + Vec4BFull); - expected.mortonPlus_emulated_4 = morton::code::create(Vec4AFull + Vec4BFull); - - // Minus - expected.mortonMinus_small_2 = morton::code::create(Vec2ASmall - Vec2BSmall); - expected.mortonMinus_medium_2 = morton::code::create(Vec2AMedium - Vec2BMedium); - expected.mortonMinus_full_2 = morton::code::create(Vec2AFull - Vec2BFull); - expected.mortonMinus_emulated_2 = morton::code::create(Vec2AFull - Vec2BFull); - - expected.mortonMinus_small_3 = morton::code::create(Vec3ASmall - Vec3BSmall); - expected.mortonMinus_medium_3 = morton::code::create(Vec3AMedium - Vec3BMedium); - expected.mortonMinus_full_3 = morton::code::create(Vec3AFull - Vec3BFull); - expected.mortonMinus_emulated_3 = morton::code::create(Vec3AFull - Vec3BFull); - - expected.mortonMinus_small_4 = morton::code::create(Vec4ASmall - Vec4BSmall); - expected.mortonMinus_medium_4 = morton::code::create(Vec4AMedium - Vec4BMedium); - expected.mortonMinus_full_4 = morton::code::create(Vec4AFull - Vec4BFull); - expected.mortonMinus_emulated_4 = morton::code::create(Vec4AFull - Vec4BFull); - + expected.mortonPlus_small_2 = morton::code::create((Vec2ASmall + Vec2BSmall) & static_cast(smallBitsMask_2)); + expected.mortonPlus_medium_2 = morton::code::create((Vec2AMedium + Vec2BMedium) & static_cast(mediumBitsMask_2)); + expected.mortonPlus_full_2 = morton::code::create((Vec2AFull + Vec2BFull) & static_cast(fullBitsMask_2)); + expected.mortonPlus_emulated_2 = morton::code::create((Vec2AFull + Vec2BFull) & static_cast(fullBitsMask_2)); + + expected.mortonPlus_small_3 = morton::code::create((Vec3ASmall + Vec3BSmall) & static_cast(smallBitsMask_3)); + expected.mortonPlus_medium_3 = morton::code::create((Vec3AMedium + Vec3BMedium) & static_cast(mediumBitsMask_3)); + expected.mortonPlus_full_3 = morton::code::create((Vec3AFull + Vec3BFull) & static_cast(fullBitsMask_3)); + expected.mortonPlus_emulated_3 = morton::code::create((Vec3AFull + 
Vec3BFull) & static_cast(fullBitsMask_3)); + + expected.mortonPlus_small_4 = morton::code::create((Vec4ASmall + Vec4BSmall) & static_cast(smallBitsMask_4)); + expected.mortonPlus_medium_4 = morton::code::create((Vec4AMedium + Vec4BMedium) & static_cast(mediumBitsMask_4)); + expected.mortonPlus_full_4 = morton::code::create((Vec4AFull + Vec4BFull) & static_cast(fullBitsMask_4)); + expected.mortonPlus_emulated_4 = morton::code::create((Vec4AFull + Vec4BFull) & static_cast(fullBitsMask_4)); + + // // Minus + // expected.mortonMinus_small_2 = morton::code::create(Vec2ASmall - Vec2BSmall); + // expected.mortonMinus_medium_2 = morton::code::create(Vec2AMedium - Vec2BMedium); + // expected.mortonMinus_full_2 = morton::code::create(Vec2AFull - Vec2BFull); + // expected.mortonMinus_emulated_2 = morton::code::create(Vec2AFull - Vec2BFull); + // + // expected.mortonMinus_small_3 = morton::code::create(Vec3ASmall - Vec3BSmall); + // expected.mortonMinus_medium_3 = morton::code::create(Vec3AMedium - Vec3BMedium); + // expected.mortonMinus_full_3 = morton::code::create(Vec3AFull - Vec3BFull); + // expected.mortonMinus_emulated_3 = morton::code::create(Vec3AFull - Vec3BFull); + // + // expected.mortonMinus_small_4 = morton::code::create(Vec4ASmall - Vec4BSmall); + // expected.mortonMinus_medium_4 = morton::code::create(Vec4AMedium - Vec4BMedium); + // expected.mortonMinus_full_4 = morton::code::create(Vec4AFull - Vec4BFull); + // expected.mortonMinus_emulated_4 = morton::code::create(Vec4AFull - Vec4BFull); + // // Coordinate-wise equality expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); @@ -221,17 +221,17 @@ class CTester final : public ITester expected.mortonUnsignedRightShift_emulated_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4))& uint16_t(fullBitsMask_4)); // Signed right-shift - expected.mortonSignedRightShift_small_2 = 
morton::code::create((Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)) & int16_t(smallBitsMask_2)); - expected.mortonSignedRightShift_medium_2 = morton::code::create((Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)) & int16_t(mediumBitsMask_2)); - expected.mortonSignedRightShift_full_2 = morton::code::create((Vec2ASignedFull >> int32_t(castedShift % fullBits_2)) & int32_t(fullBitsMask_2)); - - expected.mortonSignedRightShift_small_3 = morton::code::create((Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)) & int16_t(smallBitsMask_3)); - expected.mortonSignedRightShift_medium_3 = morton::code::create((Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)) & int16_t(mediumBitsMask_3)); - expected.mortonSignedRightShift_full_3 = morton::code::create((Vec3ASignedFull >> int32_t(castedShift % fullBits_3)) & int32_t(fullBitsMask_3)); - - expected.mortonSignedRightShift_small_4 = morton::code::create((Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)) & int16_t(smallBitsMask_4)); - expected.mortonSignedRightShift_medium_4 = morton::code::create((Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)) & int16_t(mediumBitsMask_4)); - expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); + // expected.mortonSignedRightShift_small_2 = morton::code::create((Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)) & int16_t(smallBitsMask_2)); + // expected.mortonSignedRightShift_medium_2 = morton::code::create((Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)) & int16_t(mediumBitsMask_2)); + // expected.mortonSignedRightShift_full_2 = morton::code::create((Vec2ASignedFull >> int32_t(castedShift % fullBits_2)) & int32_t(fullBitsMask_2)); + // + // expected.mortonSignedRightShift_small_3 = morton::code::create((Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)) & int16_t(smallBitsMask_3)); + // expected.mortonSignedRightShift_medium_3 = 
morton::code::create((Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)) & int16_t(mediumBitsMask_3)); + // expected.mortonSignedRightShift_full_3 = morton::code::create((Vec3ASignedFull >> int32_t(castedShift % fullBits_3)) & int32_t(fullBitsMask_3)); + // + // expected.mortonSignedRightShift_small_4 = morton::code::create((Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)) & int16_t(smallBitsMask_4)); + // expected.mortonSignedRightShift_medium_4 = morton::code::create((Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)) & int16_t(mediumBitsMask_4)); + // expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); } performCpuTests(testInput, expected); diff --git a/73_Mortons/app_resources/testCommon.hlsl b/73_Mortons/app_resources/testCommon.hlsl index 4ca2b859d..93205db62 100644 --- a/73_Mortons/app_resources/testCommon.hlsl +++ b/73_Mortons/app_resources/testCommon.hlsl @@ -1,5 +1,17 @@ #include "common.hlsl" +template +morton::code createMortonFromAnyVec(vector, Dim> val) +{ + using morton_code_t = morton::code; + using decode_element_t = typename morton_code_t::decode_component_t ; + NBL_IF_CONSTEXPR(Signed) + { + return morton_code_t::create(_static_cast >(val & )); + + } +} + void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) { emulated_uint64_t emulatedA = _static_cast(input.generatedA); @@ -48,44 +60,44 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa int64_t4 Vec4ASigned = int64_t4(Vec4A); int64_t4 Vec4BSigned = int64_t4(Vec4B); - morton::code morton_small_2A = morton::code::create(Vec2A); - morton::code morton_medium_2A = morton::code::create(Vec2A); - morton::code morton_full_2A = morton::code::create(Vec2A); - morton::code morton_emulated_2A = morton::code::create(Vec2A); - morton::code morton_small_2B = morton::code::create(Vec2B); - morton::code morton_medium_2B = 
morton::code::create(Vec2B); - morton::code morton_full_2B = morton::code::create(Vec2B); - morton::code morton_emulated_2B = morton::code::create(Vec2B); - - morton::code morton_small_3A = morton::code::create(Vec3A); - morton::code morton_medium_3A = morton::code::create(Vec3A); - morton::code morton_full_3A = morton::code::create(Vec3A); - morton::code morton_emulated_3A = morton::code::create(Vec3A); - morton::code morton_small_3B = morton::code::create(Vec3B); - morton::code morton_medium_3B = morton::code::create(Vec3B); - morton::code morton_full_3B = morton::code::create(Vec3B); - morton::code morton_emulated_3B = morton::code::create(Vec3B); - - morton::code morton_small_4A = morton::code::create(Vec4A); - morton::code morton_medium_4A = morton::code::create(Vec4A); - morton::code morton_full_4A = morton::code::create(Vec4A); - morton::code morton_emulated_4A = morton::code::create(Vec4A); - morton::code morton_small_4B = morton::code::create(Vec4B); - morton::code morton_medium_4B = morton::code::create(Vec4B); - morton::code morton_full_4B = morton::code::create(Vec4B); - morton::code morton_emulated_4B = morton::code::create(Vec4B); - - morton::code morton_small_2_signed = morton::code::create(Vec2ASigned); - morton::code morton_medium_2_signed = morton::code::create(Vec2ASigned); - morton::code morton_full_2_signed = morton::code::create(Vec2ASigned); - - morton::code morton_small_3_signed = morton::code::create(Vec3ASigned); - morton::code morton_medium_3_signed = morton::code::create(Vec3ASigned); - morton::code morton_full_3_signed = morton::code::create(Vec3ASigned); - - morton::code morton_small_4_signed = morton::code::create(Vec4ASigned); - morton::code morton_medium_4_signed = morton::code::create(Vec4ASigned); - morton::code morton_full_4_signed = morton::code::create(Vec4ASigned); + morton::code morton_small_2A = createMortonFromAnyVec(Vec2A); + morton::code morton_medium_2A = createMortonFromAnyVec(Vec2A); + morton::code morton_full_2A = 
createMortonFromAnyVec(Vec2A); + morton::code morton_emulated_2A = createMortonFromAnyVec(Vec2A); + morton::code morton_small_2B = createMortonFromAnyVec(Vec2B); + morton::code morton_medium_2B = createMortonFromAnyVec(Vec2B); + morton::code morton_full_2B = createMortonFromAnyVec(Vec2B); + morton::code morton_emulated_2B = createMortonFromAnyVec(Vec2B); + + morton::code morton_small_3A = createMortonFromAnyVec(Vec3A); + morton::code morton_medium_3A = createMortonFromAnyVec(Vec3A); + morton::code morton_full_3A = createMortonFromAnyVec(Vec3A); + morton::code morton_emulated_3A = createMortonFromAnyVec(Vec3A); + morton::code morton_small_3B = createMortonFromAnyVec(Vec3B); + morton::code morton_medium_3B = createMortonFromAnyVec(Vec3B); + morton::code morton_full_3B = createMortonFromAnyVec(Vec3B); + morton::code morton_emulated_3B = createMortonFromAnyVec(Vec3B); + + morton::code morton_small_4A = createMortonFromAnyVec(Vec4A); + morton::code morton_medium_4A = createMortonFromAnyVec(Vec4A); + morton::code morton_full_4A = createMortonFromAnyVec(Vec4A); + morton::code morton_emulated_4A = createMortonFromAnyVec(Vec4A); + morton::code morton_small_4B = createMortonFromAnyVec(Vec4B); + morton::code morton_medium_4B = createMortonFromAnyVec(Vec4B); + morton::code morton_full_4B = createMortonFromAnyVec(Vec4B); + morton::code morton_emulated_4B = createMortonFromAnyVec(Vec4B); + + morton::code morton_small_2_signed = createMortonFromAnyVec(Vec2ASigned); + morton::code morton_medium_2_signed = createMortonFromAnyVec(Vec2ASigned); + morton::code morton_full_2_signed = createMortonFromAnyVec(Vec2ASigned); + + morton::code morton_small_3_signed = createMortonFromAnyVec(Vec3ASigned); + morton::code morton_medium_3_signed = createMortonFromAnyVec(Vec3ASigned); + morton::code morton_full_3_signed = createMortonFromAnyVec(Vec3ASigned); + + morton::code morton_small_4_signed = createMortonFromAnyVec(Vec4ASigned); + morton::code morton_medium_4_signed = 
createMortonFromAnyVec(Vec4ASigned); + morton::code morton_full_4_signed = createMortonFromAnyVec(Vec4ASigned); // Plus output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; @@ -133,6 +145,7 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); + output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); // Coordinate-wise unsigned inequality (just testing with less) output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); diff --git a/CMakeLists.txt b/CMakeLists.txt index b85577144..0f3c6bcb0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(12_MeshLoaders) # add_subdirectory(13_MaterialCompilerTest) + add_subdirectory(14_Mortons EXCLUDE_FROM_ALL) # Waiting for a refactor #add_subdirectory(27_PLYSTLDemo) @@ -87,7 +88,6 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) - add_subdirectory(73_Mortons EXCLUDE_FROM_ALL) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From 93861bd59f85721993472e3de67f23bec6170363 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sat, 6 Dec 2025 21:02:46 +0300 Subject: [PATCH 30/57] Make camera account for up direction, corrected framebuffer resolutions for both views, solid angle shader now outputs correct cube vertices correctly --- .../hlsl/SolidAngleVis.frag.hlsl | 157 +++++++++++------- 72_SolidAngleVisualizer/include/transform.hpp | 2 +- 72_SolidAngleVisualizer/main.cpp | 134 ++++++++------- .../include/nbl/examples/cameras/CCamera.hpp | 50 +++--- 4 files changed, 190 
insertions(+), 153 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index d783a5b37..2ad766c8a 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -9,7 +9,7 @@ using namespace ext::FullScreenTriangle; [[vk::push_constant]] struct PushConstants pc; -static const float CIRCLE_RADIUS = 0.45f; +static const float CIRCLE_RADIUS = 0.75f; // --- Geometry Utils --- @@ -33,17 +33,23 @@ static float3 corners[8]; static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) }; static float2 projCorners[8]; +static bool cornerVisible[8]; // Converts UV into centered, aspect-corrected NDC circle space float2 toCircleSpace(float2 uv) { - float aspect = pc.viewport.z / pc.viewport.w; - float2 centered = uv - 0.5f; - centered.x *= aspect; - return centered; + // Map [0,1] UV to [-1,1] + float2 p = uv * 2.0f - 1.0f; + + // Correct aspect ratio + float aspect = pc.viewport.z / pc.viewport.w; // width / height + p.x *= aspect; + + return p; } + // Distance to a 2D line segment float sdSegment(float2 p, float2 a, float2 b) { @@ -54,9 +60,18 @@ float sdSegment(float2 p, float2 a, float2 b) } // TODO: Hemispherical Projection (Solid Angle / Orthographic/Lambertian Projection) -float2 project(float3 p) +bool projectToOrthoSphere(float3 p, out float2 uv) { - return normalize(p).xy; + float3 n = normalize(p); // direction to sphere + + // hemisphere (Z > 0) + if (n.z <= 0.0) + return false; + + // orthographic projection (drop Z) + uv = n.xy; + + return true; // valid } void computeCubeGeo() @@ -66,71 +81,72 @@ void computeCubeGeo() float3 localPos = float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; - corners[i] = worldPos; + corners[i] = 
worldPos.xyz; faceCenters[i/4] += worldPos / 4.0f; faceCenters[2+i%2] += worldPos / 4.0f; faceCenters[4+(i/2)%2] += worldPos / 4.0f; - float3 viewPos = worldPos; - projCorners[i] = project(viewPos); + float3 viewPos = worldPos.xyz; + cornerVisible[i] = projectToOrthoSphere(viewPos, projCorners[i]); + projCorners[i] *= CIRCLE_RADIUS; // scale to circle radius } } -int getVisibilityCount(int2 faces, float3 cameraPos) -{ - float3x3 rotMatrix = (float3x3)pc.modelMatrix; - float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); - float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); +// int getVisibilityCount(int2 faces, float3 cameraPos) +// { +// float3x3 rotMatrix = (float3x3)pc.modelMatrix; +// float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); +// float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); - float3 viewVec_f1 = faceCenters[faces.x] - cameraPos; - float3 viewVec_f2 = faceCenters[faces.y] - cameraPos; +// float3 viewVec_f1 = faceCenters[faces.x] - cameraPos; +// float3 viewVec_f2 = faceCenters[faces.y] - cameraPos; - // Face is visible if its outward normal points towards the origin (camera). - bool visible1 = dot(n_world_f1, viewVec_f1) < 0.0f; - bool visible2 = dot(n_world_f2, viewVec_f2) < 0.0f; +// // Face is visible if its outward normal points towards the origin (camera). 
+// bool visible1 = dot(n_world_f1, viewVec_f1) < 0.0f; +// bool visible2 = dot(n_world_f2, viewVec_f2) < 0.0f; - // Determine Line Style: - bool isSilhouette = visible1 != visible2; // One face visible, the other hidden - bool isInner = visible1 && visible2; // Both faces visible +// // Determine Line Style: +// bool isSilhouette = visible1 != visible2; // One face visible, the other hidden +// bool isInner = visible1 && visible2; // Both faces visible - int visibilityCount = 0; - if (isSilhouette) - { - visibilityCount = 1; - } - else if (isInner) - { - visibilityCount = 2; - } - - return visibilityCount; -} - -void drawLine(float2 p, int a, int b, int visibilityCount, inout float4 color, float aaWidth) -{ - if (visibilityCount > 0) - { - float3 A = corners[a]; - float3 B = corners[b]; - - float avgDepth = (length(A) + length(B)) * 0.5f; - float referenceDepth = 3.0f; - float depthScale = referenceDepth / avgDepth; - - float baseWidth = (visibilityCount == 1) ? 0.005f : 0.002f; - float intensity = (visibilityCount == 1) ? 1.0f : 0.5f; - float4 edgeColor = (visibilityCount == 1) ? float4(0.0f, 0.5f, 1.0f, 1.0f) : float4(1.0f, 0.0f, 0.0f, 1.0f); // Blue vs Red +// int visibilityCount = 0; +// if (isSilhouette) +// { +// visibilityCount = 1; +// } +// else if (isInner) +// { +// visibilityCount = 2; +// } + +// return visibilityCount; +// } + +// void drawLine(float2 p, int a, int b, int visibilityCount, inout float4 color, float aaWidth) +// { +// if (visibilityCount > 0) +// { +// float3 A = corners[a]; +// float3 B = corners[b]; + +// float avgDepth = (length(A) + length(B)) * 0.5f; +// float referenceDepth = 3.0f; +// float depthScale = referenceDepth / avgDepth; + +// float baseWidth = (visibilityCount == 1) ? 0.005f : 0.002f; +// float intensity = (visibilityCount == 1) ? 1.0f : 0.5f; +// float4 edgeColor = (visibilityCount == 1) ? 
float4(0.0f, 0.5f, 1.0f, 1.0f) : float4(1.0f, 0.0f, 0.0f, 1.0f); // Blue vs Red - float width = min(baseWidth * depthScale, 0.03f); +// float width = min(baseWidth * depthScale, 0.03f); - float dist = sdSegment(p, projCorners[a], projCorners[b]); +// float dist = sdSegment(p, projCorners[a], projCorners[b]); - float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); +// float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); - color += edgeColor * alpha * intensity; - } -} +// color += edgeColor * alpha * intensity; +// } +// } void drawRing(float2 p, inout float4 color, float aaWidth) { @@ -149,6 +165,12 @@ void drawRing(float2 p, inout float4 color, float aaWidth) color = max(color, float4(1.0, 1.0, 1.0, 1.0) * ringAlpha); } +float plotPoint(float2 uv, float2 p, float r) +{ + return step(length(uv - p), r); +} + + [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { float3 cameraPos = float3(0, 0, 0); // Camera at origin @@ -159,16 +181,25 @@ void drawRing(float2 p, inout float4 color, float aaWidth) float aaWidth = max(fwidth(p.x), fwidth(p.y)); - for (int j = 0; j < 12; j++) + float pointMask = 0.0; + for (int i=0; i<8; i++) { - int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? j % 2 : 0); - int b = a + (4 >> (j / 4)); - - int2 faces = edgeToFaces[j]; - int visibilityCount = getVisibilityCount(faces, cameraPos); - drawLine(p, a, b, visibilityCount, color, aaWidth); + if (cornerVisible[i]) + pointMask += plotPoint(p, projCorners[i], 0.015f); } + color += pointMask * float4(1,0,0,1); // red points + + // for (int j = 0; j < 12; j++) + // { + // int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? 
j % 2 : 0); + // int b = a + (4 >> (j / 4)); + + // // int2 faces = edgeToFaces[j]; + // // int visibilityCount = getVisibilityCount(faces, cameraPos); + // // drawLine(p, a, b, visibilityCount, color, aaWidth); + // } + drawRing(p, color, aaWidth); return color; diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp index 002a9d215..5061ebd49 100644 --- a/72_SolidAngleVisualizer/include/transform.hpp +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -19,7 +19,7 @@ struct TransformRequestParams struct TransformReturnInfo { - nbl::hlsl::uint16_t2 sceneResolution = { 2048,1024 }; + nbl::hlsl::uint16_t2 sceneResolution = { 0, 0 }; bool isGizmoWindowHovered; bool isGizmoBeingUsed; }; diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index b6d723e70..1025eb067 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -5,7 +5,6 @@ #include "common.hpp" #include "app_resources/hlsl/common.hlsl" - #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" /* @@ -319,10 +318,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // CPU events update(nextPresentationTimestamp); - const auto& virtualWindowRes = interface.transformReturnInfo.sceneResolution; - // TODO: check main frame buffer too - if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualWindowRes[1]) - recreateFramebuffer(virtualWindowRes); + { + const auto& virtualSolidAngleWindowRes = interface.solidAngleViewTransformReturnInfo.sceneResolution; + const auto& virtualMainWindowRes = interface.mainViewTransformReturnInfo.sceneResolution; + if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualSolidAngleWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != 
virtualSolidAngleWindowRes[1] || + !m_mainViewFramebuffer || m_mainViewFramebuffer->getCreationParameters().width != virtualMainWindowRes[0] || m_mainViewFramebuffer->getCreationParameters().height != virtualMainWindowRes[1]) + recreateFramebuffer(); + } // const auto resourceIx = m_realFrameIx % MaxFramesInFlight; @@ -334,6 +336,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; if (m_solidAngleViewFramebuffer) { + auto creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); cb->beginDebugMarker("Draw Circle View Frame"); { const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; @@ -344,7 +347,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .depthStencilClearValues = &farValue, .renderArea = { .offset = {0,0}, - .extent = {virtualWindowRes[0],virtualWindowRes[1]} + .extent = {creationParams.width, creationParams.height} } }; beginRenderpass(cb, renderpassInfo); @@ -353,7 +356,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { PushConstants pc{ .modelMatrix = hlsl::float32_t3x4(hlsl::transpose(interface.m_OBBModelMatrix)), - .viewport = { 0.f,0.f,static_cast(virtualWindowRes[0]),static_cast(virtualWindowRes[1]) } + .viewport = { 0.f,0.f,static_cast(creationParams.width),static_cast(creationParams.height) } }; auto pipeline = m_visualizationPipeline; cb->bindGraphicsPipeline(pipeline.get()); @@ -369,6 +372,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR { cb->beginDebugMarker("Main Scene Frame"); { + auto creationParams = m_mainViewFramebuffer->getCreationParameters(); const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth = 0.f }; const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = { @@ -377,7 +381,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, 
public BuiltinR .depthStencilClearValues = &farValue, .renderArea = { .offset = {0,0}, - .extent = {virtualWindowRes[0],virtualWindowRes[1]} + .extent = {creationParams.width, creationParams.height} } }; beginRenderpass(cb, renderpassInfo); @@ -404,12 +408,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // TODO: a better way to get identity matrix float32_t3x4 origin = { - 0.2f,0.0f,0.0f,0.0f, - 0.0f,0.2f,0.0f,0.0f, - 0.0f,0.0f,0.2f,0.0f + 1.0f,0.0f,0.0f,0.0f, + 0.0f,1.0f,0.0f,0.0f, + 0.0f,0.0f,1.0f,0.0f }; memcpy(&instance.world, &origin, sizeof(instance.world)); - instance.packedGeo = m_renderer->getGeometries().data() + 3; // sphere + instance.packedGeo = m_renderer->getGeometries().data() + 2; // disk m_renderer->render(cb, viewParams); } cb->endRenderPass(); @@ -575,7 +579,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ); keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { - //if (interface.move) + if (interface.move) camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl for (const auto& e : events) // here capture @@ -606,9 +610,10 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR interface.imGUI->update(params); } - void recreateFramebuffer(const uint16_t2 resolution) + void recreateFramebuffer() { - auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr + + auto createImageAndView = [&](const uint16_t2 resolution, E_FORMAT format)->smart_refctd_ptr { auto image = m_device->createImage({ { .type = IGPUImage::ET_2D, @@ -632,29 +637,32 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr solidAngleView; smart_refctd_ptr mainView; + const uint16_t2 solidAngleViewRes = interface.solidAngleViewTransformReturnInfo.sceneResolution; + const uint16_t2 mainViewRes = interface.mainViewTransformReturnInfo.sceneResolution; + 
// detect window minimization - if (resolution.x < 0x4000 && resolution.y < 0x4000) + if (solidAngleViewRes.x < 0x4000 && solidAngleViewRes.y < 0x4000 || + mainViewRes.x < 0x4000 && mainViewRes.y < 0x4000) { - solidAngleView = createImageAndView(finalSceneRenderFormat); - auto solidAngleDepthView = createImageAndView(sceneRenderDepthFormat); + solidAngleView = createImageAndView(solidAngleViewRes, finalSceneRenderFormat); + auto solidAngleDepthView = createImageAndView(solidAngleViewRes, sceneRenderDepthFormat); m_solidAngleViewFramebuffer = m_device->createFramebuffer({ { .renderpass = m_solidAngleRenderpass, .depthStencilAttachments = &solidAngleDepthView.get(), .colorAttachments = &solidAngleView.get(), - .width = resolution.x, - .height = resolution.y + .width = solidAngleViewRes.x, + .height = solidAngleViewRes.y } }); - mainView = createImageAndView(finalSceneRenderFormat); - auto mainDepthView = createImageAndView(sceneRenderDepthFormat); + mainView = createImageAndView(mainViewRes, finalSceneRenderFormat); + auto mainDepthView = createImageAndView(mainViewRes, sceneRenderDepthFormat); m_mainViewFramebuffer = m_device->createFramebuffer({ { .renderpass = m_mainRenderpass, .depthStencilAttachments = &mainDepthView.get(), .colorAttachments = &mainView.get(), - .width = resolution.x, - .height = resolution.y + .width = mainViewRes.x, + .height = mainViewRes.y } }); - } else { @@ -715,6 +723,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; + constexpr static inline float32_t4x4 OBBModelMatrixDefault + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 6.0f, 1.0f + }; // smart_refctd_ptr m_scene; smart_refctd_ptr m_solidAngleRenderpass; @@ -722,7 +737,7 @@ class 
SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr m_renderer; smart_refctd_ptr m_solidAngleViewFramebuffer; smart_refctd_ptr m_mainViewFramebuffer; - smart_refctd_ptr m_visualizationPipeline; + smart_refctd_ptr m_visualizationPipeline; // smart_refctd_ptr m_semaphore; uint64_t m_realFrameIx = 0; @@ -733,19 +748,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // UI stuff struct CInterface { - void cameraToHome() - { - core::vectorSIMDf cameraPosition(-3.0f, 3.0f, 6.0f); - core::vectorSIMDf cameraTarget(0.f, 0.f, 6.f); - const static core::vectorSIMDf up(0.f, 1.f, 0.f); - - camera.setPosition(cameraPosition); - camera.setTarget(cameraTarget); - camera.setBackupUpVector(up); - - camera.recomputeViewMatrix(); - } - void operator()() { ImGuiIO& io = ImGui::GetIO(); @@ -773,7 +775,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR return projection; }()); - ImGuizmo::SetOrthographic(false); + ImGuizmo::SetOrthographic(!isPerspective); ImGuizmo::BeginFrame(); ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); @@ -830,7 +832,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR if (viewDirty || firstFrame) { - cameraToHome(); + camera.setPosition(cameraIntialPosition); + camera.setTarget(cameraInitialTarget); + camera.setBackupUpVector(cameraInitialUp); + camera.setUpVector(cameraInitialUp); + + camera.recomputeViewMatrix(); } firstFrame = false; @@ -895,19 +902,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR * note it also modifies input view matrix but projection matrix is immutable */ - if (ImGui::IsKeyPressed(ImGuiKey_Home)) - { - cameraToHome(); - } + // No need because camera already has this functionality + // if (ImGui::IsKeyPressed(ImGuiKey_Home)) + // { + // cameraToHome(); + // } if (ImGui::IsKeyPressed(ImGuiKey_End)) { - m_OBBModelMatrix = { - 1.0f, 0.0f, 0.0f, 
0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 12.0f, 1.0f - }; + m_OBBModelMatrix = OBBModelMatrixDefault; } static struct @@ -930,10 +933,14 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ transformParams.editTransformDecomposition = true; - transformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + mainViewTransformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + // MODEL: Zup -> Yup + + m_OBBModelMatrix = imguizmoM16InOut.model; // TODO: camera stops when cursor hovers gizmo, but we also want to stop when gizmo is being used - move = (ImGui::IsMouseDown(ImGuiMouseButton_Left) || transformReturnInfo.isGizmoWindowHovered) && (!transformReturnInfo.isGizmoBeingUsed); + move = (ImGui::IsMouseDown(ImGuiMouseButton_Left) || mainViewTransformReturnInfo.isGizmoWindowHovered) && (!mainViewTransformReturnInfo.isGizmoBeingUsed); + } // to Nabla + update camera & model matrices @@ -957,9 +964,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::SetNextWindowSize(ImVec2(800, 800), ImGuiCond_Appearing); ImGui::SetNextWindowPos(ImVec2(1240, 20), ImGuiCond_Appearing); static bool isOpen = true; - ImGui::Begin("Solid angle view", &isOpen, 0); + ImGui::Begin("Projected Solid Angle View", &isOpen, 0); ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); + solidAngleViewTransformReturnInfo.sceneResolution = uint16_t2(static_cast(contentRegionSize.x), static_cast(contentRegionSize.y)); + solidAngleViewTransformReturnInfo.isGizmoBeingUsed = false; // not used in this view + solidAngleViewTransformReturnInfo.isGizmoWindowHovered = false; // not used in this 
view ImGui::Image({ renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW] }, contentRegionSize); ImGui::End(); } @@ -1081,21 +1091,19 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); // mutables - float32_t4x4 m_OBBModelMatrix{ - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 12.0f, 1.0f - }; + float32_t4x4 m_OBBModelMatrix = OBBModelMatrixDefault; //std::string_view objectName; TransformRequestParams transformParams; - TransformReturnInfo transformReturnInfo; + TransformReturnInfo mainViewTransformReturnInfo; + TransformReturnInfo solidAngleViewTransformReturnInfo; + + const static inline core::vectorSIMDf cameraIntialPosition{ -3.0f, 6.0f, 3.0f }; + const static inline core::vectorSIMDf cameraInitialTarget{ 0.f, 0.0f, 3.f }; + const static inline core::vectorSIMDf cameraInitialUp{ 0.f, 0.f, 1.f }; float fov = 90.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; float viewWidth = 10.f; - float camYAngle = 90.f / 180.f * 3.14159f; - float camXAngle = 0.f / 180.f * 3.14159f; //uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed bool isPerspective = true, isLH = true, flipGizmoY = true, move = true; bool firstFrame = true; diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index 3b3cd38d8..f35cd341a 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -149,38 +149,36 @@ class Camera if(ev.type == nbl::ui::SMouseEvent::EET_MOVEMENT && mouseDown) { nbl::core::vectorSIMDf pos = getPosition(); - nbl::core::vectorSIMDf localTarget = getTarget() - pos; - - // Get Relative Rotation for localTarget in Radians - 
float relativeRotationX, relativeRotationY; - relativeRotationY = atan2(localTarget.X, localTarget.Z); - const double z1 = nbl::core::sqrt(localTarget.X*localTarget.X + localTarget.Z*localTarget.Z); - relativeRotationX = atan2(z1, localTarget.Y) - nbl::core::PI()/2; - - constexpr float RotateSpeedScale = 0.003f; - relativeRotationX -= ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale * -1.0f; - float tmpYRot = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale * -1.0f; + nbl::core::vectorSIMDf upVector = getUpVector(); + nbl::core::vectorSIMDf forward = nbl::core::normalize(getTarget() - pos); + + nbl::core::vectorSIMDf right = nbl::core::normalize(nbl::core::cross(forward, upVector)); + nbl::core::vectorSIMDf up = nbl::core::normalize(nbl::core::cross(right, forward)); + + constexpr float RotateSpeedScale = 0.003f; + float pitchDelta = ev.movementEvent.relativeMovementY * rotateSpeed * RotateSpeedScale * -1.0f; + float yawDelta = ev.movementEvent.relativeMovementX * rotateSpeed * RotateSpeedScale * -1.0f; if (leftHanded) - relativeRotationY -= tmpYRot; - else - relativeRotationY += tmpYRot; + yawDelta = -yawDelta; - const double MaxVerticalAngle = nbl::core::radians(88.0f); + // Clamp pitch BEFORE applying rotation + const float MaxVerticalAngle = nbl::core::radians(88.0f); + float currentPitch = asin(nbl::core::dot(forward, upVector).X); + float newPitch = nbl::core::clamp(currentPitch + pitchDelta, -MaxVerticalAngle, MaxVerticalAngle); + pitchDelta = newPitch - currentPitch; - if (relativeRotationX > MaxVerticalAngle*2 && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) - relativeRotationX = 2 * nbl::core::PI()-MaxVerticalAngle; - else - if (relativeRotationX > MaxVerticalAngle && relativeRotationX < 2 * nbl::core::PI()-MaxVerticalAngle) - relativeRotationX = MaxVerticalAngle; + // Create rotation quaternions using axis-angle method + nbl::core::quaternion pitchRot = nbl::core::quaternion::fromAngleAxis(pitchDelta, 
right); + nbl::core::quaternion yawRot = nbl::core::quaternion::fromAngleAxis(yawDelta, upVector); + nbl::core::quaternion combinedRot = yawRot * pitchRot; - localTarget.set(0,0, nbl::core::max(1.f, nbl::core::length(pos)[0]), 1.f); + // Apply to forward vector + forward = nbl::core::normalize(combinedRot.transformVect(forward)); - nbl::core::matrix3x4SIMD mat; - mat.setRotation(nbl::core::quaternion(relativeRotationX, relativeRotationY, 0)); - mat.transformVect(localTarget); - - setTarget(localTarget + pos); + // Set new target + float targetDistance = nbl::core::length(getTarget() - pos).X; + setTarget(pos + forward * targetDistance); } } } From adb15edd201e82cbc9ed3526bbfccfc67ccdf4ff Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sun, 7 Dec 2025 00:12:56 +0300 Subject: [PATCH 31/57] sphere arc "cube edge" in solid angle view, more reliable resizing of windows --- .../hlsl/SolidAngleVis.frag.hlsl | 218 ++++++++---------- 72_SolidAngleVisualizer/main.cpp | 24 +- 2 files changed, 107 insertions(+), 135 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 2ad766c8a..badf1e4be 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -32,8 +32,7 @@ static const float3 localNormals[6] = { static float3 corners[8]; static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) }; -static float2 projCorners[8]; -static bool cornerVisible[8]; + // Converts UV into centered, aspect-corrected NDC circle space @@ -46,32 +45,7 @@ float2 toCircleSpace(float2 uv) float aspect = pc.viewport.z / pc.viewport.w; // width / height p.x *= aspect; - return p; -} - - -// Distance to a 2D line segment -float sdSegment(float2 p, float2 a, float2 b) -{ - float2 pa = p - a; - float2 ba = b - a; - float h = 
clamp(dot(pa, ba) / dot(ba, ba), 0.0f, 1.0f); - return length(pa - ba * h); -} - -// TODO: Hemispherical Projection (Solid Angle / Orthographic/Lambertian Projection) -bool projectToOrthoSphere(float3 p, out float2 uv) -{ - float3 n = normalize(p); // direction to sphere - - // hemisphere (Z > 0) - if (n.z <= 0.0) - return false; - - // orthographic projection (drop Z) - uv = n.xy; - - return true; // valid + return p * CIRCLE_RADIUS; } void computeCubeGeo() @@ -86,121 +60,121 @@ void computeCubeGeo() faceCenters[i/4] += worldPos / 4.0f; faceCenters[2+i%2] += worldPos / 4.0f; faceCenters[4+(i/2)%2] += worldPos / 4.0f; - - float3 viewPos = worldPos.xyz; - cornerVisible[i] = projectToOrthoSphere(viewPos, projCorners[i]); - projCorners[i] *= CIRCLE_RADIUS; // scale to circle radius } } -// int getVisibilityCount(int2 faces, float3 cameraPos) -// { -// float3x3 rotMatrix = (float3x3)pc.modelMatrix; -// float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); -// float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); - -// float3 viewVec_f1 = faceCenters[faces.x] - cameraPos; -// float3 viewVec_f2 = faceCenters[faces.y] - cameraPos; - -// // Face is visible if its outward normal points towards the origin (camera). 
-// bool visible1 = dot(n_world_f1, viewVec_f1) < 0.0f; -// bool visible2 = dot(n_world_f2, viewVec_f2) < 0.0f; - -// // Determine Line Style: -// bool isSilhouette = visible1 != visible2; // One face visible, the other hidden -// bool isInner = visible1 && visible2; // Both faces visible - -// int visibilityCount = 0; -// if (isSilhouette) -// { -// visibilityCount = 1; -// } -// else if (isInner) -// { -// visibilityCount = 2; -// } - -// return visibilityCount; -// } - -// void drawLine(float2 p, int a, int b, int visibilityCount, inout float4 color, float aaWidth) -// { -// if (visibilityCount > 0) -// { -// float3 A = corners[a]; -// float3 B = corners[b]; - -// float avgDepth = (length(A) + length(B)) * 0.5f; -// float referenceDepth = 3.0f; -// float depthScale = referenceDepth / avgDepth; - -// float baseWidth = (visibilityCount == 1) ? 0.005f : 0.002f; -// float intensity = (visibilityCount == 1) ? 1.0f : 0.5f; -// float4 edgeColor = (visibilityCount == 1) ? float4(0.0f, 0.5f, 1.0f, 1.0f) : float4(1.0f, 0.0f, 0.0f, 1.0f); // Blue vs Red - -// float width = min(baseWidth * depthScale, 0.03f); - -// float dist = sdSegment(p, projCorners[a], projCorners[b]); - -// float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); - -// color += edgeColor * alpha * intensity; -// } -// } - -void drawRing(float2 p, inout float4 color, float aaWidth) +float4 drawRing(float2 p, float aaWidth) { float positionLength = length(p); - - // Mask to cut off drawing outside the circle - // float circleMask = 1.0f - smoothstep(CIRCLE_RADIUS, CIRCLE_RADIUS + aaWidth, positionLength); - // color *= circleMask; // Add a white background circle ring - float ringWidth = 0.005f; + float ringWidth = 0.01f; float ringDistance = abs(positionLength - CIRCLE_RADIUS); float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); - // Ring color is now white - color = max(color, float4(1.0, 1.0, 1.0, 1.0) * ringAlpha); + return ringAlpha.xxxx; } 
-float plotPoint(float2 uv, float2 p, float r) +// Check if a face on the hemisphere is visible from camera at origin +bool isFaceVisible(float3 faceCenter, float3 faceNormal) { - return step(length(uv - p), r); + // Face is visible if normal points toward camera (at origin) + float3 viewVec = -normalize(faceCenter); // Vector from face to camera + return dot(faceNormal, viewVec) > 0.0f; } +int getEdgeVisibility(int edgeIdx, float3 cameraPos) +{ + int2 faces = edgeToFaces[edgeIdx]; + + // Transform normals to world space + float3x3 rotMatrix = (float3x3)pc.modelMatrix; + float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); + float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); + + bool visible1 = isFaceVisible(faceCenters[faces.x], n_world_f1); + bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); + + // Silhouette: exactly one face visible + if (visible1 != visible2) return 1; + + // Inner edge: both faces visible + if (visible1 && visible2) return 2; + + // Hidden edge: both faces hidden + return 0; +} + +// Draw great circle arc in fragment shader +float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float aaWidth) +{ + if (visibility == 0) return float4(0,0,0,0); // Hidden edge + + float3 v0 = normalize(corners[edgeVerts.x]); + float3 v1 = normalize(corners[edgeVerts.y]); + float3 p = normalize(fragPos); // Current point on hemisphere + + // Great circle plane normal + float3 arcNormal = normalize(cross(v0, v1)); + + // Distance to great circle + float dist = abs(dot(p, arcNormal)); + + // Check if point is within arc bounds + float dotMid = dot(v0, v1); + bool onArc = (dot(p, v0) >= dotMid) && (dot(p, v1) >= dotMid); + + if (!onArc) return float4(0,0,0,0); + + // Depth-based width scaling + float avgDepth = (length(corners[edgeVerts.x]) + length(corners[edgeVerts.y])) * 0.5f; + float depthScale = 3.0f / avgDepth; + + float baseWidth = (visibility == 1) ? 
0.01f : 0.005f; + float width = min(baseWidth * depthScale, 0.02f); + + float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); + + float4 edgeColor = (visibility == 1) ? + float4(0.0f, 0.5f, 1.0f, 1.0f) : // Silhouette: blue + float4(1.0f, 0.0f, 0.0f, 1.0f); // Inner: red + + float intensity = (visibility == 1) ? 1.0f : 0.5f; + return edgeColor * alpha * intensity; +} [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { - float3 cameraPos = float3(0, 0, 0); // Camera at origin - float2 p = toCircleSpace(vx.uv); + float3 cameraPos = float3(0, 0, 0); float4 color = float4(0, 0, 0, 0); - - computeCubeGeo(); + float2 p = toCircleSpace(vx.uv); - float aaWidth = max(fwidth(p.x), fwidth(p.y)); - - float pointMask = 0.0; - for (int i=0; i<8; i++) + // Convert 2D disk position to 3D hemisphere position + // p is in range [-CIRCLE_RADIUS, CIRCLE_RADIUS] + float2 normalized = p / CIRCLE_RADIUS; // Now in range [-1, 1] + float r2 = dot(normalized, normalized); + + if (r2 > 1.0f) + discard; + + // Convert UV to 3D position on hemisphere + float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); + + computeCubeGeo(); // Your existing function + + float aaWidth = length(float2(ddx(p.x), ddy(p.y))); + + // Draw edges as great circle arcs + for (int j = 0; j < 12; j++) { - if (cornerVisible[i]) - pointMask += plotPoint(p, projCorners[i], 0.015f); + int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? j % 2 : 0); + int b = a + (4 >> (j / 4)); + + int visibility = getEdgeVisibility(j, cameraPos); + color += drawGreatCircleArc(spherePos, int2(a, b), visibility, aaWidth); } - - color += pointMask * float4(1,0,0,1); // red points - - // for (int j = 0; j < 12; j++) - // { - // int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? 
j % 2 : 0); - // int b = a + (4 >> (j / 4)); - - // // int2 faces = edgeToFaces[j]; - // // int visibilityCount = getVisibilityCount(faces, cameraPos); - // // drawLine(p, a, b, visibilityCount, color, aaWidth); - // } - - drawRing(p, color, aaWidth); - + + color += drawRing(p, aaWidth); + return color; } \ No newline at end of file diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 1025eb067..8fb8bf144 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -323,7 +323,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const auto& virtualMainWindowRes = interface.mainViewTransformReturnInfo.sceneResolution; if (!m_solidAngleViewFramebuffer || m_solidAngleViewFramebuffer->getCreationParameters().width != virtualSolidAngleWindowRes[0] || m_solidAngleViewFramebuffer->getCreationParameters().height != virtualSolidAngleWindowRes[1] || !m_mainViewFramebuffer || m_mainViewFramebuffer->getCreationParameters().width != virtualMainWindowRes[0] || m_mainViewFramebuffer->getCreationParameters().height != virtualMainWindowRes[1]) - recreateFramebuffer(); + recreateFramebuffers(); } // @@ -402,10 +402,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR auto& instance = m_renderer->m_instances[0]; auto transposed = hlsl::transpose(interface.m_OBBModelMatrix); memcpy(&instance.world, &transposed, sizeof(instance.world)); - instance.packedGeo = m_renderer->getGeometries().data();// +interface.gcIndex; + instance.packedGeo = m_renderer->getGeometries().data(); // cube // +interface.gcIndex; m_renderer->render(cb, viewParams); // draw the cube/OBB - // TODO: a better way to get identity matrix float32_t3x4 origin = { 1.0f,0.0f,0.0f,0.0f, @@ -536,7 +535,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR camera.setMoveSpeed(interface.moveSpeed); camera.setRotateSpeed(interface.rotateSpeed); - 
m_inputSystem->getDefaultMouse(&mouse); m_inputSystem->getDefaultKeyboard(&keyboard); @@ -610,7 +608,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR interface.imGUI->update(params); } - void recreateFramebuffer() + void recreateFramebuffers() { auto createImageAndView = [&](const uint16_t2 resolution, E_FORMAT format)->smart_refctd_ptr @@ -671,30 +669,30 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR } // release previous slot and its image - interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, { .semaphore = m_semaphore.get(),.value = m_realFrameIx }); + interface.subAllocDS->multi_deallocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices, { .semaphore = m_semaphore.get(),.value = m_realFrameIx + 1 }); // - if (solidAngleView) + if (solidAngleView && mainView) { interface.subAllocDS->multi_allocate(0, static_cast(CInterface::Count), interface.renderColorViewDescIndices); // update descriptor set IGPUDescriptorSet::SDescriptorInfo infos[static_cast(CInterface::Count)] = {}; - infos[0].desc = solidAngleView; + infos[0].desc = mainView; infos[0].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; - infos[1].desc = mainView; + infos[1].desc = solidAngleView; infos[1].info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; const IGPUDescriptorSet::SWriteDescriptorSet write[static_cast(CInterface::Count)] = { {.dstSet = interface.subAllocDS->getDescriptorSet(), .binding = TexturesImGUIBindingIndex, - .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], .count = 1, .info = &infos[static_cast(CInterface::ERV_MAIN_VIEW)] }, { .dstSet = interface.subAllocDS->getDescriptorSet(), .binding = TexturesImGUIBindingIndex, - .arrayElement = 
interface.renderColorViewDescIndices[static_cast(CInterface::ERV_MAIN_VIEW)], + .arrayElement = interface.renderColorViewDescIndices[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)], .count = 1, - .info = &infos[1] + .info = &infos[static_cast(CInterface::ERV_SOLID_ANGLE_VIEW)] } }; m_device->updateDescriptorSets({ write, static_cast(CInterface::Count) }, {}); @@ -728,7 +726,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 6.0f, 1.0f + 0.0f, 0.0f, 3.0f, 1.0f }; // smart_refctd_ptr m_scene; From 008e2ee154b6cf5ba725752a3f1b4dac5d37ff42 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sun, 7 Dec 2025 00:29:22 +0300 Subject: [PATCH 32/57] Scaling by pressing G to prevent conflict with WASD camera movement, also added Q and E for moving up and down --- 72_SolidAngleVisualizer/include/transform.hpp | 4 +++- common/include/nbl/examples/cameras/CCamera.hpp | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp index 5061ebd49..639c0fa3a 100644 --- a/72_SolidAngleVisualizer/include/transform.hpp +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -35,13 +35,15 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti static bool boundSizing = false; static bool boundSizingSnap = false; + ImGui::Text("Press T/R/G to change gizmo mode"); + if (params.editTransformDecomposition) { if (ImGui::IsKeyPressed(ImGuiKey_T)) mCurrentGizmoOperation = ImGuizmo::TRANSLATE; if (ImGui::IsKeyPressed(ImGuiKey_R)) mCurrentGizmoOperation = ImGuizmo::ROTATE; - if (ImGui::IsKeyPressed(ImGuiKey_S)) + if (ImGui::IsKeyPressed(ImGuiKey_G)) mCurrentGizmoOperation = ImGuizmo::SCALE; if (ImGui::RadioButton("Translate", mCurrentGizmoOperation == ImGuizmo::TRANSLATE)) mCurrentGizmoOperation = ImGuizmo::TRANSLATE; diff --git 
a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index f35cd341a..e5f077e46 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -39,6 +39,8 @@ class Camera enum E_CAMERA_MOVE_KEYS : uint8_t { ECMK_MOVE_FORWARD = 0, + ECMK_MOVE_UP, + ECMK_MOVE_DOWN, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT, @@ -47,6 +49,8 @@ class Camera inline void mapKeysToWASD() { + keysMap[ECMK_MOVE_UP] = nbl::ui::EKC_E; + keysMap[ECMK_MOVE_DOWN] = nbl::ui::EKC_Q; keysMap[ECMK_MOVE_FORWARD] = nbl::ui::EKC_W; keysMap[ECMK_MOVE_BACKWARD] = nbl::ui::EKC_S; keysMap[ECMK_MOVE_LEFT] = nbl::ui::EKC_A; @@ -211,7 +215,7 @@ class Camera assert(timeDiff >= 0); // handle camera movement - for (const auto logicalKey : { ECMK_MOVE_FORWARD, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT }) + for (const auto logicalKey : { ECMK_MOVE_FORWARD, ECMK_MOVE_UP, ECMK_MOVE_DOWN, ECMK_MOVE_BACKWARD, ECMK_MOVE_LEFT, ECMK_MOVE_RIGHT }) { const auto code = keysMap[logicalKey]; @@ -275,6 +279,9 @@ class Camera up = nbl::core::normalize(backupUpVector); } + pos += up * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_UP] * moveSpeed * MoveSpeedScale; + pos -= up * perActionDt[E_CAMERA_MOVE_KEYS::ECMK_MOVE_DOWN] * moveSpeed * MoveSpeedScale; + nbl::core::vectorSIMDf strafevect = localTarget; if (leftHanded) strafevect = nbl::core::cross(strafevect, up); From 4290f4ab26360fbf8dac4c45c395fc4a20faf6e3 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sun, 7 Dec 2025 16:33:09 +0300 Subject: [PATCH 33/57] better clipping of arcs behind the hemisphere --- .../app_resources/hlsl/SolidAngleVis.frag.hlsl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index badf1e4be..c12c007a0 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ 
b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -114,6 +114,10 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float float3 v1 = normalize(corners[edgeVerts.y]); float3 p = normalize(fragPos); // Current point on hemisphere + // Skip fragment if not in front of hemisphere or edge if both endpoints are behind horizon + if (p.z < 0.0f || (v0.z < 0.0f && v1.z < 0.0f)) + return float4(0,0,0,0); + // Great circle plane normal float3 arcNormal = normalize(cross(v0, v1)); From ba068c44c08a777bb6794b3e0f019cbdc3605480 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Mon, 8 Dec 2025 08:47:02 +0300 Subject: [PATCH 34/57] WIP quick push for shader code --- .../hlsl/SolidAngleVis.frag.hlsl | 154 +++++++++++++++--- 1 file changed, 135 insertions(+), 19 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index c12c007a0..7c96a8316 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -20,6 +20,25 @@ static const int2 edgeToFaces[12] = { {0,4}, {5,0}, {4,1}, {1,5} }; +//float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f +static const float3 constCorners[8] = { + float3(-1, -1, -1), // 0 + float3( 1, -1, -1), // 1 + float3(-1, 1, -1), // 2 + float3( 1, 1, -1), // 3 + float3(-1, -1, 1), // 4 + float3( 1, -1, 1), // 5 + float3(-1, 1, 1), // 6 + float3( 1, 1, 1) // 7 +}; + +// All 12 edges of the cube (vertex index pairs) +static const int2 allEdges[12] = { + {0, 1}, {2, 3}, {4, 5}, {6, 7}, // Edges along X axis + {0, 2}, {1, 3}, {4, 6}, {5, 7}, // Edges along Y axis + {0, 4}, {1, 5}, {2, 6}, {3, 7} // Edges along Z axis +}; + static const float3 localNormals[6] = { float3(0, 0, -1), // Face 0 (Z-) float3(0, 0, 1), // Face 1 (Z+) @@ -34,6 +53,30 @@ static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), 
float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) }; +static const float3 colorLUT[8] = { + float3(0, 0, 0), // 0: Black + float3(1, 0, 0), // 1: Red + float3(0, 1, 0), // 2: Green + float3(1, 1, 0), // 3: Yellow + float3(0, 0, 1), // 4: Blue + float3(1, 0, 1), // 5: Magenta + float3(0, 1, 1), // 6: Cyan + float3(1, 1, 1) // 7: White +}; + + + +// Vertices are ordered CCW relative to the camera view. +static const int silhouettes[8][6] = { + {2, 3, 1, 5, 4, 6}, // 0: Black + {6, 7, 5, 1, 0, 2}, // 1: Red + {7, 6, 4, 0, 1, 3}, // 2: Green + {3, 7, 5, 4, 0, 2}, // 3: Yellow + {3, 2, 0, 4, 5, 7}, // 4: Cyan + {1, 3, 7, 6, 4, 0}, // 5: Magenta + {0, 1, 5, 7, 6, 2}, // 6: White + {4, 6, 2, 3, 1, 5} // 7: Gray +}; // Converts UV into centered, aspect-corrected NDC circle space float2 toCircleSpace(float2 uv) @@ -52,7 +95,7 @@ void computeCubeGeo() { for (int i = 0; i < 8; i++) { - float3 localPos = float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; + float3 localPos = constCorners[i]; //float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; corners[i] = worldPos.xyz; @@ -72,7 +115,7 @@ float4 drawRing(float2 p, float aaWidth) float ringDistance = abs(positionLength - CIRCLE_RADIUS); float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); - return ringAlpha.xxxx; + return ringAlpha * float4(1, 1, 1, 1); } // Check if a face on the hemisphere is visible from camera at origin @@ -105,7 +148,7 @@ int getEdgeVisibility(int edgeIdx, float3 cameraPos) return 0; } -// Draw great circle arc in fragment shader +// Draw great circle arc in fragment shader with horizon clipping float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float aaWidth) { if (visibility == 0) return float4(0,0,0,0); // Hidden edge @@ -114,8 +157,12 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float float3 v1 = 
normalize(corners[edgeVerts.y]); float3 p = normalize(fragPos); // Current point on hemisphere - // Skip fragment if not in front of hemisphere or edge if both endpoints are behind horizon - if (p.z < 0.0f || (v0.z < 0.0f && v1.z < 0.0f)) + // HORIZON CLIPPING: Current fragment must be on front hemisphere + if (p.z < 0.0f) + return float4(0,0,0,0); + + // HORIZON CLIPPING: Skip edge if both endpoints are behind horizon + if (v0.z < 0.0f && v1.z < 0.0f) return float4(0,0,0,0); // Great circle plane normal @@ -149,36 +196,105 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { - float3 cameraPos = float3(0, 0, 0); float4 color = float4(0, 0, 0, 0); float2 p = toCircleSpace(vx.uv); // Convert 2D disk position to 3D hemisphere position - // p is in range [-CIRCLE_RADIUS, CIRCLE_RADIUS] - float2 normalized = p / CIRCLE_RADIUS; // Now in range [-1, 1] + float2 normalized = p / CIRCLE_RADIUS; float r2 = dot(normalized, normalized); - if (r2 > 1.0f) - discard; - // Convert UV to 3D position on hemisphere float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); - computeCubeGeo(); // Your existing function + computeCubeGeo(); + + float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; + + float3 viewDir = obbCenter; + + // Is this correct? + float dotX = dot(viewDir, float3(pc.modelMatrix[0][0], pc.modelMatrix[1][0], pc.modelMatrix[2][0])); + float dotY = dot(viewDir, float3(pc.modelMatrix[0][1], pc.modelMatrix[1][1], pc.modelMatrix[2][1])); + float dotZ = dot(viewDir, float3(pc.modelMatrix[0][2], pc.modelMatrix[1][2], pc.modelMatrix[2][2])); + + // Determine octant from ray direction signs + int octant = (dotX >= 0 ? 4 : 0) + + (dotY >= 0 ? 2 : 0) + + (dotZ >= 0 ? 
1 : 0); + + if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) + { + return float4(colorLUT[octant], 1.0f); + } + + float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - float aaWidth = length(float2(ddx(p.x), ddy(p.y))); + + // Draw the 6 silhouette edges + for (int i = 0; i < 6; i++) + { + int v0Idx = silhouettes[octant][i]; + int v1Idx = silhouettes[octant][(i + 1) % 6]; + + float4 edgeContribution = drawGreatCircleArc(spherePos, int2(v0Idx, v1Idx), 1, aaWidth); + color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); + } - // Draw edges as great circle arcs - for (int j = 0; j < 12; j++) + // Draw the remaining edges (non-silhouette) in a different color + float3 hiddenEdgeColor = float3(0.3, 0.3, 0.3); // Gray color for hidden edges + + for (int i = 0; i < 12; i++) { - int a = j % 4 * (j < 4 ? 1 : 2) - (j / 4 == 1 ? j % 2 : 0); - int b = a + (4 >> (j / 4)); + int2 edge = allEdges[i]; + + // Check if this edge is already drawn as a silhouette edge + bool isSilhouette = false; + for (int j = 0; j < 6; j++) + { + int v0 = silhouettes[octant][j]; + int v1 = silhouettes[octant][(j + 1) % 6]; + + if ((edge.x == v0 && edge.y == v1) || (edge.x == v1 && edge.y == v0)) + { + isSilhouette = true; + break; + } + } - int visibility = getEdgeVisibility(j, cameraPos); - color += drawGreatCircleArc(spherePos, int2(a, b), visibility, aaWidth); + // Only draw if it's not a silhouette edge + if (!isSilhouette) + { + float4 edgeContribution = drawGreatCircleArc(spherePos, edge, 1, aaWidth); + color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); + } + } + + // Draw corner labels for debugging + for (int i = 0; i < 8; i++) + { + float3 corner = normalize(corners[i]); + float2 cornerPos = corner.xy; + // Project corner onto 2D circle space + + // Distance from current fragment to corner + float dist = length(spherePos.xy - cornerPos); + + // Draw a small colored dot at the corner + float dotSize = 0.03f; + 
float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); + + if (dotAlpha > 0.0f) + { + float brightness = float(i) / 7.0f; + float3 dotColor = colorLUT[i]; + color += float4(dotColor * dotAlpha, dotAlpha); + } } color += drawRing(p, aaWidth); + + // if (r2 > 1.1f) + // color.a = 0.0f; // Outside circle, make transparent return color; } \ No newline at end of file From 2e5642ab9614132821624235eda634fb23b4c609 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 8 Dec 2025 20:57:45 +0700 Subject: [PATCH 35/57] Remove example 73 mortons --- 73_Mortons/CTester.h | 401 ----------------------- 73_Mortons/app_resources/testCommon.hlsl | 258 --------------- 2 files changed, 659 deletions(-) delete mode 100644 73_Mortons/CTester.h delete mode 100644 73_Mortons/app_resources/testCommon.hlsl diff --git a/73_Mortons/CTester.h b/73_Mortons/CTester.h deleted file mode 100644 index b4097dad6..000000000 --- a/73_Mortons/CTester.h +++ /dev/null @@ -1,401 +0,0 @@ -#ifndef _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ -#define _NBL_EXAMPLES_TESTS_12_MORTON_C_TESTER_INCLUDED_ - -#include -#include "app_resources/testCommon.hlsl" -#include "ITester.h" - -using namespace nbl; - -class CTester final : public ITester -{ -public: - void performTests() - { - std::random_device rd; - std::mt19937 mt(rd()); - - std::uniform_int_distribution shortDistribution(uint16_t(0), std::numeric_limits::max()); - std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); - std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); - - m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); - for (int i = 0; i < Iterations; ++i) - { - // Set input thest values that will be used in both CPU and GPU tests - InputTestValues testInput; - // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values - TestValues expected; - - uint32_t 
generatedShift = intDistribution(mt) & uint32_t(63); - testInput.shift = generatedShift; - { - uint64_t generatedA = longDistribution(mt); - uint64_t generatedB = longDistribution(mt); - - testInput.generatedA = generatedA; - testInput.generatedB = generatedB; - - expected.emulatedAnd = _static_cast(generatedA & generatedB); - expected.emulatedOr = _static_cast(generatedA | generatedB); - expected.emulatedXor = _static_cast(generatedA ^ generatedB); - expected.emulatedNot = _static_cast(~generatedA); - expected.emulatedPlus = _static_cast(generatedA + generatedB); - expected.emulatedMinus = _static_cast(generatedA - generatedB); - expected.emulatedUnaryMinus = _static_cast(-generatedA); - expected.emulatedLess = uint32_t(generatedA < generatedB); - expected.emulatedLessEqual = uint32_t(generatedA <= generatedB); - expected.emulatedGreater = uint32_t(generatedA > generatedB); - expected.emulatedGreaterEqual = uint32_t(generatedA >= generatedB); - - expected.emulatedLeftShifted = _static_cast(generatedA << generatedShift); - expected.emulatedUnsignedRightShifted = _static_cast(generatedA >> generatedShift); - expected.emulatedSignedRightShifted = _static_cast(static_cast(generatedA) >> generatedShift); - } - { - testInput.coordX = longDistribution(mt); - testInput.coordY = longDistribution(mt); - testInput.coordZ = longDistribution(mt); - testInput.coordW = longDistribution(mt); - - uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; - uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; - - uint16_t2 Vec2ASmall = uint16_t2(Vec2A & smallBitsMask_2 ); - uint16_t2 Vec2BSmall = uint16_t2(Vec2B & smallBitsMask_2 ); - uint16_t2 Vec2AMedium = uint16_t2(Vec2A & mediumBitsMask_2); - uint16_t2 Vec2BMedium = uint16_t2(Vec2B & mediumBitsMask_2); - uint32_t2 Vec2AFull = uint32_t2(Vec2A & fullBitsMask_2); - uint32_t2 Vec2BFull = uint32_t2(Vec2B & fullBitsMask_2); - - uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; - uint64_t3 Vec3B = { 
testInput.coordY, testInput.coordZ, testInput.coordW }; - - uint16_t3 Vec3ASmall = uint16_t3(Vec3A & smallBitsMask_3); - uint16_t3 Vec3BSmall = uint16_t3(Vec3B & smallBitsMask_3); - uint16_t3 Vec3AMedium = uint16_t3(Vec3A & mediumBitsMask_3); - uint16_t3 Vec3BMedium = uint16_t3(Vec3B & mediumBitsMask_3); - uint32_t3 Vec3AFull = uint32_t3(Vec3A & fullBitsMask_3); - uint32_t3 Vec3BFull = uint32_t3(Vec3B & fullBitsMask_3); - - uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; - uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; - - uint16_t4 Vec4ASmall = uint16_t4(Vec4A & smallBitsMask_4); - uint16_t4 Vec4BSmall = uint16_t4(Vec4B & smallBitsMask_4); - uint16_t4 Vec4AMedium = uint16_t4(Vec4A & mediumBitsMask_4); - uint16_t4 Vec4BMedium = uint16_t4(Vec4B & mediumBitsMask_4); - uint16_t4 Vec4AFull = uint16_t4(Vec4A & fullBitsMask_4); - uint16_t4 Vec4BFull = uint16_t4(Vec4B & fullBitsMask_4); - - // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them - // so their highest bits are all 0s or 1s depending on the sign of the number they encode - - int16_t2 Vec2ASignedSmall = int16_t2(Vec2ASmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); - int16_t2 Vec2BSignedSmall = int16_t2(Vec2BSmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); - int16_t2 Vec2ASignedMedium = int16_t2(Vec2AMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); - int16_t2 Vec2BSignedMedium = int16_t2(Vec2BMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); - int32_t2 Vec2ASignedFull = int32_t2(Vec2AFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); - int32_t2 Vec2BSignedFull = int32_t2(Vec2BFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); - - int16_t3 Vec3ASignedSmall = int16_t3(Vec3ASmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - 
smallBits_3); - int16_t3 Vec3BSignedSmall = int16_t3(Vec3BSmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); - int16_t3 Vec3ASignedMedium = int16_t3(Vec3AMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); - int16_t3 Vec3BSignedMedium = int16_t3(Vec3BMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); - int32_t3 Vec3ASignedFull = int32_t3(Vec3AFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); - int32_t3 Vec3BSignedFull = int32_t3(Vec3BFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); - - int16_t4 Vec4ASignedSmall = int16_t4(Vec4ASmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); - int16_t4 Vec4BSignedSmall = int16_t4(Vec4BSmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); - int16_t4 Vec4ASignedMedium = int16_t4(Vec4AMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); - int16_t4 Vec4BSignedMedium = int16_t4(Vec4BMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); - int16_t4 Vec4ASignedFull = int16_t4(Vec4AFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); - int16_t4 Vec4BSignedFull = int16_t4(Vec4BFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); - - // Plus - expected.mortonPlus_small_2 = morton::code::create((Vec2ASmall + Vec2BSmall) & static_cast(smallBitsMask_2)); - expected.mortonPlus_medium_2 = morton::code::create((Vec2AMedium + Vec2BMedium) & static_cast(mediumBitsMask_2)); - expected.mortonPlus_full_2 = morton::code::create((Vec2AFull + Vec2BFull) & static_cast(fullBitsMask_2)); - expected.mortonPlus_emulated_2 = morton::code::create((Vec2AFull + Vec2BFull) & static_cast(fullBitsMask_2)); - - expected.mortonPlus_small_3 = morton::code::create((Vec3ASmall + Vec3BSmall) & static_cast(smallBitsMask_3)); - expected.mortonPlus_medium_3 = morton::code::create((Vec3AMedium + Vec3BMedium) & static_cast(mediumBitsMask_3)); - expected.mortonPlus_full_3 = morton::code::create((Vec3AFull + 
Vec3BFull) & static_cast(fullBitsMask_3)); - expected.mortonPlus_emulated_3 = morton::code::create((Vec3AFull + Vec3BFull) & static_cast(fullBitsMask_3)); - - expected.mortonPlus_small_4 = morton::code::create((Vec4ASmall + Vec4BSmall) & static_cast(smallBitsMask_4)); - expected.mortonPlus_medium_4 = morton::code::create((Vec4AMedium + Vec4BMedium) & static_cast(mediumBitsMask_4)); - expected.mortonPlus_full_4 = morton::code::create((Vec4AFull + Vec4BFull) & static_cast(fullBitsMask_4)); - expected.mortonPlus_emulated_4 = morton::code::create((Vec4AFull + Vec4BFull) & static_cast(fullBitsMask_4)); - - // // Minus - // expected.mortonMinus_small_2 = morton::code::create(Vec2ASmall - Vec2BSmall); - // expected.mortonMinus_medium_2 = morton::code::create(Vec2AMedium - Vec2BMedium); - // expected.mortonMinus_full_2 = morton::code::create(Vec2AFull - Vec2BFull); - // expected.mortonMinus_emulated_2 = morton::code::create(Vec2AFull - Vec2BFull); - // - // expected.mortonMinus_small_3 = morton::code::create(Vec3ASmall - Vec3BSmall); - // expected.mortonMinus_medium_3 = morton::code::create(Vec3AMedium - Vec3BMedium); - // expected.mortonMinus_full_3 = morton::code::create(Vec3AFull - Vec3BFull); - // expected.mortonMinus_emulated_3 = morton::code::create(Vec3AFull - Vec3BFull); - // - // expected.mortonMinus_small_4 = morton::code::create(Vec4ASmall - Vec4BSmall); - // expected.mortonMinus_medium_4 = morton::code::create(Vec4AMedium - Vec4BMedium); - // expected.mortonMinus_full_4 = morton::code::create(Vec4AFull - Vec4BFull); - // expected.mortonMinus_emulated_4 = morton::code::create(Vec4AFull - Vec4BFull); - // - // Coordinate-wise equality - expected.mortonEqual_small_2 = uint32_t2(glm::equal(Vec2ASmall, Vec2BSmall)); - expected.mortonEqual_medium_2 = uint32_t2(glm::equal(Vec2AMedium, Vec2BMedium)); - expected.mortonEqual_full_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); - expected.mortonEqual_emulated_2 = uint32_t2(glm::equal(Vec2AFull, Vec2BFull)); - - 
expected.mortonEqual_small_3 = uint32_t3(glm::equal(Vec3ASmall, Vec3BSmall)); - expected.mortonEqual_medium_3 = uint32_t3(glm::equal(Vec3AMedium, Vec3BMedium)); - expected.mortonEqual_full_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); - expected.mortonEqual_emulated_3 = uint32_t3(glm::equal(Vec3AFull, Vec3BFull)); - - expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); - expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); - expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); - - // Coordinate-wise unsigned inequality (just testing with less) - expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); - expected.mortonUnsignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2AMedium, Vec2BMedium)); - expected.mortonUnsignedLess_full_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); - expected.mortonUnsignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2AFull, Vec2BFull)); - - expected.mortonUnsignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASmall, Vec3BSmall)); - expected.mortonUnsignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3AMedium, Vec3BMedium)); - expected.mortonUnsignedLess_full_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); - expected.mortonUnsignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3AFull, Vec3BFull)); - - expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); - expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); - expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); - - // Coordinate-wise signed inequality - expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); - expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); - expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); - - 
expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); - expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); - expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); - - expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); - expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); - expected.mortonSignedLess_full_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); - - uint16_t castedShift = uint16_t(generatedShift); - // Left-shift - expected.mortonLeftShift_small_2 = morton::code::create((Vec2ASmall << uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); - expected.mortonLeftShift_medium_2 = morton::code::create((Vec2AMedium << uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); - expected.mortonLeftShift_full_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - expected.mortonLeftShift_emulated_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - - expected.mortonLeftShift_small_3 = morton::code::create((Vec3ASmall << uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); - expected.mortonLeftShift_medium_3 = morton::code::create((Vec3AMedium << uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); - expected.mortonLeftShift_full_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - expected.mortonLeftShift_emulated_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - - expected.mortonLeftShift_small_4 = morton::code::create((Vec4ASmall << uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); - expected.mortonLeftShift_medium_4 = 
morton::code::create((Vec4AMedium << uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); - expected.mortonLeftShift_full_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - expected.mortonLeftShift_emulated_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - - // Unsigned right-shift - expected.mortonUnsignedRightShift_small_2 = morton::code::create((Vec2ASmall >> uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); - expected.mortonUnsignedRightShift_medium_2 = morton::code::create((Vec2AMedium >> uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); - expected.mortonUnsignedRightShift_full_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - expected.mortonUnsignedRightShift_emulated_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2))& uint32_t(fullBitsMask_2)); - - expected.mortonUnsignedRightShift_small_3 = morton::code::create((Vec3ASmall >> uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); - expected.mortonUnsignedRightShift_medium_3 = morton::code::create((Vec3AMedium >> uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); - expected.mortonUnsignedRightShift_full_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - expected.mortonUnsignedRightShift_emulated_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3))& uint32_t(fullBitsMask_3)); - - expected.mortonUnsignedRightShift_small_4 = morton::code::create((Vec4ASmall >> uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); - expected.mortonUnsignedRightShift_medium_4 = morton::code::create((Vec4AMedium >> uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); - expected.mortonUnsignedRightShift_full_4 = morton::code::create((Vec4AFull >> 
uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - expected.mortonUnsignedRightShift_emulated_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4))& uint16_t(fullBitsMask_4)); - - // Signed right-shift - // expected.mortonSignedRightShift_small_2 = morton::code::create((Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)) & int16_t(smallBitsMask_2)); - // expected.mortonSignedRightShift_medium_2 = morton::code::create((Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)) & int16_t(mediumBitsMask_2)); - // expected.mortonSignedRightShift_full_2 = morton::code::create((Vec2ASignedFull >> int32_t(castedShift % fullBits_2)) & int32_t(fullBitsMask_2)); - // - // expected.mortonSignedRightShift_small_3 = morton::code::create((Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)) & int16_t(smallBitsMask_3)); - // expected.mortonSignedRightShift_medium_3 = morton::code::create((Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)) & int16_t(mediumBitsMask_3)); - // expected.mortonSignedRightShift_full_3 = morton::code::create((Vec3ASignedFull >> int32_t(castedShift % fullBits_3)) & int32_t(fullBitsMask_3)); - // - // expected.mortonSignedRightShift_small_4 = morton::code::create((Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)) & int16_t(smallBitsMask_4)); - // expected.mortonSignedRightShift_medium_4 = morton::code::create((Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)) & int16_t(mediumBitsMask_4)); - // expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); - } - - performCpuTests(testInput, expected); - performGpuTests(testInput, expected); - } - m_logger->log("FIRST TESTS DONE.", system::ILogger::ELL_PERFORMANCE); - } - -private: - inline static constexpr int Iterations = 100u; - - void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) - { - TestValues 
cpuTestValues; - - fillTestValues(commonTestInputValues, cpuTestValues); - verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); - - } - - void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) - { - TestValues gpuTestValues; - gpuTestValues = dispatch(commonTestInputValues); - verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); - } - - void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) - { - verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); - verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); - verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); - verifyTestValue("emulatedNot", expectedTestValues.emulatedNot, testValues.emulatedNot, testType); - verifyTestValue("emulatedPlus", expectedTestValues.emulatedPlus, testValues.emulatedPlus, testType); - verifyTestValue("emulatedMinus", expectedTestValues.emulatedMinus, testValues.emulatedMinus, testType); - verifyTestValue("emulatedLess", expectedTestValues.emulatedLess, testValues.emulatedLess, testType); - verifyTestValue("emulatedLessEqual", expectedTestValues.emulatedLessEqual, testValues.emulatedLessEqual, testType); - verifyTestValue("emulatedGreater", expectedTestValues.emulatedGreater, testValues.emulatedGreater, testType); - verifyTestValue("emulatedGreaterEqual", expectedTestValues.emulatedGreaterEqual, testValues.emulatedGreaterEqual, testType); - verifyTestValue("emulatedLeftShifted", expectedTestValues.emulatedLeftShifted, testValues.emulatedLeftShifted, testType); - verifyTestValue("emulatedUnsignedRightShifted", expectedTestValues.emulatedUnsignedRightShifted, testValues.emulatedUnsignedRightShifted, testType); - verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, 
testValues.emulatedSignedRightShifted, testType); - verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testType); - - // Morton Plus - verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); - verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testType); - verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testType); - verifyTestValue("mortonPlus_emulated_2", expectedTestValues.mortonPlus_emulated_2, testValues.mortonPlus_emulated_2, testType); - - verifyTestValue("mortonPlus_small_3", expectedTestValues.mortonPlus_small_3, testValues.mortonPlus_small_3, testType); - verifyTestValue("mortonPlus_medium_3", expectedTestValues.mortonPlus_medium_3, testValues.mortonPlus_medium_3, testType); - verifyTestValue("mortonPlus_full_3", expectedTestValues.mortonPlus_full_3, testValues.mortonPlus_full_3, testType); - verifyTestValue("mortonPlus_emulated_3", expectedTestValues.mortonPlus_emulated_3, testValues.mortonPlus_emulated_3, testType); - - verifyTestValue("mortonPlus_small_4", expectedTestValues.mortonPlus_small_4, testValues.mortonPlus_small_4, testType); - verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testType); - verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testType); - verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testType); - - // Morton Minus - verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testType); - verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testType); - verifyTestValue("mortonMinus_full_2", 
expectedTestValues.mortonMinus_full_2, testValues.mortonMinus_full_2, testType); - verifyTestValue("mortonMinus_emulated_2", expectedTestValues.mortonMinus_emulated_2, testValues.mortonMinus_emulated_2, testType); - - verifyTestValue("mortonMinus_small_3", expectedTestValues.mortonMinus_small_3, testValues.mortonMinus_small_3, testType); - verifyTestValue("mortonMinus_medium_3", expectedTestValues.mortonMinus_medium_3, testValues.mortonMinus_medium_3, testType); - verifyTestValue("mortonMinus_full_3", expectedTestValues.mortonMinus_full_3, testValues.mortonMinus_full_3, testType); - verifyTestValue("mortonMinus_emulated_3", expectedTestValues.mortonMinus_emulated_3, testValues.mortonMinus_emulated_3, testType); - - verifyTestValue("mortonMinus_small_4", expectedTestValues.mortonMinus_small_4, testValues.mortonMinus_small_4, testType); - verifyTestValue("mortonMinus_medium_4", expectedTestValues.mortonMinus_medium_4, testValues.mortonMinus_medium_4, testType); - verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testType); - verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testType); - - // Morton coordinate-wise equality - verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testType); - verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testType); - verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testType); - verifyTestValue("mortonEqual_emulated_2", expectedTestValues.mortonEqual_emulated_2, testValues.mortonEqual_emulated_2, testType); - - verifyTestValue("mortonEqual_small_3", expectedTestValues.mortonEqual_small_3, testValues.mortonEqual_small_3, testType); - verifyTestValue("mortonEqual_medium_3", expectedTestValues.mortonEqual_medium_3, 
testValues.mortonEqual_medium_3, testType); - verifyTestValue("mortonEqual_full_3", expectedTestValues.mortonEqual_full_3, testValues.mortonEqual_full_3, testType); - verifyTestValue("mortonEqual_emulated_3", expectedTestValues.mortonEqual_emulated_3, testValues.mortonEqual_emulated_3, testType); - - verifyTestValue("mortonEqual_small_4", expectedTestValues.mortonEqual_small_4, testValues.mortonEqual_small_4, testType); - verifyTestValue("mortonEqual_medium_4", expectedTestValues.mortonEqual_medium_4, testValues.mortonEqual_medium_4, testType); - verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testType); - - // Morton coordinate-wise unsigned inequality - verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testType); - verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testType); - verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testType); - verifyTestValue("mortonUnsignedLess_emulated_2", expectedTestValues.mortonUnsignedLess_emulated_2, testValues.mortonUnsignedLess_emulated_2, testType); - - verifyTestValue("mortonUnsignedLess_small_3", expectedTestValues.mortonUnsignedLess_small_3, testValues.mortonUnsignedLess_small_3, testType); - verifyTestValue("mortonUnsignedLess_medium_3", expectedTestValues.mortonUnsignedLess_medium_3, testValues.mortonUnsignedLess_medium_3, testType); - verifyTestValue("mortonUnsignedLess_full_3", expectedTestValues.mortonUnsignedLess_full_3, testValues.mortonUnsignedLess_full_3, testType); - verifyTestValue("mortonUnsignedLess_emulated_3", expectedTestValues.mortonUnsignedLess_emulated_3, testValues.mortonUnsignedLess_emulated_3, testType); - - verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, 
testValues.mortonUnsignedLess_small_4, testType); - verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); - verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); - - // Morton coordinate-wise signed inequality - verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); - verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); - verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); - - verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); - verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); - verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testType); - - verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testType); - verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); - verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); - - // Morton left-shift - verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); - verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testType); - verifyTestValue("mortonLeftShift_full_2", 
expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testType); - verifyTestValue("mortonLeftShift_emulated_2", expectedTestValues.mortonLeftShift_emulated_2, testValues.mortonLeftShift_emulated_2, testType); - - verifyTestValue("mortonLeftShift_small_3", expectedTestValues.mortonLeftShift_small_3, testValues.mortonLeftShift_small_3, testType); - verifyTestValue("mortonLeftShift_medium_3", expectedTestValues.mortonLeftShift_medium_3, testValues.mortonLeftShift_medium_3, testType); - verifyTestValue("mortonLeftShift_full_3", expectedTestValues.mortonLeftShift_full_3, testValues.mortonLeftShift_full_3, testType); - verifyTestValue("mortonLeftShift_emulated_3", expectedTestValues.mortonLeftShift_emulated_3, testValues.mortonLeftShift_emulated_3, testType); - - verifyTestValue("mortonLeftShift_small_4", expectedTestValues.mortonLeftShift_small_4, testValues.mortonLeftShift_small_4, testType); - verifyTestValue("mortonLeftShift_medium_4", expectedTestValues.mortonLeftShift_medium_4, testValues.mortonLeftShift_medium_4, testType); - verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testType); - verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testType); - - // Morton unsigned right-shift - verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testType); - verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testType); - verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_2", expectedTestValues.mortonUnsignedRightShift_emulated_2, 
testValues.mortonUnsignedRightShift_emulated_2, testType); - - verifyTestValue("mortonUnsignedRightShift_small_3", expectedTestValues.mortonUnsignedRightShift_small_3, testValues.mortonUnsignedRightShift_small_3, testType); - verifyTestValue("mortonUnsignedRightShift_medium_3", expectedTestValues.mortonUnsignedRightShift_medium_3, testValues.mortonUnsignedRightShift_medium_3, testType); - verifyTestValue("mortonUnsignedRightShift_full_3", expectedTestValues.mortonUnsignedRightShift_full_3, testValues.mortonUnsignedRightShift_full_3, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_3", expectedTestValues.mortonUnsignedRightShift_emulated_3, testValues.mortonUnsignedRightShift_emulated_3, testType); - - verifyTestValue("mortonUnsignedRightShift_small_4", expectedTestValues.mortonUnsignedRightShift_small_4, testValues.mortonUnsignedRightShift_small_4, testType); - verifyTestValue("mortonUnsignedRightShift_medium_4", expectedTestValues.mortonUnsignedRightShift_medium_4, testValues.mortonUnsignedRightShift_medium_4, testType); - verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testType); - verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testType); - - // Morton signed right-shift - verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); - verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); - verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); - - verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, 
testValues.mortonSignedRightShift_small_3, testType); - verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testType); - verifyTestValue("mortonSignedRightShift_full_3", expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testType); - - verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testType); - verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testType); - verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testType); - } -}; - -#endif \ No newline at end of file diff --git a/73_Mortons/app_resources/testCommon.hlsl b/73_Mortons/app_resources/testCommon.hlsl deleted file mode 100644 index 93205db62..000000000 --- a/73_Mortons/app_resources/testCommon.hlsl +++ /dev/null @@ -1,258 +0,0 @@ -#include "common.hlsl" - -template -morton::code createMortonFromAnyVec(vector, Dim> val) -{ - using morton_code_t = morton::code; - using decode_element_t = typename morton_code_t::decode_component_t ; - NBL_IF_CONSTEXPR(Signed) - { - return morton_code_t::create(_static_cast >(val & )); - - } -} - -void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) -{ - emulated_uint64_t emulatedA = _static_cast(input.generatedA); - emulated_uint64_t emulatedB = _static_cast(input.generatedB); - emulated_int64_t signedEmulatedA = _static_cast(input.generatedA); - - // Emulated int tests - output.emulatedAnd = emulatedA & emulatedB; - output.emulatedOr = emulatedA | emulatedB; - output.emulatedXor = emulatedA ^ emulatedB; - output.emulatedNot = emulatedA.operator~(); - output.emulatedPlus = emulatedA + emulatedB; - output.emulatedMinus = 
emulatedA - emulatedB; - output.emulatedLess = uint32_t(emulatedA < emulatedB); - output.emulatedLessEqual = uint32_t(emulatedA <= emulatedB); - output.emulatedGreater = uint32_t(emulatedA > emulatedB); - output.emulatedGreaterEqual = uint32_t(emulatedA >= emulatedB); - - left_shift_operator leftShift; - output.emulatedLeftShifted = leftShift(emulatedA, input.shift); - - arithmetic_right_shift_operator unsignedRightShift; - output.emulatedUnsignedRightShifted = unsignedRightShift(emulatedA, input.shift); - - arithmetic_right_shift_operator signedRightShift; - output.emulatedSignedRightShifted = signedRightShift(signedEmulatedA, input.shift); - - output.emulatedUnaryMinus = signedEmulatedA.operator-(); - - // Morton tests - uint64_t2 Vec2A = { input.coordX, input.coordY }; - uint64_t2 Vec2B = { input.coordZ, input.coordW }; - - uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; - uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; - - uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; - uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; - - int64_t2 Vec2ASigned = int64_t2(Vec2A); - int64_t2 Vec2BSigned = int64_t2(Vec2B); - - int64_t3 Vec3ASigned = int64_t3(Vec3A); - int64_t3 Vec3BSigned = int64_t3(Vec3B); - - int64_t4 Vec4ASigned = int64_t4(Vec4A); - int64_t4 Vec4BSigned = int64_t4(Vec4B); - - morton::code morton_small_2A = createMortonFromAnyVec(Vec2A); - morton::code morton_medium_2A = createMortonFromAnyVec(Vec2A); - morton::code morton_full_2A = createMortonFromAnyVec(Vec2A); - morton::code morton_emulated_2A = createMortonFromAnyVec(Vec2A); - morton::code morton_small_2B = createMortonFromAnyVec(Vec2B); - morton::code morton_medium_2B = createMortonFromAnyVec(Vec2B); - morton::code morton_full_2B = createMortonFromAnyVec(Vec2B); - morton::code morton_emulated_2B = createMortonFromAnyVec(Vec2B); - - morton::code morton_small_3A = createMortonFromAnyVec(Vec3A); - morton::code 
morton_medium_3A = createMortonFromAnyVec(Vec3A); - morton::code morton_full_3A = createMortonFromAnyVec(Vec3A); - morton::code morton_emulated_3A = createMortonFromAnyVec(Vec3A); - morton::code morton_small_3B = createMortonFromAnyVec(Vec3B); - morton::code morton_medium_3B = createMortonFromAnyVec(Vec3B); - morton::code morton_full_3B = createMortonFromAnyVec(Vec3B); - morton::code morton_emulated_3B = createMortonFromAnyVec(Vec3B); - - morton::code morton_small_4A = createMortonFromAnyVec(Vec4A); - morton::code morton_medium_4A = createMortonFromAnyVec(Vec4A); - morton::code morton_full_4A = createMortonFromAnyVec(Vec4A); - morton::code morton_emulated_4A = createMortonFromAnyVec(Vec4A); - morton::code morton_small_4B = createMortonFromAnyVec(Vec4B); - morton::code morton_medium_4B = createMortonFromAnyVec(Vec4B); - morton::code morton_full_4B = createMortonFromAnyVec(Vec4B); - morton::code morton_emulated_4B = createMortonFromAnyVec(Vec4B); - - morton::code morton_small_2_signed = createMortonFromAnyVec(Vec2ASigned); - morton::code morton_medium_2_signed = createMortonFromAnyVec(Vec2ASigned); - morton::code morton_full_2_signed = createMortonFromAnyVec(Vec2ASigned); - - morton::code morton_small_3_signed = createMortonFromAnyVec(Vec3ASigned); - morton::code morton_medium_3_signed = createMortonFromAnyVec(Vec3ASigned); - morton::code morton_full_3_signed = createMortonFromAnyVec(Vec3ASigned); - - morton::code morton_small_4_signed = createMortonFromAnyVec(Vec4ASigned); - morton::code morton_medium_4_signed = createMortonFromAnyVec(Vec4ASigned); - morton::code morton_full_4_signed = createMortonFromAnyVec(Vec4ASigned); - - // Plus - output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; - output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; - output.mortonPlus_full_2 = morton_full_2A + morton_full_2B; - output.mortonPlus_emulated_2 = morton_emulated_2A + morton_emulated_2B; - - output.mortonPlus_small_3 = morton_small_3A + morton_small_3B; - 
output.mortonPlus_medium_3 = morton_medium_3A + morton_medium_3B; - output.mortonPlus_full_3 = morton_full_3A + morton_full_3B; - output.mortonPlus_emulated_3 = morton_emulated_3A + morton_emulated_3B; - - output.mortonPlus_small_4 = morton_small_4A + morton_small_4B; - output.mortonPlus_medium_4 = morton_medium_4A + morton_medium_4B; - output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; - output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; - - // Minus - output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; - output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; - output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; - output.mortonMinus_emulated_2 = morton_emulated_2A - morton_emulated_2B; - - output.mortonMinus_small_3 = morton_small_3A - morton_small_3B; - output.mortonMinus_medium_3 = morton_medium_3A - morton_medium_3B; - output.mortonMinus_full_3 = morton_full_3A - morton_full_3B; - output.mortonMinus_emulated_3 = morton_emulated_3A - morton_emulated_3B; - - output.mortonMinus_small_4 = morton_small_4A - morton_small_4B; - output.mortonMinus_medium_4 = morton_medium_4A - morton_medium_4B; - output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; - output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; - - // Coordinate-wise equality - output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(uint16_t2(Vec2B))); - output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(uint16_t2(Vec2B))); - output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(uint32_t2(Vec2B))); - output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(uint32_t2(Vec2B))); - - output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(uint16_t3(Vec3B))); - output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(uint16_t3(Vec3B))); - output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(uint32_t3(Vec3B))); - output.mortonEqual_emulated_3 = 
uint32_t3(morton_emulated_3A.equal(uint32_t3(Vec3B))); - - output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); - output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); - output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); - output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); - - // Coordinate-wise unsigned inequality (just testing with less) - output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); - output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(uint16_t2(Vec2B))); - output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(uint32_t2(Vec2B))); - output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(uint32_t2(Vec2B))); - - output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(uint16_t3(Vec3B))); - output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(uint16_t3(Vec3B))); - output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(uint32_t3(Vec3B))); - output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); - - output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(uint16_t4(Vec4B))); - output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(uint16_t4(Vec4B))); - output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(uint16_t4(Vec4B))); - - // Coordinate-wise signed inequality - output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(int16_t2(Vec2BSigned))); - output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(int16_t2(Vec2BSigned))); - output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(int32_t2(Vec2BSigned))); - - output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(int16_t3(Vec3BSigned))); - output.mortonSignedLess_medium_3 = 
uint32_t3(morton_medium_3_signed.lessThan(int16_t3(Vec3BSigned))); - output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(int32_t3(Vec3BSigned))); - - output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(int16_t4(Vec4BSigned))); - output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(int16_t4(Vec4BSigned))); - output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(int16_t4(Vec4BSigned))); - - // Cast to uint16_t which is what left shift for Mortons expect - uint16_t castedShift = uint16_t(input.shift); - // Each left shift clamps to correct bits so the result kinda makes sense - // Left-shift - left_shift_operator > leftShiftSmall2; - output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); - left_shift_operator > leftShiftMedium2; - output.mortonLeftShift_medium_2 = leftShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); - left_shift_operator > leftShiftFull2; - output.mortonLeftShift_full_2 = leftShiftFull2(morton_full_2A, castedShift % fullBits_2); - left_shift_operator > leftShiftEmulated2; - output.mortonLeftShift_emulated_2 = leftShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); - - left_shift_operator > leftShiftSmall3; - output.mortonLeftShift_small_3 = leftShiftSmall3(morton_small_3A, castedShift % smallBits_3); - left_shift_operator > leftShiftMedium3; - output.mortonLeftShift_medium_3 = leftShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); - left_shift_operator > leftShiftFull3; - output.mortonLeftShift_full_3 = leftShiftFull3(morton_full_3A, castedShift % fullBits_3); - left_shift_operator > leftShiftEmulated3; - output.mortonLeftShift_emulated_3 = leftShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - - left_shift_operator > leftShiftSmall4; - output.mortonLeftShift_small_4 = leftShiftSmall4(morton_small_4A, castedShift % smallBits_4); - left_shift_operator > leftShiftMedium4; - 
output.mortonLeftShift_medium_4 = leftShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); - left_shift_operator > leftShiftFull4; - output.mortonLeftShift_full_4 = leftShiftFull4(morton_full_4A, castedShift % fullBits_4); - left_shift_operator > leftShiftEmulated4; - output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - - // Unsigned right-shift - arithmetic_right_shift_operator > rightShiftSmall2; - output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); - arithmetic_right_shift_operator > rightShiftMedium2; - output.mortonUnsignedRightShift_medium_2 = rightShiftMedium2(morton_medium_2A, castedShift % mediumBits_2); - arithmetic_right_shift_operator > rightShiftFull2; - output.mortonUnsignedRightShift_full_2 = rightShiftFull2(morton_full_2A, castedShift % fullBits_2); - arithmetic_right_shift_operator > rightShiftEmulated2; - output.mortonUnsignedRightShift_emulated_2 = rightShiftEmulated2(morton_emulated_2A, castedShift % fullBits_2); - - arithmetic_right_shift_operator > rightShiftSmall3; - output.mortonUnsignedRightShift_small_3 = rightShiftSmall3(morton_small_3A, castedShift % smallBits_3); - arithmetic_right_shift_operator > rightShiftMedium3; - output.mortonUnsignedRightShift_medium_3 = rightShiftMedium3(morton_medium_3A, castedShift % mediumBits_3); - arithmetic_right_shift_operator > rightShiftFull3; - output.mortonUnsignedRightShift_full_3 = rightShiftFull3(morton_full_3A, castedShift % fullBits_3); - arithmetic_right_shift_operator > rightShiftEmulated3; - output.mortonUnsignedRightShift_emulated_3 = rightShiftEmulated3(morton_emulated_3A, castedShift % fullBits_3); - - arithmetic_right_shift_operator > rightShiftSmall4; - output.mortonUnsignedRightShift_small_4 = rightShiftSmall4(morton_small_4A, castedShift % smallBits_4); - arithmetic_right_shift_operator > rightShiftMedium4; - output.mortonUnsignedRightShift_medium_4 = 
rightShiftMedium4(morton_medium_4A, castedShift % mediumBits_4); - arithmetic_right_shift_operator > rightShiftFull4; - output.mortonUnsignedRightShift_full_4 = rightShiftFull4(morton_full_4A, castedShift % fullBits_4); - arithmetic_right_shift_operator > rightShiftEmulated4; - output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - - // Signed right-shift - arithmetic_right_shift_operator > rightShiftSignedSmall2; - output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); - arithmetic_right_shift_operator > rightShiftSignedMedium2; - output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); - arithmetic_right_shift_operator > rightShiftSignedFull2; - output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); - - arithmetic_right_shift_operator > rightShiftSignedSmall3; - output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); - arithmetic_right_shift_operator > rightShiftSignedMedium3; - output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); - arithmetic_right_shift_operator > rightShiftSignedFull3; - output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift % fullBits_3); - - arithmetic_right_shift_operator > rightShiftSignedSmall4; - output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); - arithmetic_right_shift_operator > rightShiftSignedMedium4; - output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); - arithmetic_right_shift_operator > rightShiftSignedFull4; - output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % 
fullBits_4); -} \ No newline at end of file From 91ae8657dee9b4de82c81b97b23b83d3824a6011 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:20:01 +0300 Subject: [PATCH 36/57] Fixed main camera aspect ratio, added 27 configurations for cube silhouette --- .../hlsl/SolidAngleVis.frag.hlsl | 248 ++++++++++++------ 72_SolidAngleVisualizer/include/transform.hpp | 2 +- 72_SolidAngleVisualizer/main.cpp | 9 +- 3 files changed, 167 insertions(+), 92 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 7c96a8316..fa0805356 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -53,29 +53,84 @@ static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) }; -static const float3 colorLUT[8] = { +static const float3 colorLUT[27] = { + // Row 1: Pure and bright colors float3(0, 0, 0), // 0: Black - float3(1, 0, 0), // 1: Red - float3(0, 1, 0), // 2: Green - float3(1, 1, 0), // 3: Yellow - float3(0, 0, 1), // 4: Blue - float3(1, 0, 1), // 5: Magenta - float3(0, 1, 1), // 6: Cyan - float3(1, 1, 1) // 7: White + float3(1, 1, 1), // 1: White + float3(0.5, 0.5, 0.5), // 2: Gray + + // Row 2: Primary colors + float3(1, 0, 0), // 3: Red + float3(0, 1, 0), // 4: Green + float3(0, 0, 1), // 5: Blue + + // Row 3: Secondary colors + float3(1, 1, 0), // 6: Yellow + float3(1, 0, 1), // 7: Magenta + float3(0, 1, 1), // 8: Cyan + + // Row 4: Orange family + float3(1, 0.5, 0), // 9: Orange + float3(1, 0.65, 0), // 10: Light Orange + float3(0.8, 0.4, 0), // 11: Dark Orange + + // Row 5: Pink/Rose family + float3(1, 0.4, 0.7), // 12: Pink + float3(1, 0.75, 0.8), // 13: Light Pink + float3(0.7, 0.1, 0.3), // 14: Deep Rose + + // Row 6: Purple/Violet family + float3(0.5, 0, 0.5), // 15: Purple + float3(0.6, 
0.4, 0.8), // 16: Light Purple + float3(0.3, 0, 0.5), // 17: Indigo + + // Row 7: Green variations + float3(0, 0.5, 0), // 18: Dark Green + float3(0.5, 1, 0), // 19: Lime + float3(0, 0.5, 0.25), // 20: Forest Green + + // Row 8: Blue variations + float3(0, 0, 0.5), // 21: Navy + float3(0.3, 0.7, 1), // 22: Sky Blue + float3(0, 0.4, 0.6), // 23: Teal + + // Row 9: Earth tones + float3(0.6, 0.4, 0.2), // 24: Brown + float3(0.8, 0.7, 0.3), // 25: Tan/Beige + float3(0.4, 0.3, 0.1) // 26: Dark Brown }; // Vertices are ordered CCW relative to the camera view. -static const int silhouettes[8][6] = { - {2, 3, 1, 5, 4, 6}, // 0: Black - {6, 7, 5, 1, 0, 2}, // 1: Red - {7, 6, 4, 0, 1, 3}, // 2: Green - {3, 7, 5, 4, 0, 2}, // 3: Yellow - {3, 2, 0, 4, 5, 7}, // 4: Cyan - {1, 3, 7, 6, 4, 0}, // 5: Magenta - {0, 1, 5, 7, 6, 2}, // 6: White - {4, 6, 2, 3, 1, 5} // 7: Gray +static const int silhouettes[27][7] = { + {6, 1, 3, 2, 6, 4, 5}, // 0: Black + {6, 2, 6, 4, 5, 7, 3}, // 1: White + {6, 0, 4, 5, 7, 3, 2}, // 2: Gray + {6, 1, 3, 7, 6, 4, 5,}, // 3: Red + {4, 4, 5, 7, 6, -1, -1}, // 4: Green + {6, 0, 4, 5, 7, 6, 2}, // 5: Blue + {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow + {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta + {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan + {6, 1, 3, 2, 6, 7, 5}, // 9: Orange + {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange + {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange + {4, 1, 3, 7, 5, -1, -1}, // 12: Pink + {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink + {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose + {6, 0, 1, 3, 7, 5, 4}, // 15: Purple + {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple + {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo + {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green + {6, 0, 2, 6, 7, 3, 1}, // 19: Lime + {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green + {6, 0, 2, 3, 7, 5, 1}, // 21: Navy + {4, 0, 2, 3, 1, -1, -1}, // 22: Sky Blue + {6, 0, 4, 6, 2, 3, 1}, // 23: Teal + {6, 0, 2, 3, 7, 5, 4}, // 24: Brown + {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige + {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown }; // 
Converts UV into centered, aspect-corrected NDC circle space @@ -106,6 +161,33 @@ void computeCubeGeo() } } +float4 drawCorners(float3 spherePos, float aaWidth) +{ + float4 color = float4(0,0,0,0); + // Draw corner labels for debugging + for (int i = 0; i < 8; i++) + { + float3 corner = normalize(corners[i]); + float2 cornerPos = corner.xy; + // Project corner onto 2D circle space + + // Distance from current fragment to corner + float dist = length(spherePos.xy - cornerPos); + + // Draw a small colored dot at the corner + float dotSize = 0.03f; + float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); + + if (dotAlpha > 0.0f) + { + float brightness = float(i) / 7.0f; + float3 dotColor = colorLUT[i]; + color += float4(dotColor * dotAlpha, dotAlpha); + } + } + return color; +} + float4 drawRing(float2 p, float aaWidth) { float positionLength = length(p); @@ -194,54 +276,11 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float return edgeColor * alpha * intensity; } -[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 +float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) { - float4 color = float4(0, 0, 0, 0); - float2 p = toCircleSpace(vx.uv); - - // Convert 2D disk position to 3D hemisphere position - float2 normalized = p / CIRCLE_RADIUS; - float r2 = dot(normalized, normalized); - - // Convert UV to 3D position on hemisphere - float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); - - computeCubeGeo(); - - float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; - - float3 viewDir = obbCenter; - - // Is this correct? 
- float dotX = dot(viewDir, float3(pc.modelMatrix[0][0], pc.modelMatrix[1][0], pc.modelMatrix[2][0])); - float dotY = dot(viewDir, float3(pc.modelMatrix[0][1], pc.modelMatrix[1][1], pc.modelMatrix[2][1])); - float dotZ = dot(viewDir, float3(pc.modelMatrix[0][2], pc.modelMatrix[1][2], pc.modelMatrix[2][2])); - - // Determine octant from ray direction signs - int octant = (dotX >= 0 ? 4 : 0) + - (dotY >= 0 ? 2 : 0) + - (dotZ >= 0 ? 1 : 0); - - if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) - { - return float4(colorLUT[octant], 1.0f); - } - - float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - - - // Draw the 6 silhouette edges - for (int i = 0; i < 6; i++) - { - int v0Idx = silhouettes[octant][i]; - int v1Idx = silhouettes[octant][(i + 1) % 6]; - - float4 edgeContribution = drawGreatCircleArc(spherePos, int2(v0Idx, v1Idx), 1, aaWidth); - color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); - } - + float4 color = float4(0,0,0,0); // Draw the remaining edges (non-silhouette) in a different color - float3 hiddenEdgeColor = float3(0.3, 0.3, 0.3); // Gray color for hidden edges + float3 hiddenEdgeColor = float3(0.3, 0.3, 0); // dark yellow color for hidden edges for (int i = 0; i < 12; i++) { @@ -249,12 +288,14 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float // Check if this edge is already drawn as a silhouette edge bool isSilhouette = false; - for (int j = 0; j < 6; j++) + int vertexCount = silhouettes[configIndex][0]; + // Draw the 6 silhouette edges + for (int i = 0; i < vertexCount; i++) { - int v0 = silhouettes[octant][j]; - int v1 = silhouettes[octant][(j + 1) % 6]; + int v0Idx = silhouettes[configIndex][i + 1]; + int v1Idx = silhouettes[configIndex][((i + 1) % vertexCount) + 1]; - if ((edge.x == v0 && edge.y == v1) || (edge.x == v1 && edge.y == v0)) + if ((edge.x == v0Idx && edge.y == v1Idx) || (edge.x == v1Idx && edge.y == v0Idx)) { isSilhouette = true; break; @@ 
-268,33 +309,66 @@ float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); } } + return color; +} - // Draw corner labels for debugging - for (int i = 0; i < 8; i++) +[[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 +{ + float4 color = float4(0, 0, 0, 0); + float2 p = toCircleSpace(vx.uv); + + // Convert 2D disk position to 3D hemisphere position + float2 normalized = p / CIRCLE_RADIUS; + float r2 = dot(normalized, normalized); + float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); + + if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) { - float3 corner = normalize(corners[i]); - float2 cornerPos = corner.xy; - // Project corner onto 2D circle space - - // Distance from current fragment to corner - float dist = length(spherePos.xy - cornerPos); - - // Draw a small colored dot at the corner - float dotSize = 0.03f; - float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); + return float4(colorLUT[configIndex], 1.0f); + } + + // Convert UV to 3D position on hemisphere + float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); + + computeCubeGeo(); + + // Get OBB center in world space + float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; + + float3x3 rotMatrix = (float3x3)pc.modelMatrix; + float3 proj = mul(obbCenter, rotMatrix); // Get all 3 projections at once + + // Get squared column lengths + float lenSqX = dot(rotMatrix[0], rotMatrix[0]); + float lenSqY = dot(rotMatrix[1], rotMatrix[1]); + float lenSqZ = dot(rotMatrix[2], rotMatrix[2]); + + int3 region = int3( + proj.x < -lenSqX ? 0 : (proj.x > lenSqX ? 2 : 1), + proj.y < -lenSqY ? 0 : (proj.y > lenSqY ? 2 : 1), + proj.z < -lenSqZ ? 0 : (proj.z > lenSqZ ? 
2 : 1) + ); + + int configIndex = region.x + region.y * 3 + region.z * 9; // 0-26 + + int vertexCount = silhouettes[configIndex][0]; + for (int i = 0; i < vertexCount; i++) + { + int v0Idx = silhouettes[configIndex][i + 1]; + int v1Idx = silhouettes[configIndex][((i + 1) % vertexCount) + 1]; - if (dotAlpha > 0.0f) - { - float brightness = float(i) / 7.0f; - float3 dotColor = colorLUT[i]; - color += float4(dotColor * dotAlpha, dotAlpha); - } + float4 edgeContribution = drawGreatCircleArc(spherePos, int2(v0Idx, v1Idx), 1, aaWidth); + color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); } + color += drawHiddenEdges(spherePos, configIndex, aaWidth); + + color += drawCorners(spherePos, aaWidth); + color += drawRing(p, aaWidth); - // if (r2 > 1.1f) - // color.a = 0.0f; // Outside circle, make transparent + if (r2 > 1.1f) + color.a = 0.0f; // Outside circle, make transparent return color; } \ No newline at end of file diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp index 639c0fa3a..105b2f757 100644 --- a/72_SolidAngleVisualizer/include/transform.hpp +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -19,7 +19,7 @@ struct TransformRequestParams struct TransformReturnInfo { - nbl::hlsl::uint16_t2 sceneResolution = { 0, 0 }; + nbl::hlsl::uint16_t2 sceneResolution = { 1, 1 }; bool isGizmoWindowHovered; bool isGizmoBeingUsed; }; diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 8fb8bf144..5f73797a6 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -753,16 +753,17 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // TODO: why is this a lambda and not just an assignment in a scope ? 
camera.setProjectionMatrix([&]() { - matrix4SIMD projection; + const auto& sceneRes = mainViewTransformReturnInfo.sceneResolution; + matrix4SIMD projection; if (isPerspective) if (isLH) - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), sceneRes.x / sceneRes.y, zNear, zFar); else - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), sceneRes.x / sceneRes.y, zNear, zFar); else { - float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; + float viewHeight = viewWidth * sceneRes.y / sceneRes.x; if (isLH) projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); From 0124cc9c0ad83d4a38f1e8ac3ddcdf56125740ac Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:30:34 +0300 Subject: [PATCH 37/57] Shader fixes, bast uint16 resolutionf to float --- .../app_resources/hlsl/SolidAngleVis.frag.hlsl | 16 +++++++++------- 72_SolidAngleVisualizer/main.cpp | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index fa0805356..ec30c2b64 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -322,10 +322,7 @@ float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) float r2 = dot(normalized, normalized); float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) - { - return float4(colorLUT[configIndex], 1.0f); - } + // Convert UV to 3D 
position on hemisphere float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); @@ -350,7 +347,7 @@ float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) ); int configIndex = region.x + region.y * 3 + region.z * 9; // 0-26 - + int vertexCount = silhouettes[configIndex][0]; for (int i = 0; i < vertexCount; i++) { @@ -367,8 +364,13 @@ float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) color += drawRing(p, aaWidth); - if (r2 > 1.1f) - color.a = 0.0f; // Outside circle, make transparent + if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) + { + return float4(colorLUT[configIndex], 1.0f); + } + + // if (r2 > 1.1f) + // color.a = 0.0f; // Outside circle, make transparent return color; } \ No newline at end of file diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 5f73797a6..85685e705 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -753,7 +753,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // TODO: why is this a lambda and not just an assignment in a scope ? 
camera.setProjectionMatrix([&]() { - const auto& sceneRes = mainViewTransformReturnInfo.sceneResolution; + const auto& sceneRes = float16_t2(mainViewTransformReturnInfo.sceneResolution); matrix4SIMD projection; if (isPerspective) From a35eddd1bd83fbf636e820b59c6eef939ed09668 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 9 Dec 2025 00:44:42 +0300 Subject: [PATCH 38/57] Better color for non-silhouette edges --- .../app_resources/hlsl/SolidAngleVis.frag.hlsl | 2 +- 72_SolidAngleVisualizer/main.cpp | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index ec30c2b64..51cb1946d 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -280,7 +280,7 @@ float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) { float4 color = float4(0,0,0,0); // Draw the remaining edges (non-silhouette) in a different color - float3 hiddenEdgeColor = float3(0.3, 0.3, 0); // dark yellow color for hidden edges + float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); // dark yellow color for hidden edges for (int i = 0; i < 12; i++) { diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 85685e705..e9266520d 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -933,9 +933,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR transformParams.editTransformDecomposition = true; mainViewTransformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); - // MODEL: Zup -> Yup - - m_OBBModelMatrix = imguizmoM16InOut.model; // TODO: camera stops when cursor hovers gizmo, but we also want to stop when gizmo is being used move = 
(ImGui::IsMouseDown(ImGuiMouseButton_Left) || mainViewTransformReturnInfo.isGizmoWindowHovered) && (!mainViewTransformReturnInfo.isGizmoBeingUsed); From 197b46afe5df4239958cd57fbe4aae8921dd9eb4 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 9 Dec 2025 23:39:30 +0700 Subject: [PATCH 39/57] Enable second test set --- 14_Mortons/CTester.h | 106 +++++++++++++++++++++- 14_Mortons/ITester.h | 1 - 14_Mortons/app_resources/common.hlsl | 6 +- 14_Mortons/app_resources/test2.comp.hlsl | 17 ++++ 14_Mortons/app_resources/testCommon.hlsl | 25 ++--- 14_Mortons/app_resources/testCommon2.hlsl | 40 ++++++++ 14_Mortons/main.cpp | 15 +-- 7 files changed, 179 insertions(+), 31 deletions(-) create mode 100644 14_Mortons/app_resources/test2.comp.hlsl create mode 100644 14_Mortons/app_resources/testCommon2.hlsl diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index 4c8b4276e..342cbcc00 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -3,6 +3,7 @@ #include #include "app_resources/testCommon.hlsl" +#include "app_resources/testCommon2.hlsl" #include "ITester.h" using namespace nbl; @@ -158,6 +159,7 @@ class CTester final : public ITester expected.mortonEqual_small_4 = uint32_t4(glm::equal(Vec4ASmall, Vec4BSmall)); expected.mortonEqual_medium_4 = uint32_t4(glm::equal(Vec4AMedium, Vec4BMedium)); expected.mortonEqual_full_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); + expected.mortonEqual_emulated_4 = uint32_t4(glm::equal(Vec4AFull, Vec4BFull)); // Coordinate-wise unsigned inequality (just testing with less) expected.mortonUnsignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASmall, Vec2BSmall)); @@ -343,17 +345,14 @@ class CTester final : public ITester verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); verifyTestValue("mortonSignedLess_full_2", 
expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); - verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, testValues.mortonSignedLess_full_3, testType); - verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testType); verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); - verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); // // Morton left-shift verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); @@ -402,4 +401,105 @@ class CTester final : public ITester } }; +class CTester2 final : public ITester +{ +public: + void performTests() + { + std::random_device rd; + std::mt19937 mt(rd()); + + std::uniform_int_distribution intDistribution(uint32_t(0), std::numeric_limits::max()); + std::uniform_int_distribution longDistribution(uint64_t(0), std::numeric_limits::max()); + + m_logger->log("TESTS:", system::ILogger::ELL_PERFORMANCE); + for (int i = 0; i < Iterations; ++i) + { + // 
Set input thest values that will be used in both CPU and GPU tests + InputTestValues testInput; + // use std library or glm functions to determine expected test values, the output of functions from intrinsics.hlsl will be verified against these values + TestValues expected; + + uint32_t generatedShift = intDistribution(mt) & uint32_t(63); + testInput.shift = generatedShift; + { + testInput.coordX = longDistribution(mt); + testInput.coordY = longDistribution(mt); + testInput.coordZ = longDistribution(mt); + testInput.coordW = longDistribution(mt); + + uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; + uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; + + uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; + uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; + + uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; + uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; + + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + + uint16_t 
castedShift = uint16_t(generatedShift); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2A << uint64_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3A << uint64_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4A << uint64_t(castedShift % fullBits_4)); + + } + + performCpuTests(testInput, expected); + // performGpuTests(testInput, expected); + } + m_logger->log("SECOND TESTS DONE.", system::ILogger::ELL_PERFORMANCE); + } + +private: + inline static constexpr int Iterations = 100u; + + void performCpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues cpuTestValues; + + fillTestValues2(commonTestInputValues, cpuTestValues); + verifyTestValues(expectedTestValues, cpuTestValues, ITester::TestType::CPU); + + } + + void performGpuTests(const InputTestValues& commonTestInputValues, const TestValues& expectedTestValues) + { + TestValues gpuTestValues; + gpuTestValues = dispatch(commonTestInputValues); + verifyTestValues(expectedTestValues, gpuTestValues, ITester::TestType::GPU); + } + + void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) + { + + verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); + + verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); + verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); + verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); + // + // verifyTestValue("mortonSignedRightShift_emulated_2", 
expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); + // verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); + // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); + + } +}; #endif \ No newline at end of file diff --git a/14_Mortons/ITester.h b/14_Mortons/ITester.h index a0c76ac75..3be6d1d6b 100644 --- a/14_Mortons/ITester.h +++ b/14_Mortons/ITester.h @@ -18,7 +18,6 @@ class ITester struct PipelineSetupData { std::string testShaderPath; - core::smart_refctd_ptr device; core::smart_refctd_ptr api; core::smart_refctd_ptr assetMgr; diff --git a/14_Mortons/app_resources/common.hlsl b/14_Mortons/app_resources/common.hlsl index 237e3260e..d209c737f 100644 --- a/14_Mortons/app_resources/common.hlsl +++ b/14_Mortons/app_resources/common.hlsl @@ -57,10 +57,10 @@ T createAnyBitIntegerFromU64(uint64_t val) template vector createAnyBitIntegerVecFromU64Vec(vector val) { - array_get, T> getter; - array_set, T> setter; + array_get, T> getter; + array_set, T> setter; vector output; - NBL_UNROLL + NBL_UNROLL for (uint16_t i = 0; i < D; i++) { setter(output, i, createAnyBitIntegerFromU64(getter(val, i))); diff --git a/14_Mortons/app_resources/test2.comp.hlsl b/14_Mortons/app_resources/test2.comp.hlsl new file mode 100644 index 000000000..30b998f49 --- /dev/null +++ b/14_Mortons/app_resources/test2.comp.hlsl @@ -0,0 +1,17 @@ +//// Copyright (C) 2023-2024 - DevSH Graphics Programming Sp. z O.O. +//// This file is part of the "Nabla Engine". 
+//// For conditions of distribution and use, see copyright notice in nabla.h + +#include "testCommon2.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" + +[[vk::binding(0, 0)]] RWStructuredBuffer inputTestValues; +[[vk::binding(1, 0)]] RWStructuredBuffer outputTestValues; + +[numthreads(1, 1, 1)] +[shader("compute")] +void main(uint3 invocationID : SV_DispatchThreadID) +{ + uint32_t testID = glsl::gl_GlobalInvocationID().x; + fillTestValues2(inputTestValues[testID], outputTestValues[testID]); +} diff --git a/14_Mortons/app_resources/testCommon.hlsl b/14_Mortons/app_resources/testCommon.hlsl index dbe6ddbd2..6e9051c1b 100644 --- a/14_Mortons/app_resources/testCommon.hlsl +++ b/14_Mortons/app_resources/testCommon.hlsl @@ -98,7 +98,7 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.mortonPlus_full_4 = morton_full_4A + morton_full_4B; output.mortonPlus_emulated_4 = morton_emulated_4A + morton_emulated_4B; - // // Minus + // Minus output.mortonMinus_small_2 = morton_small_2A - morton_small_2B; output.mortonMinus_medium_2 = morton_medium_2A - morton_medium_2B; output.mortonMinus_full_2 = morton_full_2A - morton_full_2B; @@ -114,7 +114,7 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.mortonMinus_full_4 = morton_full_4A - morton_full_4B; output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; - // // Coordinate-wise equality + // Coordinate-wise equality output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(uint16_t2(Vec2B))); output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(uint16_t2(Vec2B))); output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(uint32_t2(Vec2B))); @@ -128,7 +128,7 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); 
output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); - // output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); + output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); // Coordinate-wise unsigned inequality (just testing with less) output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); @@ -139,34 +139,29 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(uint16_t3(Vec3B))); output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(uint16_t3(Vec3B))); output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(uint32_t3(Vec3B))); - // output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); + output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(uint16_t4(Vec4B))); output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(uint16_t4(Vec4B))); output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(uint16_t4(Vec4B))); - // output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(uint16_t4(Vec4B))); - // less(Vec4A, Vec4B); // Coordinate-wise signed inequality output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(int16_t2(Vec2B))); output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(int16_t2(Vec2B))); output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(int32_t2(Vec2B))); - // output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(int32_t2(Vec2B))); output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(int16_t3(Vec3B))); output.mortonSignedLess_medium_3 = 
uint32_t3(morton_medium_3_signed.lessThan(int16_t3(Vec3B))); output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(int32_t3(Vec3B))); - output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(int32_t3(Vec3B))); output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(int16_t4(Vec4B))); output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(int16_t4(Vec4B))); output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(int16_t4(Vec4B))); - // output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(int16_t4(Vec4B))); - // // Cast to uint16_t which is what left shift for Mortons expect + // Cast to uint16_t which is what left shift for Mortons expect uint16_t castedShift = uint16_t(input.shift); - // // Each left shift clamps to correct bits so the result kinda makes sense - // // Left-shift + // Each left shift clamps to correct bits so the result kinda makes sense + // Left-shift left_shift_operator > leftShiftSmall2; output.mortonLeftShift_small_2 = leftShiftSmall2(morton_small_2A, castedShift % smallBits_2); left_shift_operator > leftShiftMedium2; @@ -244,10 +239,4 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa arithmetic_right_shift_operator > rightShiftSignedFull4; output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); - // arithmetic_right_shift_operator > rightShiftSignedEmulated2; - // output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift); - // arithmetic_right_shift_operator > rightShiftSignedEmulated3; - // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift); - // arithmetic_right_shift_operator > rightShiftSignedEmulated4; - // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift); } \ 
No newline at end of file diff --git a/14_Mortons/app_resources/testCommon2.hlsl b/14_Mortons/app_resources/testCommon2.hlsl new file mode 100644 index 000000000..e7eced852 --- /dev/null +++ b/14_Mortons/app_resources/testCommon2.hlsl @@ -0,0 +1,40 @@ +#include "common.hlsl" + +void fillTestValues2(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestValues) output) +{ + uint64_t2 Vec2A = { input.coordX, input.coordY }; + uint64_t2 Vec2B = { input.coordZ, input.coordW }; + + uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; + uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; + + uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; + uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + + int32_t2 Vec2BSigned = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int32_t3 Vec3BSigned = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4BSigned = createAnyBitIntegerVecFromU64Vec(Vec4B); + + morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); + morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); + morton::code morton_emulated_3_signed = createMortonFromU64Vec(Vec3A); + morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); + + + output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(uint16_t4(Vec4B))); + + output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSigned)); + output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSigned)); + output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSigned)); + + uint16_t castedShift = uint16_t(input.shift); + + arithmetic_right_shift_operator > rightShiftSignedEmulated2; + output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); + arithmetic_right_shift_operator > rightShiftSignedEmulated3; + 
output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); + arithmetic_right_shift_operator > rightShiftSignedEmulated4; + output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); +} diff --git a/14_Mortons/main.cpp b/14_Mortons/main.cpp index 6034e3469..bd4653f7c 100644 --- a/14_Mortons/main.cpp +++ b/14_Mortons/main.cpp @@ -2,8 +2,6 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include -#include -#include #include #include "nbl/application_templates/MonoDeviceApplication.hpp" @@ -47,10 +45,15 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 { - CTester mortonTester; - pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; - mortonTester.setupPipeline(pplnSetupData); - mortonTester.performTests(); + // CTester mortonTester; + // pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; + // mortonTester.setupPipeline(pplnSetupData); + // mortonTester.performTests(); + + CTester2 mortonTester2; + pplnSetupData.testShaderPath = "app_resources/test2.comp.hlsl"; + mortonTester2.setupPipeline(pplnSetupData); + mortonTester2.performTests(); } return true; From 6692311fddaf527dad42abe170394eb85ad4f5ae Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 9 Dec 2025 23:59:46 +0700 Subject: [PATCH 40/57] Delete fillSecondTestValues --- 14_Mortons/app_resources/common.hlsl | 44 ---------------------------- 1 file changed, 44 deletions(-) diff --git a/14_Mortons/app_resources/common.hlsl b/14_Mortons/app_resources/common.hlsl index d209c737f..895728f26 100644 --- a/14_Mortons/app_resources/common.hlsl +++ 
b/14_Mortons/app_resources/common.hlsl @@ -244,50 +244,6 @@ struct TestValues morton::code mortonSignedRightShift_emulated_4; - /* - void fillSecondTestValues(NBL_CONST_REF_ARG(InputTestValues) input) - { - uint64_t2 Vec2A = { input.coordX, input.coordY }; - uint64_t2 Vec2B = { input.coordZ, input.coordW }; - - uint64_t3 Vec3A = { input.coordX, input.coordY, input.coordZ }; - uint64_t3 Vec3B = { input.coordY, input.coordZ, input.coordW }; - - uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; - uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; - - int64_t2 Vec2ASigned = int64_t2(Vec2A); - int64_t2 Vec2BSigned = int64_t2(Vec2B); - - int64_t3 Vec3ASigned = int64_t3(Vec3A); - int64_t3 Vec3BSigned = int64_t3(Vec3B); - - int64_t4 Vec4ASigned = int64_t4(Vec4A); - int64_t4 Vec4BSigned = int64_t4(Vec4B); - - morton::code morton_emulated_4A = morton::code::create(Vec4A); - morton::code morton_emulated_2_signed = morton::code::create(Vec2ASigned); - morton::code morton_emulated_3_signed = morton::code::create(Vec3ASigned); - morton::code morton_emulated_4_signed = morton::code::create(Vec4ASigned); - - output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); - - output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(uint16_t4(Vec4B))); - - mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(int32_t2(Vec2BSigned))); - mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(int32_t3(Vec3BSigned))); - mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(int16_t4(Vec4BSigned))); - - uint16_t castedShift = uint16_t(input.shift); - - arithmetic_right_shift_operator > rightShiftSignedEmulated2; - mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift); - arithmetic_right_shift_operator > rightShiftSignedEmulated3; - mortonSignedRightShift_emulated_3 = 
rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift); - arithmetic_right_shift_operator > rightShiftSignedEmulated4; - mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift); - } - */ }; #endif From 4287ed1522352bd831280900fba1e7eb239e36c8 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 10 Dec 2025 23:57:16 +0700 Subject: [PATCH 41/57] Fix morton test --- 14_Mortons/CTester.h | 194 +++++++++++----------- 14_Mortons/app_resources/common.hlsl | 24 +-- 14_Mortons/app_resources/testCommon.hlsl | 106 ++++++++---- 14_Mortons/app_resources/testCommon2.hlsl | 17 +- 4 files changed, 194 insertions(+), 147 deletions(-) diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index 342cbcc00..ff83c02cc 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -62,57 +62,59 @@ class CTester final : public ITester uint64_t2 Vec2A = { testInput.coordX, testInput.coordY }; uint64_t2 Vec2B = { testInput.coordZ, testInput.coordW }; - uint16_t2 Vec2ASmall = uint16_t2(Vec2A & smallBitsMask_2 ); - uint16_t2 Vec2BSmall = uint16_t2(Vec2B & smallBitsMask_2 ); - uint16_t2 Vec2AMedium = uint16_t2(Vec2A & mediumBitsMask_2); - uint16_t2 Vec2BMedium = uint16_t2(Vec2B & mediumBitsMask_2); - uint32_t2 Vec2AFull = uint32_t2(Vec2A & fullBitsMask_2); - uint32_t2 Vec2BFull = uint32_t2(Vec2B & fullBitsMask_2); + uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); uint64_t3 Vec3A = { testInput.coordX, testInput.coordY, testInput.coordZ }; uint64_t3 Vec3B = { testInput.coordY, testInput.coordZ, testInput.coordW }; - uint16_t3 Vec3ASmall = uint16_t3(Vec3A & smallBitsMask_3); - uint16_t3 Vec3BSmall = 
uint16_t3(Vec3B & smallBitsMask_3); - uint16_t3 Vec3AMedium = uint16_t3(Vec3A & mediumBitsMask_3); - uint16_t3 Vec3BMedium = uint16_t3(Vec3B & mediumBitsMask_3); - uint32_t3 Vec3AFull = uint32_t3(Vec3A & fullBitsMask_3); - uint32_t3 Vec3BFull = uint32_t3(Vec3B & fullBitsMask_3); + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); uint64_t4 Vec4A = { testInput.coordX, testInput.coordY, testInput.coordZ, testInput.coordW }; uint64_t4 Vec4B = { testInput.coordY, testInput.coordZ, testInput.coordW, testInput.coordX }; - uint16_t4 Vec4ASmall = uint16_t4(Vec4A & smallBitsMask_4); - uint16_t4 Vec4BSmall = uint16_t4(Vec4B & smallBitsMask_4); - uint16_t4 Vec4AMedium = uint16_t4(Vec4A & mediumBitsMask_4); - uint16_t4 Vec4BMedium = uint16_t4(Vec4B & mediumBitsMask_4); - uint16_t4 Vec4AFull = uint16_t4(Vec4A & fullBitsMask_4); - uint16_t4 Vec4BFull = uint16_t4(Vec4B & fullBitsMask_4); + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); // Signed vectors can't just have their highest bits masked off, for them to preserve sign we also need to left shift then right shift them // so their highest bits are all 0s or 1s depending on the sign of the number they encode - int16_t2 Vec2ASignedSmall = int16_t2(Vec2ASmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); - int16_t2 Vec2BSignedSmall = 
int16_t2(Vec2BSmall << uint16_t(16 - smallBits_2)) >> int16_t(16 - smallBits_2); - int16_t2 Vec2ASignedMedium = int16_t2(Vec2AMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); - int16_t2 Vec2BSignedMedium = int16_t2(Vec2BMedium << uint16_t(16 - mediumBits_2)) >> int16_t(16 - mediumBits_2); - int32_t2 Vec2ASignedFull = int32_t2(Vec2AFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); - int32_t2 Vec2BSignedFull = int32_t2(Vec2BFull << uint32_t(32 - fullBits_2)) >> int32_t(32 - fullBits_2); - - int16_t3 Vec3ASignedSmall = int16_t3(Vec3ASmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); - int16_t3 Vec3BSignedSmall = int16_t3(Vec3BSmall << uint16_t(16 - smallBits_3)) >> int16_t(16 - smallBits_3); - int16_t3 Vec3ASignedMedium = int16_t3(Vec3AMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); - int16_t3 Vec3BSignedMedium = int16_t3(Vec3BMedium << uint16_t(16 - mediumBits_3)) >> int16_t(16 - mediumBits_3); - int32_t3 Vec3ASignedFull = int32_t3(Vec3AFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); - int32_t3 Vec3BSignedFull = int32_t3(Vec3BFull << uint32_t(32 - fullBits_3)) >> int32_t(32 - fullBits_3); - - int16_t4 Vec4ASignedSmall = int16_t4(Vec4ASmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); - int16_t4 Vec4BSignedSmall = int16_t4(Vec4BSmall << uint16_t(16 - smallBits_4)) >> int16_t(16 - smallBits_4); - int16_t4 Vec4ASignedMedium = int16_t4(Vec4AMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); - int16_t4 Vec4BSignedMedium = int16_t4(Vec4BMedium << uint16_t(16 - mediumBits_4)) >> int16_t(16 - mediumBits_4); - int16_t4 Vec4ASignedFull = int16_t4(Vec4AFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); - int16_t4 Vec4BSignedFull = int16_t4(Vec4BFull << uint16_t(16 - fullBits_4)) >> int16_t(16 - fullBits_4); + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = 
createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + const auto dummy1 = morton::code(Vec2ASignedSmall); + const auto dummy2 = createMortonFromU64Vec(Vec2A); // Plus expected.mortonPlus_small_2 = createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); @@ -191,49 +193,49 @@ class CTester final : public ITester uint16_t castedShift = uint16_t(generatedShift); // Left-shift - expected.mortonLeftShift_small_2 = morton::code::create((Vec2ASmall << uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); - expected.mortonLeftShift_medium_2 = morton::code::create((Vec2AMedium << uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); - expected.mortonLeftShift_full_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); 
- expected.mortonLeftShift_emulated_2 = morton::code::create((Vec2AFull << uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - - expected.mortonLeftShift_small_3 = morton::code::create((Vec3ASmall << uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); - expected.mortonLeftShift_medium_3 = morton::code::create((Vec3AMedium << uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); - expected.mortonLeftShift_full_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - expected.mortonLeftShift_emulated_3 = morton::code::create((Vec3AFull << uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - - expected.mortonLeftShift_small_4 = morton::code::create((Vec4ASmall << uint16_t(castedShift % smallBits_4)) & uint16_t(smallBitsMask_4)); - expected.mortonLeftShift_medium_4 = morton::code::create((Vec4AMedium << uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); - expected.mortonLeftShift_full_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - expected.mortonLeftShift_emulated_4 = morton::code::create((Vec4AFull << uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - + expected.mortonLeftShift_small_2 = createMortonFromU64Vec(Vec2ASmall << uint16_t(castedShift % smallBits_2)); + expected.mortonLeftShift_medium_2 = createMortonFromU64Vec(Vec2AMedium << uint16_t(castedShift % mediumBits_2)); + expected.mortonLeftShift_full_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + expected.mortonLeftShift_emulated_2 = createMortonFromU64Vec(Vec2AFull << uint32_t(castedShift % fullBits_2)); + + expected.mortonLeftShift_small_3 = createMortonFromU64Vec(Vec3ASmall << uint16_t(castedShift % smallBits_3)); + expected.mortonLeftShift_medium_3 = createMortonFromU64Vec(Vec3AMedium << uint16_t(castedShift % mediumBits_3)); + expected.mortonLeftShift_full_3 = 
createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); + expected.mortonLeftShift_emulated_3 = createMortonFromU64Vec(Vec3AFull << uint32_t(castedShift % fullBits_3)); + + expected.mortonLeftShift_small_4 = createMortonFromU64Vec(Vec4ASmall << uint16_t(castedShift % smallBits_4)); + expected.mortonLeftShift_medium_4 = createMortonFromU64Vec(Vec4AMedium << uint16_t(castedShift % mediumBits_4)); + expected.mortonLeftShift_full_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + expected.mortonLeftShift_emulated_4 = createMortonFromU64Vec(Vec4AFull << uint16_t(castedShift % fullBits_4)); + // Unsigned right-shift - expected.mortonUnsignedRightShift_small_2 = morton::code::create((Vec2ASmall >> uint16_t(castedShift % smallBits_2)) & uint16_t(smallBitsMask_2)); - expected.mortonUnsignedRightShift_medium_2 = morton::code::create((Vec2AMedium >> uint16_t(castedShift % mediumBits_2)) & uint16_t(mediumBitsMask_2)); - expected.mortonUnsignedRightShift_full_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2)) & uint32_t(fullBitsMask_2)); - expected.mortonUnsignedRightShift_emulated_2 = morton::code::create((Vec2AFull >> uint32_t(castedShift % fullBits_2))& uint32_t(fullBitsMask_2)); - - expected.mortonUnsignedRightShift_small_3 = morton::code::create((Vec3ASmall >> uint16_t(castedShift % smallBits_3)) & uint16_t(smallBitsMask_3)); - expected.mortonUnsignedRightShift_medium_3 = morton::code::create((Vec3AMedium >> uint16_t(castedShift % mediumBits_3)) & uint16_t(mediumBitsMask_3)); - expected.mortonUnsignedRightShift_full_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3)) & uint32_t(fullBitsMask_3)); - expected.mortonUnsignedRightShift_emulated_3 = morton::code::create((Vec3AFull >> uint32_t(castedShift % fullBits_3))& uint32_t(fullBitsMask_3)); - - expected.mortonUnsignedRightShift_small_4 = morton::code::create((Vec4ASmall >> uint16_t(castedShift % smallBits_4)) & 
uint16_t(smallBitsMask_4)); - expected.mortonUnsignedRightShift_medium_4 = morton::code::create((Vec4AMedium >> uint16_t(castedShift % mediumBits_4)) & uint16_t(mediumBitsMask_4)); - expected.mortonUnsignedRightShift_full_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4)) & uint16_t(fullBitsMask_4)); - expected.mortonUnsignedRightShift_emulated_4 = morton::code::create((Vec4AFull >> uint16_t(castedShift % fullBits_4))& uint16_t(fullBitsMask_4)); - + expected.mortonUnsignedRightShift_small_2 = morton::code::create(Vec2ASmall >> uint16_t(castedShift % smallBits_2)); + expected.mortonUnsignedRightShift_medium_2 = morton::code::create(Vec2AMedium >> uint16_t(castedShift % mediumBits_2)); + expected.mortonUnsignedRightShift_full_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + expected.mortonUnsignedRightShift_emulated_2 = morton::code::create(Vec2AFull >> uint32_t(castedShift % fullBits_2)); + + expected.mortonUnsignedRightShift_small_3 = morton::code::create(Vec3ASmall >> uint16_t(castedShift % smallBits_3)); + expected.mortonUnsignedRightShift_medium_3 = morton::code::create(Vec3AMedium >> uint16_t(castedShift % mediumBits_3)); + expected.mortonUnsignedRightShift_full_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + expected.mortonUnsignedRightShift_emulated_3 = morton::code::create(Vec3AFull >> uint32_t(castedShift % fullBits_3)); + + expected.mortonUnsignedRightShift_small_4 = morton::code::create(Vec4ASmall >> uint16_t(castedShift % smallBits_4)); + expected.mortonUnsignedRightShift_medium_4 = morton::code::create(Vec4AMedium >> uint16_t(castedShift % mediumBits_4)); + expected.mortonUnsignedRightShift_full_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + expected.mortonUnsignedRightShift_emulated_4 = morton::code::create(Vec4AFull >> uint16_t(castedShift % fullBits_4)); + // Signed right-shift - expected.mortonSignedRightShift_small_2 = 
morton::code::create((Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)) & int16_t(smallBitsMask_2)); - expected.mortonSignedRightShift_medium_2 = morton::code::create((Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)) & int16_t(mediumBitsMask_2)); - expected.mortonSignedRightShift_full_2 = morton::code::create((Vec2ASignedFull >> int32_t(castedShift % fullBits_2)) & int32_t(fullBitsMask_2)); - - expected.mortonSignedRightShift_small_3 = morton::code::create((Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)) & int16_t(smallBitsMask_3)); - expected.mortonSignedRightShift_medium_3 = morton::code::create((Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)) & int16_t(mediumBitsMask_3)); - expected.mortonSignedRightShift_full_3 = morton::code::create((Vec3ASignedFull >> int32_t(castedShift % fullBits_3)) & int32_t(fullBitsMask_3)); - - expected.mortonSignedRightShift_small_4 = morton::code::create((Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)) & int16_t(smallBitsMask_4)); - expected.mortonSignedRightShift_medium_4 = morton::code::create((Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)) & int16_t(mediumBitsMask_4)); - expected.mortonSignedRightShift_full_4 = morton::code::create((Vec4ASignedFull >> int16_t(castedShift % fullBits_4)) & int16_t(fullBitsMask_4)); + expected.mortonSignedRightShift_small_2 = morton::code::create(Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)); + expected.mortonSignedRightShift_medium_2 = morton::code::create(Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)); + expected.mortonSignedRightShift_full_2 = morton::code::create(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + + expected.mortonSignedRightShift_small_3 = morton::code::create(Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)); + expected.mortonSignedRightShift_medium_3 = morton::code::create(Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)); + expected.mortonSignedRightShift_full_3 = 
morton::code::create(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + + expected.mortonSignedRightShift_small_4 = morton::code::create(Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)); + expected.mortonSignedRightShift_medium_4 = morton::code::create(Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)); + expected.mortonSignedRightShift_full_4 = morton::code::create(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); } performCpuTests(testInput, expected); @@ -278,7 +280,7 @@ class CTester final : public ITester verifyTestValue("emulatedSignedRightShifted", expectedTestValues.emulatedSignedRightShifted, testValues.emulatedSignedRightShifted, testType); verifyTestValue("emulatedUnaryMinus", expectedTestValues.emulatedUnaryMinus, testValues.emulatedUnaryMinus, testType); - // // Morton Plus + // Morton Plus verifyTestValue("mortonPlus_small_2", expectedTestValues.mortonPlus_small_2, testValues.mortonPlus_small_2, testType); verifyTestValue("mortonPlus_medium_2", expectedTestValues.mortonPlus_medium_2, testValues.mortonPlus_medium_2, testType); verifyTestValue("mortonPlus_full_2", expectedTestValues.mortonPlus_full_2, testValues.mortonPlus_full_2, testType); @@ -293,8 +295,8 @@ class CTester final : public ITester verifyTestValue("mortonPlus_medium_4", expectedTestValues.mortonPlus_medium_4, testValues.mortonPlus_medium_4, testType); verifyTestValue("mortonPlus_full_4", expectedTestValues.mortonPlus_full_4, testValues.mortonPlus_full_4, testType); verifyTestValue("mortonPlus_emulated_4", expectedTestValues.mortonPlus_emulated_4, testValues.mortonPlus_emulated_4, testType); - - // // Morton Minus + + // Morton Minus verifyTestValue("mortonMinus_small_2", expectedTestValues.mortonMinus_small_2, testValues.mortonMinus_small_2, testType); verifyTestValue("mortonMinus_medium_2", expectedTestValues.mortonMinus_medium_2, testValues.mortonMinus_medium_2, testType); verifyTestValue("mortonMinus_full_2", expectedTestValues.mortonMinus_full_2, 
testValues.mortonMinus_full_2, testType); @@ -310,7 +312,7 @@ class CTester final : public ITester verifyTestValue("mortonMinus_full_4", expectedTestValues.mortonMinus_full_4, testValues.mortonMinus_full_4, testType); verifyTestValue("mortonMinus_emulated_4", expectedTestValues.mortonMinus_emulated_4, testValues.mortonMinus_emulated_4, testType); - // // Morton coordinate-wise equality + // Morton coordinate-wise equality verifyTestValue("mortonEqual_small_2", expectedTestValues.mortonEqual_small_2, testValues.mortonEqual_small_2, testType); verifyTestValue("mortonEqual_medium_2", expectedTestValues.mortonEqual_medium_2, testValues.mortonEqual_medium_2, testType); verifyTestValue("mortonEqual_full_2", expectedTestValues.mortonEqual_full_2, testValues.mortonEqual_full_2, testType); @@ -326,7 +328,7 @@ class CTester final : public ITester verifyTestValue("mortonEqual_full_4", expectedTestValues.mortonEqual_full_4, testValues.mortonEqual_full_4, testType); verifyTestValue("mortonEqual_emulated_4", expectedTestValues.mortonEqual_emulated_4, testValues.mortonEqual_emulated_4, testType); - // // Morton coordinate-wise unsigned inequality + // Morton coordinate-wise unsigned inequality verifyTestValue("mortonUnsignedLess_small_2", expectedTestValues.mortonUnsignedLess_small_2, testValues.mortonUnsignedLess_small_2, testType); verifyTestValue("mortonUnsignedLess_medium_2", expectedTestValues.mortonUnsignedLess_medium_2, testValues.mortonUnsignedLess_medium_2, testType); verifyTestValue("mortonUnsignedLess_full_2", expectedTestValues.mortonUnsignedLess_full_2, testValues.mortonUnsignedLess_full_2, testType); @@ -341,7 +343,7 @@ class CTester final : public ITester verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); - // // Morton 
coordinate-wise signed inequality + // Morton coordinate-wise signed inequality verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); @@ -354,7 +356,7 @@ class CTester final : public ITester verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); - // // Morton left-shift + // Morton left-shift verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); verifyTestValue("mortonLeftShift_medium_2", expectedTestValues.mortonLeftShift_medium_2, testValues.mortonLeftShift_medium_2, testType); verifyTestValue("mortonLeftShift_full_2", expectedTestValues.mortonLeftShift_full_2, testValues.mortonLeftShift_full_2, testType); @@ -370,7 +372,7 @@ class CTester final : public ITester verifyTestValue("mortonLeftShift_full_4", expectedTestValues.mortonLeftShift_full_4, testValues.mortonLeftShift_full_4, testType); verifyTestValue("mortonLeftShift_emulated_4", expectedTestValues.mortonLeftShift_emulated_4, testValues.mortonLeftShift_emulated_4, testType); - // // Morton unsigned right-shift + // Morton unsigned right-shift verifyTestValue("mortonUnsignedRightShift_small_2", expectedTestValues.mortonUnsignedRightShift_small_2, testValues.mortonUnsignedRightShift_small_2, testType); verifyTestValue("mortonUnsignedRightShift_medium_2", expectedTestValues.mortonUnsignedRightShift_medium_2, testValues.mortonUnsignedRightShift_medium_2, testType); 
verifyTestValue("mortonUnsignedRightShift_full_2", expectedTestValues.mortonUnsignedRightShift_full_2, testValues.mortonUnsignedRightShift_full_2, testType); @@ -386,7 +388,7 @@ class CTester final : public ITester verifyTestValue("mortonUnsignedRightShift_full_4", expectedTestValues.mortonUnsignedRightShift_full_4, testValues.mortonUnsignedRightShift_full_4, testType); verifyTestValue("mortonUnsignedRightShift_emulated_4", expectedTestValues.mortonUnsignedRightShift_emulated_4, testValues.mortonUnsignedRightShift_emulated_4, testType); - // // Morton signed right-shift + // Morton signed right-shift verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); @@ -456,14 +458,14 @@ class CTester2 final : public ITester expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); uint16_t castedShift = uint16_t(generatedShift); - expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2A << uint64_t(castedShift % fullBits_2)); - expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3A << uint64_t(castedShift % fullBits_3)); - expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4A << uint64_t(castedShift % fullBits_4)); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % 
fullBits_4)); } performCpuTests(testInput, expected); - // performGpuTests(testInput, expected); + performGpuTests(testInput, expected); } m_logger->log("SECOND TESTS DONE.", system::ILogger::ELL_PERFORMANCE); } @@ -495,10 +497,10 @@ class CTester2 final : public ITester verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); - // - // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); - // verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); - // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); + + verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); + verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); + verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); } }; diff --git a/14_Mortons/app_resources/common.hlsl b/14_Mortons/app_resources/common.hlsl index 895728f26..ef75d6057 100644 --- a/14_Mortons/app_resources/common.hlsl +++ b/14_Mortons/app_resources/common.hlsl @@ -19,8 +19,8 @@ NBL_CONSTEXPR uint16_t smallBits_4 = 4; 
NBL_CONSTEXPR uint16_t mediumBits_4 = 8; NBL_CONSTEXPR uint16_t fullBits_4 = 16; -template -NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR T bitMask = (uint64_t(1) << Bits) - 1; +template +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t bitMask = (uint64_t(1) << (Bits-1)) - 1; #ifndef __HLSL_VERSION @@ -41,23 +41,27 @@ constexpr uint64_t fullBitsMask_4 = (uint64_t(1) << fullBits_4) - 1; using namespace nbl::hlsl; template -T createAnyBitIntegerFromU64(uint64_t val) +NBL_CONSTEXPR_INLINE_FUNC T createAnyBitIntegerFromU64(uint64_t val) { - if(Signed && (_static_cast(val) < 0)) + if(Signed) { + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint64_t mask = (uint64_t(1) << (Bits - 1)) - 1; // fill excess bit with one - return T(val) | ~bitMask; + if (int64_t(val) < 0) + return T(val) | ~mask; + else + return T(val) & mask; } else { - return T(val) & bitMask; - + NBL_CONSTEXPR_FUNC_SCOPE_VAR uint64_t mask = (uint64_t(1) << Bits) - 1; + return T(val) & mask; } } template -vector createAnyBitIntegerVecFromU64Vec(vector val) +NBL_CONSTEXPR_INLINE_FUNC vector createAnyBitIntegerVecFromU64Vec(vector val) { - array_get, T> getter; + array_get, uint64_t> getter; array_set, T> setter; vector output; NBL_UNROLL @@ -69,7 +73,7 @@ vector createAnyBitIntegerVecFromU64Vec(vector val) } template -morton::code createMortonFromU64Vec(const vector vec) +NBL_CONSTEXPR_INLINE_FUNC morton::code createMortonFromU64Vec(const vector vec) { using morton_code_t = morton::code; using decode_component_t = typename morton_code_t::decode_component_t; diff --git a/14_Mortons/app_resources/testCommon.hlsl b/14_Mortons/app_resources/testCommon.hlsl index 6e9051c1b..f068b474b 100644 --- a/14_Mortons/app_resources/testCommon.hlsl +++ b/14_Mortons/app_resources/testCommon.hlsl @@ -40,6 +40,48 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; + 
uint16_t2 Vec2ASmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint16_t2 Vec2AMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint16_t2 Vec2BMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + uint32_t2 Vec2AFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + uint32_t2 Vec2BFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + uint16_t3 Vec3ASmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint16_t3 Vec3AMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint16_t3 Vec3BMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + uint32_t3 Vec3AFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + uint32_t3 Vec3BFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + uint16_t4 Vec4ASmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4AFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + + int16_t2 Vec2ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec2B); + int16_t2 Vec2ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2A); + int16_t2 Vec2BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t2 Vec2ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec2A); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + + int16_t3 Vec3ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t3 Vec3ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3A); + int16_t3 Vec3BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec3B); + int32_t3 Vec3ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec3A); + int32_t3 
Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + + int16_t4 Vec4ASignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedSmall = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedMedium = createAnyBitIntegerVecFromU64Vec(Vec4B); + int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + morton::code morton_small_2A = createMortonFromU64Vec(Vec2A); morton::code morton_medium_2A = createMortonFromU64Vec(Vec2A); morton::code morton_full_2A = createMortonFromU64Vec(Vec2A); @@ -115,48 +157,48 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.mortonMinus_emulated_4 = morton_emulated_4A - morton_emulated_4B; // Coordinate-wise equality - output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(uint16_t2(Vec2B))); - output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(uint16_t2(Vec2B))); - output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(uint32_t2(Vec2B))); - output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(uint32_t2(Vec2B))); + output.mortonEqual_small_2 = uint32_t2(morton_small_2A.equal(Vec2BSmall)); + output.mortonEqual_medium_2 = uint32_t2(morton_medium_2A.equal(Vec2BMedium)); + output.mortonEqual_full_2 = uint32_t2(morton_full_2A.equal(Vec2BFull)); + output.mortonEqual_emulated_2 = uint32_t2(morton_emulated_2A.equal(Vec2BFull)); - output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(uint16_t3(Vec3B))); - output.mortonEqual_medium_3 = uint32_t3(morton_medium_3A.equal(uint16_t3(Vec3B))); - output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(uint32_t3(Vec3B))); - output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(uint32_t3(Vec3B))); + output.mortonEqual_small_3 = uint32_t3(morton_small_3A.equal(Vec3BSmall)); + output.mortonEqual_medium_3 = 
uint32_t3(morton_medium_3A.equal(Vec3BMedium)); + output.mortonEqual_full_3 = uint32_t3(morton_full_3A.equal(Vec3BFull)); + output.mortonEqual_emulated_3 = uint32_t3(morton_emulated_3A.equal(Vec3BFull)); - output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(uint16_t4(Vec4B))); - output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(uint16_t4(Vec4B))); - output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(uint16_t4(Vec4B))); - output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(uint16_t4(Vec4B))); + output.mortonEqual_small_4 = uint32_t4(morton_small_4A.equal(Vec4BSmall)); + output.mortonEqual_medium_4 = uint32_t4(morton_medium_4A.equal(Vec4BMedium)); + output.mortonEqual_full_4 = uint32_t4(morton_full_4A.equal(Vec4BFull)); + output.mortonEqual_emulated_4 = uint32_t4(morton_emulated_4A.equal(Vec4BFull)); // Coordinate-wise unsigned inequality (just testing with less) - output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(uint16_t2(Vec2B))); - output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(uint16_t2(Vec2B))); - output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(uint32_t2(Vec2B))); - output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(uint32_t2(Vec2B))); + output.mortonUnsignedLess_small_2 = uint32_t2(morton_small_2A.lessThan(Vec2BSmall)); + output.mortonUnsignedLess_medium_2 = uint32_t2(morton_medium_2A.lessThan(Vec2BMedium)); + output.mortonUnsignedLess_full_2 = uint32_t2(morton_full_2A.lessThan(Vec2BFull)); + output.mortonUnsignedLess_emulated_2 = uint32_t2(morton_emulated_2A.lessThan(Vec2BFull)); - output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(uint16_t3(Vec3B))); - output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(uint16_t3(Vec3B))); - output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(uint32_t3(Vec3B))); - output.mortonUnsignedLess_emulated_3 = 
uint32_t3(morton_emulated_3A.lessThan(uint32_t3(Vec3B))); + output.mortonUnsignedLess_small_3 = uint32_t3(morton_small_3A.lessThan(Vec3BSmall)); + output.mortonUnsignedLess_medium_3 = uint32_t3(morton_medium_3A.lessThan(Vec3BMedium)); + output.mortonUnsignedLess_full_3 = uint32_t3(morton_full_3A.lessThan(Vec3BFull)); + output.mortonUnsignedLess_emulated_3 = uint32_t3(morton_emulated_3A.lessThan(Vec3BFull)); - output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(uint16_t4(Vec4B))); - output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(uint16_t4(Vec4B))); - output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(uint16_t4(Vec4B))); + output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(Vec4BSmall)); + output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(Vec4BMedium)); + output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(Vec4BFull)); // Coordinate-wise signed inequality - output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(int16_t2(Vec2B))); - output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(int16_t2(Vec2B))); - output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(int32_t2(Vec2B))); + output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(Vec2BSignedSmall)); + output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(Vec2BSignedMedium)); + output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(Vec2BSignedFull)); - output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(int16_t3(Vec3B))); - output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(int16_t3(Vec3B))); - output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(int32_t3(Vec3B))); + output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(Vec3BSignedSmall)); + output.mortonSignedLess_medium_3 = 
uint32_t3(morton_medium_3_signed.lessThan(Vec3BSignedMedium)); + output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(Vec3BSignedFull)); - output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(int16_t4(Vec4B))); - output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(int16_t4(Vec4B))); - output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(int16_t4(Vec4B))); + output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(Vec4BSignedSmall)); + output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(Vec4BSignedMedium)); + output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(Vec4BSignedFull)); // Cast to uint16_t which is what left shift for Mortons expect uint16_t castedShift = uint16_t(input.shift); diff --git a/14_Mortons/app_resources/testCommon2.hlsl b/14_Mortons/app_resources/testCommon2.hlsl index e7eced852..365b82340 100644 --- a/14_Mortons/app_resources/testCommon2.hlsl +++ b/14_Mortons/app_resources/testCommon2.hlsl @@ -11,11 +11,10 @@ void fillTestValues2(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestV uint64_t4 Vec4A = { input.coordX, input.coordY, input.coordZ, input.coordW }; uint64_t4 Vec4B = { input.coordY, input.coordZ, input.coordW, input.coordX }; - int32_t2 Vec2BSigned = createAnyBitIntegerVecFromU64Vec(Vec2B); - - int32_t3 Vec3BSigned = createAnyBitIntegerVecFromU64Vec(Vec3B); - - int16_t4 Vec4BSigned = createAnyBitIntegerVecFromU64Vec(Vec4B); + uint16_t4 Vec4BFull = createAnyBitIntegerVecFromU64Vec(Vec4B); + int32_t2 Vec2BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec2B); + int32_t3 Vec3BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec3B); + int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); morton::code morton_emulated_4A = createMortonFromU64Vec(Vec4A); morton::code morton_emulated_2_signed = createMortonFromU64Vec(Vec2A); @@ -23,11 +22,11 @@ void 
fillTestValues2(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestV morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); - output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(uint16_t4(Vec4B))); + output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); - output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSigned)); - output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSigned)); - output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSigned)); + output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); + output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); + output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); uint16_t castedShift = uint16_t(input.shift); From 6a7b003798bb894d36be63609fb987dd20fccaa3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 00:27:20 +0700 Subject: [PATCH 42/57] Remove unnecessary code --- 14_Mortons/app_resources/common.hlsl | 20 -------------------- 14_Mortons/main.cpp | 8 ++++---- 2 files changed, 4 insertions(+), 24 deletions(-) diff --git a/14_Mortons/app_resources/common.hlsl b/14_Mortons/app_resources/common.hlsl index ef75d6057..980bb0c32 100644 --- a/14_Mortons/app_resources/common.hlsl +++ b/14_Mortons/app_resources/common.hlsl @@ -19,26 +19,6 @@ NBL_CONSTEXPR uint16_t smallBits_4 = 4; NBL_CONSTEXPR uint16_t mediumBits_4 = 8; NBL_CONSTEXPR uint16_t fullBits_4 = 16; -template -NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR uint64_t bitMask = (uint64_t(1) << (Bits-1)) - 1; - - -#ifndef __HLSL_VERSION - -constexpr uint64_t smallBitsMask_2 = (uint64_t(1) << smallBits_2) - 1; -constexpr uint64_t mediumBitsMask_2 = (uint64_t(1) << mediumBits_2) - 1; -constexpr uint64_t fullBitsMask_2 = (uint64_t(1) << fullBits_2) - 1; - 
-constexpr uint64_t smallBitsMask_3 = (uint64_t(1) << smallBits_3) - 1; -constexpr uint64_t mediumBitsMask_3 = (uint64_t(1) << mediumBits_3) - 1; -constexpr uint64_t fullBitsMask_3 = (uint64_t(1) << fullBits_3) - 1; - -constexpr uint64_t smallBitsMask_4 = (uint64_t(1) << smallBits_4) - 1; -constexpr uint64_t mediumBitsMask_4 = (uint64_t(1) << mediumBits_4) - 1; -constexpr uint64_t fullBitsMask_4 = (uint64_t(1) << fullBits_4) - 1; - -#endif - using namespace nbl::hlsl; template NBL_CONSTEXPR_INLINE_FUNC T createAnyBitIntegerFromU64(uint64_t val) diff --git a/14_Mortons/main.cpp b/14_Mortons/main.cpp index bd4653f7c..12f55805f 100644 --- a/14_Mortons/main.cpp +++ b/14_Mortons/main.cpp @@ -45,10 +45,10 @@ class MortonTest final : public MonoDeviceApplication, public BuiltinResourcesAp // Some tests with mortons with emulated uint storage were cut off, it should be fine since each tested on their own produces correct results for each operator // Blocked by https://github.com/KhronosGroup/SPIRV-Tools/issues/6104 { - // CTester mortonTester; - // pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; - // mortonTester.setupPipeline(pplnSetupData); - // mortonTester.performTests(); + CTester mortonTester; + pplnSetupData.testShaderPath = "app_resources/test.comp.hlsl"; + mortonTester.setupPipeline(pplnSetupData); + mortonTester.performTests(); CTester2 mortonTester2; pplnSetupData.testShaderPath = "app_resources/test2.comp.hlsl"; From f012a1af45dc0fd9240e342d185a3f1d6e4a2dc3 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 00:44:24 +0700 Subject: [PATCH 43/57] Add some comment for the reason we have to CTester --- 14_Mortons/CTester.h | 1 + 1 file changed, 1 insertion(+) diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index ff83c02cc..340e405d1 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -403,6 +403,7 @@ class CTester final : public ITester } }; +// Some hlsl code will result in compilation error if mixed together due to 
some bug in dxc. So we separate them into multiple shader compilation and test. class CTester2 final : public ITester { public: From f415e8c09150a1643945112cb5c37cb1df3acf69 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 00:48:53 +0700 Subject: [PATCH 44/57] Remove dummy code --- 14_Mortons/CTester.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index 340e405d1..447ceb18a 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -113,8 +113,6 @@ class CTester final : public ITester int16_t4 Vec4ASignedFull = createAnyBitIntegerVecFromU64Vec(Vec4A); int16_t4 Vec4BSignedFull = createAnyBitIntegerVecFromU64Vec(Vec4B); - const auto dummy1 = morton::code(Vec2ASignedSmall); - const auto dummy2 = createMortonFromU64Vec(Vec2A); // Plus expected.mortonPlus_small_2 = createMortonFromU64Vec(Vec2ASmall + Vec2BSmall); expected.mortonPlus_medium_2 = createMortonFromU64Vec(Vec2AMedium + Vec2BMedium); From 8f72b9ecda1a2e39a58b4fc43fc6d9a025f80728 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Thu, 11 Dec 2025 16:18:56 +0700 Subject: [PATCH 45/57] Fix compiler warning for shader compilation --- 14_Mortons/app_resources/common.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/14_Mortons/app_resources/common.hlsl b/14_Mortons/app_resources/common.hlsl index 980bb0c32..98e5e1342 100644 --- a/14_Mortons/app_resources/common.hlsl +++ b/14_Mortons/app_resources/common.hlsl @@ -27,14 +27,14 @@ NBL_CONSTEXPR_INLINE_FUNC T createAnyBitIntegerFromU64(uint64_t val) { NBL_CONSTEXPR_FUNC_SCOPE_VAR uint64_t mask = (uint64_t(1) << (Bits - 1)) - 1; // fill excess bit with one - if (int64_t(val) < 0) - return T(val) | ~mask; + if (_static_cast(val) < 0) + return _static_cast(val | ~mask); else - return T(val) & mask; + return _static_cast(val & mask); } else { NBL_CONSTEXPR_FUNC_SCOPE_VAR uint64_t mask = (uint64_t(1) << Bits) - 1; - return T(val) & mask; + return _static_cast(val & mask); } } From 
3042409a14c7e69e8e63191b5c1b996e863a7cda Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 12 Dec 2025 18:02:46 +0700 Subject: [PATCH 46/57] Add back second test to first in commented form --- 14_Mortons/CTester.h | 15 +++++++++++++++ 14_Mortons/app_resources/testCommon.hlsl | 17 ++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/14_Mortons/CTester.h b/14_Mortons/CTester.h index 447ceb18a..6933e77e5 100644 --- a/14_Mortons/CTester.h +++ b/14_Mortons/CTester.h @@ -175,19 +175,23 @@ class CTester final : public ITester expected.mortonUnsignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASmall, Vec4BSmall)); expected.mortonUnsignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4AMedium, Vec4BMedium)); expected.mortonUnsignedLess_full_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); + expected.mortonUnsignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4AFull, Vec4BFull)); // Coordinate-wise signed inequality expected.mortonSignedLess_small_2 = uint32_t2(glm::lessThan(Vec2ASignedSmall, Vec2BSignedSmall)); expected.mortonSignedLess_medium_2 = uint32_t2(glm::lessThan(Vec2ASignedMedium, Vec2BSignedMedium)); expected.mortonSignedLess_full_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); + expected.mortonSignedLess_emulated_2 = uint32_t2(glm::lessThan(Vec2ASignedFull, Vec2BSignedFull)); expected.mortonSignedLess_small_3 = uint32_t3(glm::lessThan(Vec3ASignedSmall, Vec3BSignedSmall)); expected.mortonSignedLess_medium_3 = uint32_t3(glm::lessThan(Vec3ASignedMedium, Vec3BSignedMedium)); expected.mortonSignedLess_full_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); + expected.mortonSignedLess_emulated_3 = uint32_t3(glm::lessThan(Vec3ASignedFull, Vec3BSignedFull)); expected.mortonSignedLess_small_4 = uint32_t4(glm::lessThan(Vec4ASignedSmall, Vec4BSignedSmall)); expected.mortonSignedLess_medium_4 = uint32_t4(glm::lessThan(Vec4ASignedMedium, Vec4BSignedMedium)); expected.mortonSignedLess_full_4 = 
uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); + expected.mortonSignedLess_emulated_4 = uint32_t4(glm::lessThan(Vec4ASignedFull, Vec4BSignedFull)); uint16_t castedShift = uint16_t(generatedShift); // Left-shift @@ -226,14 +230,17 @@ class CTester final : public ITester expected.mortonSignedRightShift_small_2 = morton::code::create(Vec2ASignedSmall >> int16_t(castedShift % smallBits_2)); expected.mortonSignedRightShift_medium_2 = morton::code::create(Vec2ASignedMedium >> int16_t(castedShift % mediumBits_2)); expected.mortonSignedRightShift_full_2 = morton::code::create(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); + expected.mortonSignedRightShift_emulated_2 = createMortonFromU64Vec(Vec2ASignedFull >> int32_t(castedShift % fullBits_2)); expected.mortonSignedRightShift_small_3 = morton::code::create(Vec3ASignedSmall >> int16_t(castedShift % smallBits_3)); expected.mortonSignedRightShift_medium_3 = morton::code::create(Vec3ASignedMedium >> int16_t(castedShift % mediumBits_3)); expected.mortonSignedRightShift_full_3 = morton::code::create(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); + expected.mortonSignedRightShift_emulated_3 = createMortonFromU64Vec(Vec3ASignedFull >> int32_t(castedShift % fullBits_3)); expected.mortonSignedRightShift_small_4 = morton::code::create(Vec4ASignedSmall >> int16_t(castedShift % smallBits_4)); expected.mortonSignedRightShift_medium_4 = morton::code::create(Vec4ASignedMedium >> int16_t(castedShift % mediumBits_4)); expected.mortonSignedRightShift_full_4 = morton::code::create(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); + expected.mortonSignedRightShift_emulated_4 = createMortonFromU64Vec(Vec4ASignedFull >> int16_t(castedShift % fullBits_4)); } performCpuTests(testInput, expected); @@ -263,6 +270,7 @@ class CTester final : public ITester void verifyTestValues(const TestValues& expectedTestValues, const TestValues& testValues, ITester::TestType testType) { + // Some verification is commented out and 
moved to CTester2 due to bug in dxc. Uncomment them when the bug is fixed. verifyTestValue("emulatedAnd", expectedTestValues.emulatedAnd, testValues.emulatedAnd, testType); verifyTestValue("emulatedOr", expectedTestValues.emulatedOr, testValues.emulatedOr, testType); verifyTestValue("emulatedXor", expectedTestValues.emulatedXor, testValues.emulatedXor, testType); @@ -340,19 +348,23 @@ class CTester final : public ITester verifyTestValue("mortonUnsignedLess_small_4", expectedTestValues.mortonUnsignedLess_small_4, testValues.mortonUnsignedLess_small_4, testType); verifyTestValue("mortonUnsignedLess_medium_4", expectedTestValues.mortonUnsignedLess_medium_4, testValues.mortonUnsignedLess_medium_4, testType); verifyTestValue("mortonUnsignedLess_full_4", expectedTestValues.mortonUnsignedLess_full_4, testValues.mortonUnsignedLess_full_4, testType); + // verifyTestValue("mortonUnsignedLess_emulated_4", expectedTestValues.mortonUnsignedLess_emulated_4, testValues.mortonUnsignedLess_emulated_4, testType); // Morton coordinate-wise signed inequality verifyTestValue("mortonSignedLess_small_2", expectedTestValues.mortonSignedLess_small_2, testValues.mortonSignedLess_small_2, testType); verifyTestValue("mortonSignedLess_medium_2", expectedTestValues.mortonSignedLess_medium_2, testValues.mortonSignedLess_medium_2, testType); verifyTestValue("mortonSignedLess_full_2", expectedTestValues.mortonSignedLess_full_2, testValues.mortonSignedLess_full_2, testType); + // verifyTestValue("mortonSignedLess_emulated_2", expectedTestValues.mortonSignedLess_emulated_2, testValues.mortonSignedLess_emulated_2, testType); verifyTestValue("mortonSignedLess_small_3", expectedTestValues.mortonSignedLess_small_3, testValues.mortonSignedLess_small_3, testType); verifyTestValue("mortonSignedLess_medium_3", expectedTestValues.mortonSignedLess_medium_3, testValues.mortonSignedLess_medium_3, testType); verifyTestValue("mortonSignedLess_full_3", expectedTestValues.mortonSignedLess_full_3, 
testValues.mortonSignedLess_full_3, testType); + // verifyTestValue("mortonSignedLess_emulated_3", expectedTestValues.mortonSignedLess_emulated_3, testValues.mortonSignedLess_emulated_3, testType); verifyTestValue("mortonSignedLess_small_4", expectedTestValues.mortonSignedLess_small_4, testValues.mortonSignedLess_small_4, testType); verifyTestValue("mortonSignedLess_medium_4", expectedTestValues.mortonSignedLess_medium_4, testValues.mortonSignedLess_medium_4, testType); verifyTestValue("mortonSignedLess_full_4", expectedTestValues.mortonSignedLess_full_4, testValues.mortonSignedLess_full_4, testType); + // verifyTestValue("mortonSignedLess_emulated_4", expectedTestValues.mortonSignedLess_emulated_4, testValues.mortonSignedLess_emulated_4, testType); // Morton left-shift verifyTestValue("mortonLeftShift_small_2", expectedTestValues.mortonLeftShift_small_2, testValues.mortonLeftShift_small_2, testType); @@ -390,14 +402,17 @@ class CTester final : public ITester verifyTestValue("mortonSignedRightShift_small_2", expectedTestValues.mortonSignedRightShift_small_2, testValues.mortonSignedRightShift_small_2, testType); verifyTestValue("mortonSignedRightShift_medium_2", expectedTestValues.mortonSignedRightShift_medium_2, testValues.mortonSignedRightShift_medium_2, testType); verifyTestValue("mortonSignedRightShift_full_2", expectedTestValues.mortonSignedRightShift_full_2, testValues.mortonSignedRightShift_full_2, testType); + // verifyTestValue("mortonSignedRightShift_emulated_2", expectedTestValues.mortonSignedRightShift_emulated_2, testValues.mortonSignedRightShift_emulated_2, testType); verifyTestValue("mortonSignedRightShift_small_3", expectedTestValues.mortonSignedRightShift_small_3, testValues.mortonSignedRightShift_small_3, testType); verifyTestValue("mortonSignedRightShift_medium_3", expectedTestValues.mortonSignedRightShift_medium_3, testValues.mortonSignedRightShift_medium_3, testType); verifyTestValue("mortonSignedRightShift_full_3", 
expectedTestValues.mortonSignedRightShift_full_3, testValues.mortonSignedRightShift_full_3, testType); + //verifyTestValue("mortonSignedRightShift_emulated_3", expectedTestValues.mortonSignedRightShift_emulated_3, testValues.mortonSignedRightShift_emulated_3, testType); verifyTestValue("mortonSignedRightShift_small_4", expectedTestValues.mortonSignedRightShift_small_4, testValues.mortonSignedRightShift_small_4, testType); verifyTestValue("mortonSignedRightShift_medium_4", expectedTestValues.mortonSignedRightShift_medium_4, testValues.mortonSignedRightShift_medium_4, testType); verifyTestValue("mortonSignedRightShift_full_4", expectedTestValues.mortonSignedRightShift_full_4, testValues.mortonSignedRightShift_full_4, testType); + // verifyTestValue("mortonSignedRightShift_emulated_4", expectedTestValues.mortonSignedRightShift_emulated_4, testValues.mortonSignedRightShift_emulated_4, testType); } }; diff --git a/14_Mortons/app_resources/testCommon.hlsl b/14_Mortons/app_resources/testCommon.hlsl index f068b474b..6144b6ce9 100644 --- a/14_Mortons/app_resources/testCommon.hlsl +++ b/14_Mortons/app_resources/testCommon.hlsl @@ -123,7 +123,8 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa morton::code morton_medium_4_signed = createMortonFromU64Vec(Vec4A); morton::code morton_full_4_signed = createMortonFromU64Vec(Vec4A); morton::code morton_emulated_4_signed = createMortonFromU64Vec(Vec4A); - + + // Some test and operation is moved to testCommon2.hlsl due to dxc bug that cause compilation failure. Uncomment when the bug is fixed. 
// Plus output.mortonPlus_small_2 = morton_small_2A + morton_small_2B; output.mortonPlus_medium_2 = morton_medium_2A + morton_medium_2B; @@ -186,19 +187,23 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.mortonUnsignedLess_small_4 = uint32_t4(morton_small_4A.lessThan(Vec4BSmall)); output.mortonUnsignedLess_medium_4 = uint32_t4(morton_medium_4A.lessThan(Vec4BMedium)); output.mortonUnsignedLess_full_4 = uint32_t4(morton_full_4A.lessThan(Vec4BFull)); + // output.mortonUnsignedLess_emulated_4 = uint32_t4(morton_emulated_4A.lessThan(Vec4BFull)); // Coordinate-wise signed inequality output.mortonSignedLess_small_2 = uint32_t2(morton_small_2_signed.lessThan(Vec2BSignedSmall)); output.mortonSignedLess_medium_2 = uint32_t2(morton_medium_2_signed.lessThan(Vec2BSignedMedium)); output.mortonSignedLess_full_2 = uint32_t2(morton_full_2_signed.lessThan(Vec2BSignedFull)); + // output.mortonSignedLess_emulated_2 = uint32_t2(morton_emulated_2_signed.lessThan(Vec2BSignedFull)); output.mortonSignedLess_small_3 = uint32_t3(morton_small_3_signed.lessThan(Vec3BSignedSmall)); output.mortonSignedLess_medium_3 = uint32_t3(morton_medium_3_signed.lessThan(Vec3BSignedMedium)); output.mortonSignedLess_full_3 = uint32_t3(morton_full_3_signed.lessThan(Vec3BSignedFull)); + // output.mortonSignedLess_emulated_3 = uint32_t3(morton_emulated_3_signed.lessThan(Vec3BSignedFull)); output.mortonSignedLess_small_4 = uint32_t4(morton_small_4_signed.lessThan(Vec4BSignedSmall)); output.mortonSignedLess_medium_4 = uint32_t4(morton_medium_4_signed.lessThan(Vec4BSignedMedium)); output.mortonSignedLess_full_4 = uint32_t4(morton_full_4_signed.lessThan(Vec4BSignedFull)); + // output.mortonSignedLess_emulated_4 = uint32_t4(morton_emulated_4_signed.lessThan(Vec4BSignedFull)); // Cast to uint16_t which is what left shift for Mortons expect uint16_t castedShift = uint16_t(input.shift); @@ -231,7 +236,7 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, 
NBL_REF_ARG(TestVa left_shift_operator > leftShiftEmulated4; output.mortonLeftShift_emulated_4 = leftShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - // // Unsigned right-shift + // Unsigned right-shift arithmetic_right_shift_operator > rightShiftSmall2; output.mortonUnsignedRightShift_small_2 = rightShiftSmall2(morton_small_2A, castedShift % smallBits_2); arithmetic_right_shift_operator > rightShiftMedium2; @@ -259,13 +264,15 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa arithmetic_right_shift_operator > rightShiftEmulated4; output.mortonUnsignedRightShift_emulated_4 = rightShiftEmulated4(morton_emulated_4A, castedShift % fullBits_4); - // // Signed right-shift + // Signed right-shift arithmetic_right_shift_operator > rightShiftSignedSmall2; output.mortonSignedRightShift_small_2 = rightShiftSignedSmall2(morton_small_2_signed, castedShift % smallBits_2); arithmetic_right_shift_operator > rightShiftSignedMedium2; output.mortonSignedRightShift_medium_2 = rightShiftSignedMedium2(morton_medium_2_signed, castedShift % mediumBits_2); arithmetic_right_shift_operator > rightShiftSignedFull2; output.mortonSignedRightShift_full_2 = rightShiftSignedFull2(morton_full_2_signed, castedShift % fullBits_2); + // arithmetic_right_shift_operator > rightShiftSignedEmulated2; + // output.mortonSignedRightShift_emulated_2 = rightShiftSignedEmulated2(morton_emulated_2_signed, castedShift % fullBits_2); arithmetic_right_shift_operator > rightShiftSignedSmall3; output.mortonSignedRightShift_small_3 = rightShiftSignedSmall3(morton_small_3_signed, castedShift % smallBits_3); @@ -273,6 +280,8 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.mortonSignedRightShift_medium_3 = rightShiftSignedMedium3(morton_medium_3_signed, castedShift % mediumBits_3); arithmetic_right_shift_operator > rightShiftSignedFull3; output.mortonSignedRightShift_full_3 = rightShiftSignedFull3(morton_full_3_signed, castedShift 
% fullBits_3); + // arithmetic_right_shift_operator > rightShiftSignedEmulated3; + // output.mortonSignedRightShift_emulated_3 = rightShiftSignedEmulated3(morton_emulated_3_signed, castedShift % fullBits_3); arithmetic_right_shift_operator > rightShiftSignedSmall4; output.mortonSignedRightShift_small_4 = rightShiftSignedSmall4(morton_small_4_signed, castedShift % smallBits_4); @@ -280,5 +289,7 @@ void fillTestValues(NBL_CONST_REF_ARG(InputTestValues) input, NBL_REF_ARG(TestVa output.mortonSignedRightShift_medium_4 = rightShiftSignedMedium4(morton_medium_4_signed, castedShift % mediumBits_4); arithmetic_right_shift_operator > rightShiftSignedFull4; output.mortonSignedRightShift_full_4 = rightShiftSignedFull4(morton_full_4_signed, castedShift % fullBits_4); + // arithmetic_right_shift_operator > rightShiftSignedEmulated4; + // output.mortonSignedRightShift_emulated_4 = rightShiftSignedEmulated4(morton_emulated_4_signed, castedShift % fullBits_4); } \ No newline at end of file From 7011ea0c5787518d5fe72977bc1af61eeccefd1d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 12 Dec 2025 23:41:52 +0700 Subject: [PATCH 47/57] Fix example 28 to use select instead of ternary_op --- 28_FFTBloom/app_resources/fft_convolve_ifft.hlsl | 12 +++++------- .../app_resources/kernel_fft_second_axis.hlsl | 10 ++++------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl index 07c2ec8cf..ffb405eef 100644 --- a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl +++ b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl @@ -68,8 +68,6 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase // This one shows up a lot so we give it a name const bool oddThread = glsl::gl_SubgroupInvocationID() & 1u; - ternary_operator > ternaryOp; - // Since every two consecutive columns are stored as one packed column, we divide the index by 2 to get the index of that packed column const 
uint32_t firstIndex = workgroup::SubgroupContiguousIndex() / 2; int32_t paddedIndex = int32_t(firstIndex) - pushConstants.halfPadding; @@ -93,17 +91,17 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase if (glsl::gl_WorkGroupID().x) { - complex_t lo = ternaryOp(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = ternaryOp(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); // --------------------------------------------------- MIRROR PADDING ------------------------------------------------------------------------------------------- #ifdef MIRROR_PADDING - preloaded[localElementIndex] = ternaryOp(oddThread ^ invert, hi, lo); + preloaded[localElementIndex] = select(oddThread ^ invert, hi, lo); // ----------------------------------------------------- ZERO PADDING ------------------------------------------------------------------------------------------- #else const complex_t Zero = { scalar_t(0), scalar_t(0) }; - preloaded[localElementIndex] = ternaryOp(invert, Zero, ternaryOp(oddThread, hi, lo)); + preloaded[localElementIndex] = select(invert, Zero, select(oddThread, hi, lo)); #endif // ------------------------------------------------ END PADDING DIVERGENCE ---------------------------------------------------------------------------------------- } @@ -116,7 +114,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[localElementIndex] = ternaryOp(oddThread ^ invert, oddThreadHi, evenThreadLo); + preloaded[localElementIndex] = select(oddThread ^ invert, oddThreadHi, evenThreadLo); } paddedIndex += WorkgroupSize / 2; } diff --git a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl 
b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl index eaecb5d0f..a1e5a76cd 100644 --- a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl +++ b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl @@ -46,8 +46,6 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas // This one shows up a lot so we give it a name const bool oddThread = glsl::gl_SubgroupInvocationID() & 1u; - ternary_operator > ternaryOp; - if (glsl::gl_WorkGroupID().x) { // Even thread must index a y corresponding to an even element of the previous FFT pass, and the odd thread must index its DFT Mirror @@ -72,10 +70,10 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const vector loOrHiVector = vector (loOrHi.real(), loOrHi.imag()); const vector otherThreadloOrHiVector = glsl::subgroupShuffleXor< vector >(loOrHiVector, 1u); const complex_t otherThreadLoOrHi = { otherThreadloOrHiVector.x, otherThreadloOrHiVector.y }; - complex_t lo = ternaryOp(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = ternaryOp(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); - preloaded[channel][localElementIndex] = ternaryOp(oddThread, hi, lo); + preloaded[channel][localElementIndex] = select(oddThread, hi, lo); packedColumnIndex += WorkgroupSize / 2; } @@ -112,7 +110,7 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[channel][localElementIndex] = ternaryOp(oddThread, oddThreadHi, evenThreadLo); + preloaded[channel][localElementIndex] = select(oddThread, oddThreadHi, evenThreadLo); packedColumnIndex += WorkgroupSize / 2; } From 02eed2e1c81446c9f757f1fc7dc3a283abeabf23 Mon Sep 
17 00:00:00 2001 From: kevyuu Date: Fri, 12 Dec 2025 23:58:34 +0700 Subject: [PATCH 48/57] Fix example 28 --- 28_FFTBloom/app_resources/fft_convolve_ifft.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl index ffb405eef..1b8a4c076 100644 --- a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl +++ b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl @@ -97,7 +97,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase // --------------------------------------------------- MIRROR PADDING ------------------------------------------------------------------------------------------- #ifdef MIRROR_PADDING - preloaded[localElementIndex] = select(oddThread ^ invert, hi, lo); + preloaded[localElementIndex] = select(_static_cast(oddThread ^ invert), hi, lo); // ----------------------------------------------------- ZERO PADDING ------------------------------------------------------------------------------------------- #else const complex_t Zero = { scalar_t(0), scalar_t(0) }; @@ -114,7 +114,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[localElementIndex] = select(oddThread ^ invert, oddThreadHi, evenThreadLo); + preloaded[localElementIndex] = select(_static_cast(oddThread ^ invert), oddThreadHi, evenThreadLo); } paddedIndex += WorkgroupSize / 2; } From 30b4f52c17cf8f6ae2aaae9b846aa1048954cd16 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 13 Dec 2025 02:26:15 +0700 Subject: [PATCH 49/57] prefix select with hlsl:: --- 28_FFTBloom/app_resources/fft_convolve_ifft.hlsl | 10 +++++----- 28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git 
a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl index 1b8a4c076..61a819992 100644 --- a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl +++ b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl @@ -91,17 +91,17 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase if (glsl::gl_WorkGroupID().x) { - complex_t lo = select(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = select(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); // --------------------------------------------------- MIRROR PADDING ------------------------------------------------------------------------------------------- #ifdef MIRROR_PADDING - preloaded[localElementIndex] = select(_static_cast(oddThread ^ invert), hi, lo); + preloaded[localElementIndex] = hlsl::select(_static_cast(oddThread ^ invert), hi, lo); // ----------------------------------------------------- ZERO PADDING ------------------------------------------------------------------------------------------- #else const complex_t Zero = { scalar_t(0), scalar_t(0) }; - preloaded[localElementIndex] = select(invert, Zero, select(oddThread, hi, lo)); + preloaded[localElementIndex] = hlsl::select(invert, Zero, hlsl::select(oddThread, hi, lo)); #endif // ------------------------------------------------ END PADDING DIVERGENCE ---------------------------------------------------------------------------------------- } @@ -114,7 +114,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[localElementIndex] = select(_static_cast(oddThread ^ invert), oddThreadHi, evenThreadLo); + preloaded[localElementIndex] = 
hlsl::select(_static_cast(oddThread ^ invert), oddThreadHi, evenThreadLo); } paddedIndex += WorkgroupSize / 2; } diff --git a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl index a1e5a76cd..6276ed02e 100644 --- a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl +++ b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl @@ -70,10 +70,10 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const vector loOrHiVector = vector (loOrHi.real(), loOrHi.imag()); const vector otherThreadloOrHiVector = glsl::subgroupShuffleXor< vector >(loOrHiVector, 1u); const complex_t otherThreadLoOrHi = { otherThreadloOrHiVector.x, otherThreadloOrHiVector.y }; - complex_t lo = select(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = select(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); - preloaded[channel][localElementIndex] = select(oddThread, hi, lo); + preloaded[channel][localElementIndex] = hlsl::select(oddThread, hi, lo); packedColumnIndex += WorkgroupSize / 2; } @@ -110,7 +110,7 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[channel][localElementIndex] = select(oddThread, oddThreadHi, evenThreadLo); + preloaded[channel][localElementIndex] = hlsl::select(oddThread, oddThreadHi, evenThreadLo); packedColumnIndex += WorkgroupSize / 2; } From 3e443b12e4511240fa783cc4bab6291c0d115ed9 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 13 Dec 2025 02:52:17 +0700 Subject: [PATCH 50/57] Add nbl prefix to hlsl::select --- 28_FFTBloom/app_resources/fft_convolve_ifft.hlsl | 10 +++++----- 
28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl index 61a819992..a0c1133cc 100644 --- a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl +++ b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl @@ -91,17 +91,17 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase if (glsl::gl_WorkGroupID().x) { - complex_t lo = hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = nbl::hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = nbl::hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); // --------------------------------------------------- MIRROR PADDING ------------------------------------------------------------------------------------------- #ifdef MIRROR_PADDING - preloaded[localElementIndex] = hlsl::select(_static_cast(oddThread ^ invert), hi, lo); + preloaded[localElementIndex] = nbl::hlsl::select(_static_cast(oddThread ^ invert), hi, lo); // ----------------------------------------------------- ZERO PADDING ------------------------------------------------------------------------------------------- #else const complex_t Zero = { scalar_t(0), scalar_t(0) }; - preloaded[localElementIndex] = hlsl::select(invert, Zero, hlsl::select(oddThread, hi, lo)); + preloaded[localElementIndex] = nbl::hlsl::select(invert, Zero, nbl::hlsl::select(oddThread, hi, lo)); #endif // ------------------------------------------------ END PADDING DIVERGENCE ---------------------------------------------------------------------------------------- } @@ -114,7 +114,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { 
otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[localElementIndex] = hlsl::select(_static_cast(oddThread ^ invert), oddThreadHi, evenThreadLo); + preloaded[localElementIndex] = nbl::hlsl::select(_static_cast(oddThread ^ invert), oddThreadHi, evenThreadLo); } paddedIndex += WorkgroupSize / 2; } diff --git a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl index 6276ed02e..eca81e859 100644 --- a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl +++ b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl @@ -70,10 +70,10 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const vector loOrHiVector = vector (loOrHi.real(), loOrHi.imag()); const vector otherThreadloOrHiVector = glsl::subgroupShuffleXor< vector >(loOrHiVector, 1u); const complex_t otherThreadLoOrHi = { otherThreadloOrHiVector.x, otherThreadloOrHiVector.y }; - complex_t lo = hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); - complex_t hi = hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); + complex_t lo = nbl::hlsl::select(oddThread, otherThreadLoOrHi, loOrHi); + complex_t hi = nbl::hlsl::select(oddThread, loOrHi, otherThreadLoOrHi); fft::unpack(lo, hi); - preloaded[channel][localElementIndex] = hlsl::select(oddThread, hi, lo); + preloaded[channel][localElementIndex] = nbl::hlsl::select(oddThread, hi, lo); packedColumnIndex += WorkgroupSize / 2; } @@ -110,7 +110,7 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[channel][localElementIndex] = hlsl::select(oddThread, oddThreadHi, evenThreadLo); + preloaded[channel][localElementIndex] = nbl::hlsl::select(oddThread, oddThreadHi, evenThreadLo); packedColumnIndex += WorkgroupSize / 2; } From 
07d0197eff60cae99ff226825b942ec64b3504d7 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Fri, 12 Dec 2025 21:29:55 -0300 Subject: [PATCH 51/57] Patch for semantic clarity, remove usage of hlsl keyword named variable --- 28_FFTBloom/app_resources/fft_convolve_ifft.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl index a0c1133cc..02ae4ff40 100644 --- a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl +++ b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl @@ -80,7 +80,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase { // If mirrored, we need to invert which thread is loading lo and which is loading hi // If using zero-padding, useful to find out if we're outside of [0,1) bounds - bool invert = paddedIndex < 0 || paddedIndex >= pushConstants.imageHalfRowLength; + bool inPadding = paddedIndex < 0 || paddedIndex >= pushConstants.imageHalfRowLength; int32_t wrappedIndex = paddedIndex < 0 ? ~paddedIndex : paddedIndex; // ~x = - x - 1 in two's complement (except maybe at the borders of representable range) wrappedIndex = paddedIndex < pushConstants.imageHalfRowLength ? 
wrappedIndex : pushConstants.imageRowLength + ~paddedIndex; const complex_t loOrHi = colMajorAccessor.get(colMajorOffset(wrappedIndex, y)); @@ -97,11 +97,11 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase // --------------------------------------------------- MIRROR PADDING ------------------------------------------------------------------------------------------- #ifdef MIRROR_PADDING - preloaded[localElementIndex] = nbl::hlsl::select(_static_cast(oddThread ^ invert), hi, lo); + preloaded[localElementIndex] = nbl::hlsl::select(oddThread != inPadding, hi, lo); // ----------------------------------------------------- ZERO PADDING ------------------------------------------------------------------------------------------- #else const complex_t Zero = { scalar_t(0), scalar_t(0) }; - preloaded[localElementIndex] = nbl::hlsl::select(invert, Zero, nbl::hlsl::select(oddThread, hi, lo)); + preloaded[localElementIndex] = nbl::hlsl::select(inPadding, Zero, nbl::hlsl::select(oddThread, hi, lo)); #endif // ------------------------------------------------ END PADDING DIVERGENCE ---------------------------------------------------------------------------------------- } @@ -114,7 +114,7 @@ struct PreloadedSecondAxisAccessor : PreloadedAccessorMirrorTradeBase const complex_t evenThreadLo = { loOrHi.real(), otherThreadLoOrHi.real() }; // Odd thread writes `hi = Z1 + iN1` const complex_t oddThreadHi = { otherThreadLoOrHi.imag(), loOrHi.imag() }; - preloaded[localElementIndex] = nbl::hlsl::select(_static_cast(oddThread ^ invert), oddThreadHi, evenThreadLo); + preloaded[localElementIndex] = nbl::hlsl::select(oddThread != inPadding, oddThreadHi, evenThreadLo); } paddedIndex += WorkgroupSize / 2; } From 8a20833f36be08910e307cd59b9e2550b0cfe0f1 Mon Sep 17 00:00:00 2001 From: devsh Date: Sun, 14 Dec 2025 12:14:47 +0100 Subject: [PATCH 52/57] refactor slightly --- .../app_resources/binarySearch.comp.hlsl | 11 ++++---- .../app_resources/common.h | 14 ++++------ 
.../app_resources/present.frag.hlsl | 19 -------------- 72_CooperativeBinarySearch/main.cpp | 26 ++++++++++--------- 4 files changed, 25 insertions(+), 45 deletions(-) delete mode 100644 72_CooperativeBinarySearch/app_resources/present.frag.hlsl diff --git a/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl b/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl index 05c0d8464..0834e8f91 100644 --- a/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl +++ b/72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl @@ -1,18 +1,18 @@ -// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2024-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - #pragma wave shader_stage(compute) #include "common.h" + #include "nbl/builtin/hlsl/glsl_compat/subgroup_ballot.hlsl" + using namespace nbl::hlsl; [[vk::push_constant]] PushConstants Constants; [[vk::binding(0)]] StructuredBuffer Histogram; [[vk::binding(1)]] RWStructuredBuffer Output; -static const uint32_t GroupsharedSize = 256; uint getNextPowerOfTwo(uint number) { return 2 << firstbithigh(number - 1); @@ -61,9 +61,10 @@ uint binarySearchLowerBoundFindValue(uint findValue, StructuredBuffer sear return left + firstLaneGreaterThan - 1; } +static const uint32_t GroupsharedSize = WorkgroupSize; groupshared uint shared_groupSearchBufferMinIndex; groupshared uint shared_groupSearchBufferMaxIndex; -groupshared uint shared_groupSearchValues[GroupsharedSize]; +groupshared uint shared_groupSearchValues[WorkgroupSize]; // Binary search using the entire workgroup, making it log32 or log64 (every iteration, the possible set of // values is divided by the number of lanes in a wave) @@ -112,7 +113,7 @@ uint binarySearchLowerBoundCooperative(uint groupIndex, uint groupThread, Struct return laneValue; } -[numthreads(256, 1, 1)] 
+[numthreads(WorkgroupSize,1,1)] void main(const uint3 thread : SV_DispatchThreadID, const uint3 groupThread : SV_GroupThreadID, const uint3 group : SV_GroupID) { Output[thread.x] = binarySearchLowerBoundCooperative(group.x, groupThread.x, Histogram, Constants.EntityCount); diff --git a/72_CooperativeBinarySearch/app_resources/common.h b/72_CooperativeBinarySearch/app_resources/common.h index 4a3cacaa4..65f606b08 100644 --- a/72_CooperativeBinarySearch/app_resources/common.h +++ b/72_CooperativeBinarySearch/app_resources/common.h @@ -1,19 +1,15 @@ -#ifndef _COOPERATIVE_BINARY_SEARCH_HLSL_INCLUDED_ -#define _COOPERATIVE_BINARY_SEARCH_HLSL_INCLUDED_ +#ifndef _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ +#define _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ #include #include -using namespace nbl::hlsl; -namespace nbl { -namespace hlsl { +// TODO: NBL_CONSTEXPR_NSPC_VAR +static const uint32_t WorkgroupSize = 256; struct PushConstants { uint32_t EntityCount; }; -}; -}; - -#endif // _COOPERATIVE_BINARY_SEARCH_HLSL_INCLUDED_ +#endif // _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ diff --git a/72_CooperativeBinarySearch/app_resources/present.frag.hlsl b/72_CooperativeBinarySearch/app_resources/present.frag.hlsl deleted file mode 100644 index 22695657c..000000000 --- a/72_CooperativeBinarySearch/app_resources/present.frag.hlsl +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#pragma wave shader_stage(fragment) - -// vertex shader is provided by the fullScreenTriangle extension -#include -using namespace nbl::hlsl; -using namespace ext::FullScreenTriangle; - -// binding 0 set 0 -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; - -[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 -{ - return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); -} \ No newline at end of file diff --git a/72_CooperativeBinarySearch/main.cpp b/72_CooperativeBinarySearch/main.cpp index 828adf34f..81724c1b8 100644 --- a/72_CooperativeBinarySearch/main.cpp +++ b/72_CooperativeBinarySearch/main.cpp @@ -20,11 +20,14 @@ using namespace nbl::ui; using namespace nbl::video; using namespace nbl::examples; -//using namespace glm; - -static constexpr uint32_t TestCaseIndices[] = { +// +constexpr uint32_t TestCaseIndices[] = { #include "testCaseData.h" }; +constexpr uint32_t numIndices = sizeof(TestCaseIndices) / sizeof(TestCaseIndices[0]); +constexpr uint32_t lastValue = TestCaseIndices[numIndices - 1]; +// just some extra stuff over the edge +constexpr uint32_t totalValues = lastValue + 100; void cpu_tests(); @@ -85,7 +88,7 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp SPushConstantRange pcRange = {}; pcRange.stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE; pcRange.offset = 0u; - pcRange.size = sizeof(nbl::hlsl::PushConstants); + pcRange.size = sizeof(PushConstants); auto layout = m_device->createPipelineLayout({ &pcRange,1 }, smart_refctd_ptr(m_descriptorSetLayout)); IGPUComputePipeline::SCreationParams params = {}; params.layout = layout.get(); @@ -94,11 +97,12 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp if (!m_device->createComputePipelines(nullptr, { &params,1 }, &m_pipeline)) 
return logFail("Failed to create compute pipeline!\n"); } - + + const size_t sizes[2] = {sizeof(TestCaseIndices),sizeof(uint32_t)*totalValues}; for (uint32_t i = 0; i < bindingCount; i++) { m_buffers[i] = m_device->createBuffer(IGPUBuffer::SCreationParams { - {.size = 500000, .usage = + {.size = sizes[i], .usage = IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT | IGPUBuffer::E_USAGE_FLAGS::EUF_STORAGE_BUFFER_BIT, } @@ -146,7 +150,8 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp memcpy( reinterpret_cast(outPtr), reinterpret_cast(&TestCaseIndices[0]), - sizeof(TestCaseIndices)); + sizeof(TestCaseIndices) + ); // In contrast to fences, we just need one semaphore to rule all dispatches return true; @@ -187,16 +192,13 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp const IGPUDescriptorSet* set = m_descriptorSet.get(); - const uint32_t numIndices = sizeof(TestCaseIndices) / sizeof(TestCaseIndices[0]); - const uint32_t lastValue = TestCaseIndices[numIndices - 1]; - const uint32_t totalValues = lastValue + 100; - nbl::hlsl::PushConstants coopBinarySearchPC = { + PushConstants coopBinarySearchPC = { .EntityCount = numIndices, }; m_cmdbuf->bindComputePipeline(m_pipeline.get()); m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0u, 1u, &set); - m_cmdbuf->pushConstants(m_pipeline->getLayout(), nbl::hlsl::ShaderStage::ESS_COMPUTE, 0u, sizeof(nbl::hlsl::PushConstants), &coopBinarySearchPC); + m_cmdbuf->pushConstants(m_pipeline->getLayout(), nbl::hlsl::ShaderStage::ESS_COMPUTE, 0u, sizeof(PushConstants), &coopBinarySearchPC); m_cmdbuf->dispatch((totalValues + 255u) / 256u, 1u, 1u); layoutBufferBarrier[0].barrier.dep = layoutBufferBarrier[0].barrier.dep.nextBarrier(PIPELINE_STAGE_FLAGS::COPY_BIT,ACCESS_FLAGS::TRANSFER_READ_BIT); From 4425ec1454acd2e7771f290d7b5f08fd9dbcb07b Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:32:18 
+0100 Subject: [PATCH 53/57] ambiguity of `is_same_v` patched up --- 22_CppCompat/app_resources/test.comp.hlsl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/22_CppCompat/app_resources/test.comp.hlsl b/22_CppCompat/app_resources/test.comp.hlsl index 98be76c53..17c59f970 100644 --- a/22_CppCompat/app_resources/test.comp.hlsl +++ b/22_CppCompat/app_resources/test.comp.hlsl @@ -3,9 +3,6 @@ //// For conditions of distribution and use, see copyright notice in nabla.h #include "app_resources/common.hlsl" -template -const static bool is_same_v = nbl::hlsl::is_same_v; - struct PushConstants { @@ -88,6 +85,7 @@ struct device_capabilities2 }; [numthreads(8, 8, 1)] +[shader("compute")] void main(uint3 invocationID : SV_DispatchThreadID) { fill(invocationID, 1); @@ -157,9 +155,9 @@ void main(uint3 invocationID : SV_DispatchThreadID) { static const uint16_t TEST_VALUE_0 = 5; static const uint32_t TEST_VALUE_1 = 0x80000000u; - static const uint32_t TEST_VALUE_2 = 0x8000000000000000u; + static const uint32_t TEST_VALUE_2 = 0x8000000000000000u; // TODO: Przmek is this intended? it warns because its too big from uint32_t static const uint32_t TEST_VALUE_3 = 0x00000001u; - static const uint32_t TEST_VALUE_4 = 0x0000000000000001u; + static const uint32_t TEST_VALUE_4 = 0x0000000000000001u; // TODO: Przmek is this intended? 
it warns because its too big from uint32_t fill(invocationID, 5.01); From 1c6458d81b83aea176ac7ebda7450a9b395a85bd Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 17 Dec 2025 22:23:10 +0300 Subject: [PATCH 54/57] A lot more debuggability, and: - Camera movement is disabled correctly - Hacked ViewManipulate to use for the cube itself - Added a storage buffer for debugging and getting stuff from GPU to CPU - Most importantly, disabled skew, used TRS for that - Random OBB buttons - Detection of mismatch of silhouette vertices (between slow more correct algo vs fast LUT based algo) --- .../app_resources/hlsl/Drawing.hlsl | 172 +++++ .../hlsl/SolidAngleVis.frag.hlsl | 644 +++++++++--------- .../app_resources/hlsl/common.hlsl | 49 +- .../app_resources/hlsl/utils.hlsl | 23 + 72_SolidAngleVisualizer/include/transform.hpp | 73 +- 72_SolidAngleVisualizer/main.cpp | 375 ++++++++-- .../include/nbl/examples/cameras/CCamera.hpp | 5 + 7 files changed, 939 insertions(+), 402 deletions(-) create mode 100644 72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl create mode 100644 72_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl new file mode 100644 index 000000000..c3cb5befa --- /dev/null +++ b/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -0,0 +1,172 @@ +#ifndef _DEBUG_HLSL_ +#define _DEBUG_HLSL_ +#include "common.hlsl" + +float2 sphereToCircle(float3 spherePoint) +{ + if (spherePoint.z >= 0.0f) + { + return spherePoint.xy * CIRCLE_RADIUS; + } + else + { + float r2 = (1.0f - spherePoint.z) / (1.0f + spherePoint.z); + float uv2Plus1 = r2 + 1.0f; + return (spherePoint.xy * uv2Plus1 / 2.0f) * CIRCLE_RADIUS; + } +} + +float4 drawGreatCircleArc(float3 fragPos, float3 points[2], int visibility, float aaWidth) +{ + if (visibility == 0) return float4(0,0,0,0); + + float3 v0 = normalize(points[0]); + float3 v1 = normalize(points[1]); + 
float3 p = normalize(fragPos); + + float3 arcNormal = normalize(cross(v0, v1)); + float dist = abs(dot(p, arcNormal)); + + float dotMid = dot(v0, v1); + bool onArc = (dot(p, v0) >= dotMid) && (dot(p, v1) >= dotMid); + + if (!onArc) return float4(0,0,0,0); + + float avgDepth = (length(points[0]) + length(points[1])) * 0.5f; + float depthScale = 3.0f / avgDepth; + + float baseWidth = (visibility == 1) ? 0.01f : 0.005f; + float width = min(baseWidth * depthScale, 0.02f); + + float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); + + float4 edgeColor = (visibility == 1) ? + float4(0.0f, 0.5f, 1.0f, 1.0f) : + float4(1.0f, 0.0f, 0.0f, 1.0f); + + float intensity = (visibility == 1) ? 1.0f : 0.5f; + return edgeColor * alpha * intensity; +} + +float4 drawHiddenEdges(float3 spherePos, uint32_t silEdgeMask, float aaWidth) +{ + float4 color = float4(0,0,0,0); + float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); + + for (int i = 0; i < 12; i++) + { + if ((silEdgeMask & (1u << i)) == 0) + { + int2 edge = allEdges[i]; + float3 edgePoints[2] = { corners[edge.x], corners[edge.y] }; + float4 edgeContribution = drawGreatCircleArc(spherePos, edgePoints, 1, aaWidth); + color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); + } + } + return color; +} + +float4 drawCorners(float3 spherePos, float2 p, float aaWidth) +{ + float4 color = float4(0,0,0,0); + for (int i = 0; i < 8; i++) + { + float3 corner3D = normalize(corners[i]); + float2 cornerPos = sphereToCircle(corner3D); + float dist = length(p - cornerPos); + float dotSize = 0.02f; + float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); + if (dotAlpha > 0.0f) + { + float3 dotColor = colorLUT[i]; + color += float4(dotColor * dotAlpha, dotAlpha); + } + } + return color; +} + +float4 drawRing(float2 p, float aaWidth) +{ + float positionLength = length(p); + float ringWidth = 0.002f; + float ringDistance = abs(positionLength - CIRCLE_RADIUS); + float ringAlpha = 1.0f - 
smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); + return ringAlpha * float4(1, 1, 1, 1); +} + +// Check if a face on the hemisphere is visible from camera at origin +bool isFaceVisible(float3 faceCenter, float3 faceNormal) +{ + float3 viewVec = normalize(-faceCenter); // Vector from the face center toward the camera (the camera sits at the origin) + return dot(faceNormal, viewVec) > 0.0f; +} + +int getEdgeVisibility(int edgeIdx) +{ + int2 faces = edgeToFaces[edgeIdx]; + + // Transform normals to world space + float3x3 rotMatrix = (float3x3)pc.modelMatrix; + float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); + float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); + + bool visible1 = isFaceVisible(faceCenters[faces.x], n_world_f1); + bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); + + // Silhouette: exactly one face visible + if (visible1 != visible2) return 1; + + // Inner edge: both faces visible + if (visible1 && visible2) return 2; + + // Hidden edge: both faces hidden + return 0; +} + +#if DEBUG_DATA +uint32_t computeGroundTruthEdgeMask() +{ + uint32_t mask = 0u; + NBL_UNROLL + for (int j = 0; j < 12; j++) + { + // getEdgeVisibility returns 1 for a silhouette edge based on 3D geometry + if (getEdgeVisibility(j) == 1) + { + mask |= (1u << j); + } + } + return mask; +} + +void validateEdgeVisibility(uint32_t sil, int vertexCount, uint32_t generatedSilMask) +{ + uint32_t mismatchAccumulator = 0; + + // The Ground Truth now represents the full 3D silhouette, clipped or not. + uint32_t groundTruthMask = computeGroundTruthEdgeMask(); + + // The comparison checks if the generated mask perfectly matches the full 3D ground truth. 
+ uint32_t mismatchMask = groundTruthMask ^ generatedSilMask; + + if (mismatchMask != 0) + { + NBL_UNROLL + for (int j = 0; j < 12; j++) + { + if ((mismatchMask >> j) & 1u) + { + int2 edge = allEdges[j]; + // Accumulate vertex indices where error occurred + mismatchAccumulator |= (1u << edge.x) | (1u << edge.y); + } + } + } + + // Simple Write (assuming all fragments calculate the same result) + InterlockedOr(DebugDataBuffer[0].edgeVisibilityMismatch, mismatchAccumulator); +} +#endif + + +#endif // _DEBUG_HLSL_ diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 51cb1946d..cd291dbd2 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -1,376 +1,374 @@ #pragma wave shader_stage(fragment) #include "common.hlsl" - #include +#include "utils.hlsl" using namespace nbl::hlsl; using namespace ext::FullScreenTriangle; [[vk::push_constant]] struct PushConstants pc; +[[vk::binding(0, 0)]] RWStructuredBuffer DebugDataBuffer; -static const float CIRCLE_RADIUS = 0.75f; +static const float CIRCLE_RADIUS = 0.5f; // --- Geometry Utils --- -// Adjacency of edges to faces -static const int2 edgeToFaces[12] = { - {4,2}, {3,4}, {2,5}, {5,3}, - {2,0}, {0,3}, {1,2}, {3,1}, - {0,4}, {5,0}, {4,1}, {1,5} -}; - -//float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f static const float3 constCorners[8] = { - float3(-1, -1, -1), // 0 - float3( 1, -1, -1), // 1 - float3(-1, 1, -1), // 2 - float3( 1, 1, -1), // 3 - float3(-1, -1, 1), // 4 - float3( 1, -1, 1), // 5 - float3(-1, 1, 1), // 6 - float3( 1, 1, 1) // 7 + float3(-1, -1, -1), float3(1, -1, -1), float3(-1, 1, -1), float3(1, 1, -1), + float3(-1, -1, 1), float3(1, -1, 1), float3(-1, 1, 1), float3(1, 1, 1) }; -// All 12 edges of the cube (vertex index pairs) static const int2 allEdges[12] = { - {0, 1}, {2, 3}, {4, 5}, {6, 7}, // Edges 
along X axis - {0, 2}, {1, 3}, {4, 6}, {5, 7}, // Edges along Y axis - {0, 4}, {1, 5}, {2, 6}, {3, 7} // Edges along Z axis + {0, 1}, {2, 3}, {4, 5}, {6, 7}, // X axis + {0, 2}, {1, 3}, {4, 6}, {5, 7}, // Y axis + {0, 4}, {1, 5}, {2, 6}, {3, 7} // Z axis }; -static const float3 localNormals[6] = { - float3(0, 0, -1), // Face 0 (Z-) - float3(0, 0, 1), // Face 1 (Z+) - float3(-1, 0, 0), // Face 2 (X-) - float3(1, 0, 0), // Face 3 (X+) - float3(0, -1, 0), // Face 4 (Y-) - float3(0, 1, 0) // Face 5 (Y+) +// Adjacency of edges to faces +// Corrected Adjacency of edges to faces +static const int2 edgeToFaces[12] = { + // Edge Index: | allEdges[i] | Shared Faces: + + /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) + /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0) + /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1) + /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1) + + /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0) + /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0) + /* 6 (4-6) */ {2, 1}, // X- (2) and Z+ (1) + /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1) + + /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4) + /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4) + /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5) + /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) }; - static float3 corners[8]; -static float3 faceCenters[6] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), - float3(0,0,0), float3(0,0,0), float3(0,0,0) }; - - -static const float3 colorLUT[27] = { - // Row 1: Pure and bright colors - float3(0, 0, 0), // 0: Black - float3(1, 1, 1), // 1: White - float3(0.5, 0.5, 0.5), // 2: Gray - - // Row 2: Primary colors - float3(1, 0, 0), // 3: Red - float3(0, 1, 0), // 4: Green - float3(0, 0, 1), // 5: Blue - - // Row 3: Secondary colors - float3(1, 1, 0), // 6: Yellow - float3(1, 0, 1), // 7: Magenta - float3(0, 1, 1), // 8: Cyan - - // Row 4: Orange family - float3(1, 0.5, 0), // 9: Orange - float3(1, 0.65, 0), // 10: Light Orange - float3(0.8, 0.4, 0), // 11: Dark Orange - - // Row 5: Pink/Rose family - float3(1, 0.4, 0.7), // 12: 
Pink - float3(1, 0.75, 0.8), // 13: Light Pink - float3(0.7, 0.1, 0.3), // 14: Deep Rose - - // Row 6: Purple/Violet family - float3(0.5, 0, 0.5), // 15: Purple - float3(0.6, 0.4, 0.8), // 16: Light Purple - float3(0.3, 0, 0.5), // 17: Indigo - - // Row 7: Green variations - float3(0, 0.5, 0), // 18: Dark Green - float3(0.5, 1, 0), // 19: Lime - float3(0, 0.5, 0.25), // 20: Forest Green - - // Row 8: Blue variations - float3(0, 0, 0.5), // 21: Navy - float3(0.3, 0.7, 1), // 22: Sky Blue - float3(0, 0.4, 0.6), // 23: Teal - - // Row 9: Earth tones - float3(0.6, 0.4, 0.2), // 24: Brown - float3(0.8, 0.7, 0.3), // 25: Tan/Beige - float3(0.4, 0.3, 0.1) // 26: Dark Brown +static float3 faceCenters[6] = { + float3(0,0,0), float3(0,0,0), float3(0,0,0), + float3(0,0,0), float3(0,0,0), float3(0,0,0) +}; + +static const float3 localNormals[6] = { + float3(0, 0, -1), // Face 0 (Z-) + float3(0, 0, 1), // Face 1 (Z+) + float3(-1, 0, 0), // Face 2 (X-) + float3(1, 0, 0), // Face 3 (X+) + float3(0, -1, 0), // Face 4 (Y-) + float3(0, 1, 0) // Face 5 (Y+) }; - +// TODO: unused, remove later // Vertices are ordered CCW relative to the camera view. 
static const int silhouettes[27][7] = { - {6, 1, 3, 2, 6, 4, 5}, // 0: Black - {6, 2, 6, 4, 5, 7, 3}, // 1: White - {6, 0, 4, 5, 7, 3, 2}, // 2: Gray - {6, 1, 3, 7, 6, 4, 5,}, // 3: Red - {4, 4, 5, 7, 6, -1, -1}, // 4: Green - {6, 0, 4, 5, 7, 6, 2}, // 5: Blue - {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow - {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta - {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan - {6, 1, 3, 2, 6, 7, 5}, // 9: Orange - {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange - {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange - {4, 1, 3, 7, 5, -1, -1}, // 12: Pink - {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink - {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose - {6, 0, 1, 3, 7, 5, 4}, // 15: Purple - {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple - {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo - {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green - {6, 0, 2, 6, 7, 3, 1}, // 19: Lime - {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green - {6, 0, 2, 3, 7, 5, 1}, // 21: Navy - {4, 0, 2, 3, 1, -1, -1}, // 22: Sky Blue - {6, 0, 4, 6, 2, 3, 1}, // 23: Teal - {6, 0, 2, 3, 7, 5, 4}, // 24: Brown - {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige - {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown + {6, 1, 3, 2, 6, 4, 5}, // 0: Black + {6, 2, 6, 4, 5, 7, 3}, // 1: White + {6, 0, 4, 5, 7, 3, 2}, // 2: Gray + {6, 1, 3, 7, 6, 4, 5,}, // 3: Red + {4, 4, 5, 7, 6, -1, -1}, // 4: Green + {6, 0, 4, 5, 7, 6, 2}, // 5: Blue + {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow + {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta + {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan + {6, 1, 3, 2, 6, 7, 5}, // 9: Orange + {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange + {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange + {4, 1, 3, 7, 5, -1, -1}, // 12: Pink + {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink + {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose + {6, 0, 1, 3, 7, 5, 4}, // 15: Purple + {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple + {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo + {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green + {6, 0, 2, 6, 7, 3, 1}, // 19: Lime + {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green + {6, 0, 2, 3, 7, 5, 1}, // 21: Navy + {4, 
0, 2, 3, 1, -1, -1}, // 22: Sky Blue + {6, 0, 4, 6, 2, 3, 1}, // 23: Teal + {6, 0, 2, 3, 7, 5, 4}, // 24: Brown + {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige + {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown }; -// Converts UV into centered, aspect-corrected NDC circle space -float2 toCircleSpace(float2 uv) -{ - // Map [0,1] UV to [-1,1] - float2 p = uv * 2.0f - 1.0f; - - // Correct aspect ratio - float aspect = pc.viewport.z / pc.viewport.w; // width / height - p.x *= aspect; - - return p * CIRCLE_RADIUS; -} +// Binary packed silhouettes +static const uint32_t binSilhouettes[27] = { + 0b11000000000000101100110010011001, + 0b11000000000000011111101100110010, + 0b11000000000000010011111101100000, + 0b11000000000000101100110111011001, + 0b10000000000000000000110111101100, + 0b11000000000000010110111101100000, + 0b11000000000000100110111011001000, + 0b11000000000000100110111101001000, + 0b11000000000000010110111101001000, + 0b11000000000000101111110010011001, + 0b10000000000000000000011111110010, + 0b11000000000000010011111110100000, + 0b10000000000000000000101111011001, + 0b11000000000000010011111110100000, + 0b10000000000000000000010110100000, + 0b11000000000000100101111011001000, + 0b10000000000000000000100101001000, + 0b11000000000000010110100101001000, + 0b11000000000000001101111110010000, + 0b11000000000000001011111110010000, + 0b11000000000000001011111110100000, + 0b11000000000000001101111011010000, + 0b10000000000000000000001011010000, + 0b11000000000000001011010110100000, + 0b11000000000000100101111011010000, + 0b11000000000000100101001011010000, + 0b11000000000000011010110100101001, +}; -void computeCubeGeo() +int getSilhouetteVertex(uint32_t packedSil, int index) { - for (int i = 0; i < 8; i++) - { - float3 localPos = constCorners[i]; //float3(i % 2, (i / 2) % 2, (i / 4) % 2) * 2.0f - 1.0f; - float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; - - corners[i] = worldPos.xyz; - - faceCenters[i/4] += worldPos / 4.0f; - faceCenters[2+i%2] += worldPos / 4.0f; 
- faceCenters[4+(i/2)%2] += worldPos / 4.0f; - } + return (packedSil >> (3 * index)) & 0x7; } -float4 drawCorners(float3 spherePos, float aaWidth) +// Get silhouette size +int getSilhouetteSize(uint32_t sil) { - float4 color = float4(0,0,0,0); - // Draw corner labels for debugging - for (int i = 0; i < 8; i++) - { - float3 corner = normalize(corners[i]); - float2 cornerPos = corner.xy; - // Project corner onto 2D circle space - - // Distance from current fragment to corner - float dist = length(spherePos.xy - cornerPos); - - // Draw a small colored dot at the corner - float dotSize = 0.03f; - float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); - - if (dotAlpha > 0.0f) - { - float brightness = float(i) / 7.0f; - float3 dotColor = colorLUT[i]; - color += float4(dotColor * dotAlpha, dotAlpha); - } - } - return color; + return (sil >> 29) & 0x7; + } -float4 drawRing(float2 p, float aaWidth) +// Check if vertex has negative z +bool getVertexZNeg(int vertexIdx) { - float positionLength = length(p); - - // Add a white background circle ring - float ringWidth = 0.01f; - float ringDistance = abs(positionLength - CIRCLE_RADIUS); - float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); - - return ringAlpha * float4(1, 1, 1, 1); + return normalize(corners[vertexIdx]).z < 0.0f; } -// Check if a face on the hemisphere is visible from camera at origin -bool isFaceVisible(float3 faceCenter, float3 faceNormal) +#include "Drawing.hlsl" + + +void setDebugData(uint32_t sil, int3 region, int configIndex, uint32_t clippedVertexCount) { - // Face is visible if normal points toward camera (at origin) - float3 viewVec = -normalize(faceCenter); // Vector from face to camera - return dot(faceNormal, viewVec) > 0.0f; +#if DEBUG_DATA + DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); + DebugDataBuffer[0].region = uint3(region); + DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); + 
DebugDataBuffer[0].clippedVertexCount = clippedVertexCount; + for (int i = 0; i < 6; i++) + { + DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); + } + DebugDataBuffer[0].silhouette = sil; +#endif } -int getEdgeVisibility(int edgeIdx, float3 cameraPos) +float2 toCircleSpace(float2 uv) { - int2 faces = edgeToFaces[edgeIdx]; - - // Transform normals to world space - float3x3 rotMatrix = (float3x3)pc.modelMatrix; - float3 n_world_f1 = mul(rotMatrix, localNormals[faces.x]); - float3 n_world_f2 = mul(rotMatrix, localNormals[faces.y]); - - bool visible1 = isFaceVisible(faceCenters[faces.x], n_world_f1); - bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); - - // Silhouette: exactly one face visible - if (visible1 != visible2) return 1; - - // Inner edge: both faces visible - if (visible1 && visible2) return 2; - - // Hidden edge: both faces hidden - return 0; + float2 p = uv * 2.0f - 1.0f; + float aspect = pc.viewport.z / pc.viewport.w; + p.x *= aspect; + return p; } -// Draw great circle arc in fragment shader with horizon clipping -float4 drawGreatCircleArc(float3 fragPos, int2 edgeVerts, int visibility, float aaWidth) +uint32_t packSilhouette(const int s[7]) { - if (visibility == 0) return float4(0,0,0,0); // Hidden edge - - float3 v0 = normalize(corners[edgeVerts.x]); - float3 v1 = normalize(corners[edgeVerts.y]); - float3 p = normalize(fragPos); // Current point on hemisphere - - // HORIZON CLIPPING: Current fragment must be on front hemisphere - if (p.z < 0.0f) - return float4(0,0,0,0); - - // HORIZON CLIPPING: Skip edge if both endpoints are behind horizon - if (v0.z < 0.0f && v1.z < 0.0f) - return float4(0,0,0,0); - - // Great circle plane normal - float3 arcNormal = normalize(cross(v0, v1)); - - // Distance to great circle - float dist = abs(dot(p, arcNormal)); - - // Check if point is within arc bounds - float dotMid = dot(v0, v1); - bool onArc = (dot(p, v0) >= dotMid) && (dot(p, v1) >= dotMid); - - if (!onArc) return 
float4(0,0,0,0); - - // Depth-based width scaling - float avgDepth = (length(corners[edgeVerts.x]) + length(corners[edgeVerts.y])) * 0.5f; - float depthScale = 3.0f / avgDepth; - - float baseWidth = (visibility == 1) ? 0.01f : 0.005f; - float width = min(baseWidth * depthScale, 0.02f); - - float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); - - float4 edgeColor = (visibility == 1) ? - float4(0.0f, 0.5f, 1.0f, 1.0f) : // Silhouette: blue - float4(1.0f, 0.0f, 0.0f, 1.0f); // Inner: red - - float intensity = (visibility == 1) ? 1.0f : 0.5f; - return edgeColor * alpha * intensity; + uint32_t packed = 0; + int size = s[0] & 0x7; // 3 bits for size + + // Pack vertices LSB-first (vertex1 in lowest 3 bits above size) + for (int i = 1; i <= 6; ++i) { + int v = s[i]; + if (v < 0) v = 0; // replace unused vertices with 0 + packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1) + } + + // Put size in the MSB (bits 29-31 for a 32-bit uint, leaving 29 bits for vertices) + packed |= (size & 0x7) << 29; + + return packed; } -float4 drawHiddenEdges(float3 spherePos, int configIndex, float aaWidth) +void computeCubeGeo() { - float4 color = float4(0,0,0,0); - // Draw the remaining edges (non-silhouette) in a different color - float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); // dark yellow color for hidden edges - - for (int i = 0; i < 12; i++) - { - int2 edge = allEdges[i]; - - // Check if this edge is already drawn as a silhouette edge - bool isSilhouette = false; - int vertexCount = silhouettes[configIndex][0]; - // Draw the 6 silhouette edges - for (int i = 0; i < vertexCount; i++) - { - int v0Idx = silhouettes[configIndex][i + 1]; - int v1Idx = silhouettes[configIndex][((i + 1) % vertexCount) + 1]; - - if ((edge.x == v0Idx && edge.y == v1Idx) || (edge.x == v1Idx && edge.y == v0Idx)) - { - isSilhouette = true; - break; - } - } - - // Only draw if it's not a silhouette edge - if (!isSilhouette) - { - float4 edgeContribution = 
drawGreatCircleArc(spherePos, edge, 1, aaWidth); - color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); - } - } - return color; + // Visit each of the 8 corners exactly once: every corner adds a quarter of its world position to its three adjacent face centers + for (int i = 0; i < 8; i++) + { + float3 localPos = constCorners[i]; + float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; + corners[i] = worldPos.xyz; + faceCenters[i / 4] += worldPos / 4.0f; + faceCenters[2 + i % 2] += worldPos / 4.0f; + faceCenters[4 + (i / 2) % 2] += worldPos / 4.0f; + } } [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { - float4 color = float4(0, 0, 0, 0); - float2 p = toCircleSpace(vx.uv); - - // Convert 2D disk position to 3D hemisphere position - float2 normalized = p / CIRCLE_RADIUS; - float r2 = dot(normalized, normalized); - float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - - - - // Convert UV to 3D position on hemisphere - float3 spherePos = normalize(float3(normalized.x, normalized.y, sqrt(1 - r2))); - - computeCubeGeo(); - - // Get OBB center in world space - float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; - - float3x3 rotMatrix = (float3x3)pc.modelMatrix; - float3 proj = mul(obbCenter, rotMatrix); // Get all 3 projections at once - - // Get squared column lengths - float lenSqX = dot(rotMatrix[0], rotMatrix[0]); - float lenSqY = dot(rotMatrix[1], rotMatrix[1]); - float lenSqZ = dot(rotMatrix[2], rotMatrix[2]); - - int3 region = int3( - proj.x < -lenSqX ? 0 : (proj.x > lenSqX ? 2 : 1), - proj.y < -lenSqY ? 0 : (proj.y > lenSqY ? 2 : 1), - proj.z < -lenSqZ ? 0 : (proj.z > lenSqZ ? 
2 : 1) - ); - - int configIndex = region.x + region.y * 3 + region.z * 9; // 0-26 - - int vertexCount = silhouettes[configIndex][0]; - for (int i = 0; i < vertexCount; i++) - { - int v0Idx = silhouettes[configIndex][i + 1]; - int v1Idx = silhouettes[configIndex][((i + 1) % vertexCount) + 1]; - - float4 edgeContribution = drawGreatCircleArc(spherePos, int2(v0Idx, v1Idx), 1, aaWidth); - color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); - } - - color += drawHiddenEdges(spherePos, configIndex, aaWidth); - - color += drawCorners(spherePos, aaWidth); - - color += drawRing(p, aaWidth); - - if (all(vx.uv >= float2(0.49f, 0.49f) ) && all(vx.uv <= float2(0.51f, 0.51f))) - { - return float4(colorLUT[configIndex], 1.0f); - } - - // if (r2 > 1.1f) - // color.a = 0.0f; // Outside circle, make transparent - - return color; + float4 color = float4(0, 0, 0, 0); + float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); + float2 p = toCircleSpace(vx.uv); + + float2 normalized = p / CIRCLE_RADIUS; + float r2 = dot(normalized, normalized); + + float3 spherePos; + if (r2 <= 1.0f) + { + spherePos = float3(normalized.x, normalized.y, sqrt(1.0f - r2)); + } + else + { + float uv2Plus1 = r2 + 1.0f; + spherePos = float3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; + } + spherePos = normalize(spherePos); + + computeCubeGeo(); + + float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; + + float3x3 upper3x3 = (float3x3)pc.modelMatrix; + +#if 1 + // Compute reciprocal scales + float3 rcpScales = rsqrt(float3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]) + )); + + // Build inverse-rotation-only matrix + float3x3 invRot; + invRot[0] = upper3x3[0] * rcpScales.x; + invRot[1] = upper3x3[1] * rcpScales.y; + invRot[2] = upper3x3[2] * rcpScales.z; + + // Project center into OBB local space + float3 normalizedProj = mul(invRot, obbCenter); +#else + float3 normalizedProj = mul(inverse(upper3x3), 
obbCenter); +#endif + int3 region = int3( + normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), + normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 2 : 1), + normalizedProj.z < -1.0f ? 0 : (normalizedProj.z > 1.0f ? 2 : 1) + ); + int configIndex = region.x + region.y * 3 + region.z * 9; + + // uint32_t sil = packSilhouette(silhouettes[configIndex]); + uint32_t sil = binSilhouettes[configIndex]; + + int vertexCount = getSilhouetteSize(sil); + bool longSilhouette = (vertexCount == 6); + uint32_t silEdgeMask = 0; + +#if DEBUG_DATA + { + for (int i = 0; i < vertexCount; i++) + { + int vIdx = i % vertexCount; + int v1Idx = (i + 1) % vertexCount; + + int v0Corner = getSilhouetteVertex(sil, vIdx); + int v1Corner = getSilhouetteVertex(sil, v1Idx); + // Mark edge as part of silhouette + for (int e = 0; e < 12; e++) + { + int2 edge = allEdges[e]; + if ((edge.x == v0Corner && edge.y == v1Corner) || + (edge.x == v1Corner && edge.y == v0Corner)) + { + silEdgeMask |= (1u << e); + } + } + } + validateEdgeVisibility(sil, vertexCount, silEdgeMask); + } +#endif + // Build clip mask for vertices below horizon (z < 0) + uint32_t clipMask = 0u; + NBL_UNROLL + for (int i = 0; i < 6; i++) + { + if (i >= vertexCount) break; + clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + } + + int clipCount = countbits(clipMask); + + // Total clipped vertices + int clippedVertCount = vertexCount + (clipMask != 0u ? (2 - clipCount) : 0); + + // Find rotation amount to place positive vertices first + int rotateAmount = 0; + if (clipMask != 0u) + { + uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); + bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask >> (vertexCount - 1)) & 1u); + + rotateAmount = wrapAround ? 
+ ((firstbithigh(invertedMask) + 1) % vertexCount) : + firstbitlow(clipMask); + } + + // Rotate silhouette bits + uint32_t vertexBits = sil & 0x1FFFFFFF; + uint32_t rotatedVertexBits = rotr(vertexBits, rotateAmount * 3, vertexCount * 3); + uint32_t rotatedSil = (sil & 0xE0000000) | rotatedVertexBits; + + // Rotate the clip mask to match + uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); + + // Draw clipped silhouette edges + for (int i = 0; i < clippedVertCount; i++) + { + int nextI = (i + 1) % clippedVertCount; + + int vIdx = i % vertexCount; + int v1Idx = nextI % vertexCount; + + // Extract clip bits directly + bool v0Clipped = (rotatedClipMask >> vIdx) & 1u; + bool v1Clipped = (rotatedClipMask >> v1Idx) & 1u; + + // Skip if both clipped + if (v0Clipped && v1Clipped) continue; + + int v0Corner = getSilhouetteVertex(rotatedSil, vIdx); + int v1Corner = getSilhouetteVertex(rotatedSil, v1Idx); + + float3 v0 = normalize(corners[v0Corner]); + float3 v1 = normalize(corners[v1Corner]); + + float3 points[2] = { corners[v0Corner], corners[v1Corner] }; + + // Clip against the z = 0 horizon: t is solved from the unit directions v0/v1, so interpolate those (not the raw corners) to land exactly on z = 0 + if (v0Clipped) + { + float t = v0.z / (v0.z - v1.z); + points[0] = normalize(lerp(v0, v1, t)); + } + else if (v1Clipped) + { + float t = v0.z / (v0.z - v1.z); + points[1] = normalize(lerp(v0, v1, t)); + } + + // Draw edge + float4 edgeContribution = drawGreatCircleArc(spherePos, points, 1, aaWidth); + color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); + + } + + + setDebugData(sil, region, configIndex, clippedVertCount); + + color += drawHiddenEdges(spherePos, silEdgeMask, aaWidth); + color += drawCorners(spherePos, p, aaWidth); + color += drawRing(p, aaWidth); + + if (all(vx.uv >= float2(0.49f, 0.49f)) && all(vx.uv <= float2(0.51f, 0.51f))) + { + return float4(colorLUT[configIndex], 1.0f); + } + + return color; } \ No newline at end of file diff --git 
a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl index 80368d08f..3c87a48bc 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -2,13 +2,52 @@ #define _SOLID_ANGLE_VIS_COMMON_HLSL_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#define DEBUG_DATA 1 - -struct PushConstants +namespace nbl { - nbl::hlsl::float32_t3x4 modelMatrix; - nbl::hlsl::float32_t4 viewport; -}; + namespace hlsl + { + + struct ResultData + { + uint32_t3 region; + uint32_t silhouetteIndex; + + uint32_t silhouetteVertexCount; + uint32_t silhouette; + uint32_t clippedVertexCount; + uint32_t edgeVisibilityMismatch; + + uint32_t vertices[6]; + }; + + struct PushConstants + { + float32_t3x4 modelMatrix; + float32_t4 viewport; + }; + static const float32_t3 colorLUT[27] = { + float32_t3(0, 0, 0), float32_t3(1, 1, 1), float32_t3(0.5, 0.5, 0.5), + float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), + float32_t3(1, 1, 0), float32_t3(1, 0, 1), float32_t3(0, 1, 1), + float32_t3(1, 0.5, 0), float32_t3(1, 0.65, 0), float32_t3(0.8, 0.4, 0), + float32_t3(1, 0.4, 0.7), float32_t3(1, 0.75, 0.8), float32_t3(0.7, 0.1, 0.3), + float32_t3(0.5, 0, 0.5), float32_t3(0.6, 0.4, 0.8), float32_t3(0.3, 0, 0.5), + float32_t3(0, 0.5, 0), float32_t3(0.5, 1, 0), float32_t3(0, 0.5, 0.25), + float32_t3(0, 0, 0.5), float32_t3(0.3, 0.7, 1), float32_t3(0, 0.4, 0.6), + float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1) + }; +#ifndef __HLSL_VERSION + static const char* colorNames[27] = {"Black", + "White", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", + "Orange", "Light Orange", "Dark Orange", "Pink", "Light Pink", "Deep Rose", "Purple", "Light Purple", + "Indigo", "Dark Green", "Lime", "Forest Green", "Navy", "Sky Blue", "Teal", "Brown", + "Tan/Beige", "Dark Brown" + }; +#endif // __HLSL_VERSION + } +} #endif // _SOLID_ANGLE_VIS_COMMON_HLSL_ 
diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl new file mode 100644 index 000000000..4031e048f --- /dev/null +++ b/72_SolidAngleVisualizer/app_resources/hlsl/utils.hlsl @@ -0,0 +1,23 @@ +#ifndef _UTILS_HLSL_ +#define _UTILS_HLSL_ + +// TODO: implemented somewhere else? +// Bit rotation helpers +uint32_t rotl(uint32_t value, uint32_t bits, uint32_t width) +{ + bits = bits % width; + uint32_t mask = (1u << width) - 1u; + value &= mask; + return ((value << bits) | (value >> (width - bits))) & mask; +} + +uint32_t rotr(uint32_t value, uint32_t bits, uint32_t width) +{ + bits = bits % width; + uint32_t mask = (1u << width) - 1u; + value &= mask; + return ((value >> bits) | (value << (width - bits))) & mask; +} + + +#endif // _UTILS_HLSL_ diff --git a/72_SolidAngleVisualizer/include/transform.hpp b/72_SolidAngleVisualizer/include/transform.hpp index 105b2f757..538173223 100644 --- a/72_SolidAngleVisualizer/include/transform.hpp +++ b/72_SolidAngleVisualizer/include/transform.hpp @@ -1,27 +1,21 @@ #ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ #define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ - #include "nbl/ui/ICursorControl.h" - #include "nbl/ext/ImGui/ImGui.h" - #include "imgui/imgui_internal.h" #include "imguizmo/ImGuizmo.h" - struct TransformRequestParams { - float camDistance = 8.f; uint8_t sceneTexDescIx = ~0; - bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false; + bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = true; }; struct TransformReturnInfo { nbl::hlsl::uint16_t2 sceneResolution = { 1, 1 }; - bool isGizmoWindowHovered; - bool isGizmoBeingUsed; + bool allowCameraMovement = false; }; TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) @@ -35,7 +29,7 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti 
static bool boundSizing = false; static bool boundSizingSnap = false; - ImGui::Text("Press T/R/G to change gizmo mode"); + ImGui::Text("Use gizmo (T/R/G) or ViewManipulate widget to transform the cube"); if (params.editTransformDecomposition) { @@ -55,11 +49,13 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti mCurrentGizmoOperation = ImGuizmo::SCALE; if (ImGui::RadioButton("Universal", mCurrentGizmoOperation == ImGuizmo::UNIVERSAL)) mCurrentGizmoOperation = ImGuizmo::UNIVERSAL; + + // For UI editing, decompose temporarily float matrixTranslation[3], matrixRotation[3], matrixScale[3]; ImGuizmo::DecomposeMatrixToComponents(matrix, matrixTranslation, matrixRotation, matrixScale); - ImGui::InputFloat3("Tr", matrixTranslation); - ImGui::InputFloat3("Rt", matrixRotation); - ImGui::InputFloat3("Sc", matrixScale); + ImGui::DragFloat3("Tr", matrixTranslation, 0.01f); + ImGui::DragFloat3("Rt", matrixRotation, 0.01f); + ImGui::DragFloat3("Sc", matrixScale, 0.01f); ImGuizmo::RecomposeMatrixFromComponents(matrixTranslation, matrixRotation, matrixScale, matrix); if (mCurrentGizmoOperation != ImGuizmo::SCALE) @@ -101,17 +97,18 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti ImGuiIO& io = ImGui::GetIO(); float viewManipulateRight = io.DisplaySize.x; float viewManipulateTop = 0; + bool isWindowHovered = false; static ImGuiWindowFlags gizmoWindowFlags = 0; /* - for the "useWindow" case we just render to a gui area, + for the "useWindow" case we just render to a gui area, otherwise to fake full screen transparent window - note that for both cases we make sure gizmo being - rendered is aligned to our texture scene using - imgui "cursor" screen positions + note that for both cases we make sure gizmo being + rendered is aligned to our texture scene using + imgui "cursor" screen positions */ -// TODO: this shouldn't be handled here I think + // TODO: this shouldn't be handled here I think SImResourceInfo info; 
info.textureID = params.sceneTexDescIx; info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; @@ -128,17 +125,17 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); ImVec2 windowPos = ImGui::GetWindowPos(); ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + isWindowHovered = ImGui::IsWindowHovered(); ImGui::Image(info, contentRegionSize); ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); - retval.sceneResolution = {contentRegionSize.x,contentRegionSize.y}; - retval.isGizmoWindowHovered = ImGui::IsWindowHovered(); + retval.sceneResolution = { contentRegionSize.x,contentRegionSize.y }; viewManipulateRight = cursorPos.x + contentRegionSize.x; viewManipulateTop = cursorPos.y; ImGuiWindow* window = ImGui::GetCurrentWindow(); - gizmoWindowFlags = (ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? ImGuiWindowFlags_NoMove : 0); + gizmoWindowFlags = (isWindowHovered && ImGui::IsMouseHoveringRect(window->InnerRect.Min, window->InnerRect.Max) ? 
ImGuiWindowFlags_NoMove : 0); } else { @@ -149,21 +146,45 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); ImVec2 cursorPos = ImGui::GetCursorScreenPos(); + isWindowHovered = ImGui::IsWindowHovered(); ImGui::Image(info, contentRegionSize); ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); - retval.sceneResolution = {contentRegionSize.x,contentRegionSize.y}; - retval.isGizmoWindowHovered = ImGui::IsWindowHovered(); + retval.sceneResolution = { contentRegionSize.x,contentRegionSize.y }; viewManipulateRight = cursorPos.x + contentRegionSize.x; viewManipulateTop = cursorPos.y; } + // Standard Manipulate gizmo - let ImGuizmo modify the matrix directly ImGuizmo::Manipulate(cameraView, cameraProjection, mCurrentGizmoOperation, mCurrentGizmoMode, matrix, NULL, useSnap ? &snap[0] : NULL, boundSizing ? bounds : NULL, boundSizingSnap ? boundsSnap : NULL); - retval.isGizmoBeingUsed = ImGuizmo::IsOver() || (ImGuizmo::IsUsing() && ImGui::IsMouseDown(ImGuiMouseButton_Left)); - if(params.enableViewManipulate) - ImGuizmo::ViewManipulate(cameraView, params.camDistance, ImVec2(viewManipulateRight - 128, viewManipulateTop), ImVec2(128, 128), 0x10101010); + retval.allowCameraMovement = isWindowHovered && !ImGuizmo::IsUsing(); + + // ViewManipulate for rotating the view + if (params.enableViewManipulate) + { + // Store original translation and scale before ViewManipulate + // Decompose original matrix + nbl::hlsl::float32_t3 translation, rotation, scale; + ImGuizmo::DecomposeMatrixToComponents(matrix, &translation.x, &rotation.x, &scale.x); + + float temp[16]; + nbl::hlsl::float32_t3 baseTranslation(0.0f); + nbl::hlsl::float32_t3 baseScale(1.0f); + ImGuizmo::RecomposeMatrixFromComponents(&baseTranslation.x, &rotation.x, &baseScale.x, temp); + // Manipulate rotation only + ImGuizmo::ViewManipulate(temp, 1.0f, ImVec2(viewManipulateRight - 128, 
viewManipulateTop), ImVec2(128, 128), 0x10101010); + + // Extract rotation from manipulated temp + nbl::hlsl::float32_t3 newRot; + ImGuizmo::DecomposeMatrixToComponents(temp, &baseTranslation.x, &newRot.x, &baseScale.x); + + // Recompose original matrix with new rotation but keep translation & scale + ImGuizmo::RecomposeMatrixFromComponents(&translation.x, &newRot.x, &scale.x, matrix); + + retval.allowCameraMovement &= isWindowHovered && !ImGuizmo::IsUsingViewManipulate(); + } ImGui::End(); ImGui::PopStyleColor(); @@ -171,4 +192,4 @@ TransformReturnInfo EditTransform(float* cameraView, const float* cameraProjecti return retval; } -#endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ \ No newline at end of file +#endif // _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ \ No newline at end of file diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index e9266520d..1c52547af 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -211,7 +211,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR return shader; }; - auto scRes = static_cast(m_surface->getSwapchainResources()); ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); if (!fsTriProtoPPln) return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); @@ -232,17 +231,73 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR .size = sizeof(PushConstants) } }; - auto visualizationLayout = m_device->createPipelineLayout( - ranges, - nullptr, - nullptr, - nullptr, - nullptr + nbl::video::IGPUDescriptorSetLayout::SBinding bindings[1] = { + { + .binding = 0, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = ShaderStage::ESS_FRAGMENT, + .count = 1 + } + }; + smart_refctd_ptr dsLayout = 
m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + logFail("Failed to create a Descriptor Layout!\n"); + + + auto visualizationLayout = m_device->createPipelineLayout(ranges +#if DEBUG_DATA + , dsLayout +#endif ); m_visualizationPipeline = fsTriProtoPPln.createPipeline(fragSpec, visualizationLayout.get(), m_solidAngleRenderpass.get()); if (!m_visualizationPipeline) return logFail("Could not create Graphics Pipeline!"); + // Allocate the memory +#if DEBUG_DATA + { + constexpr size_t BufferSize = sizeof(ResultData); + + nbl::video::IGPUBuffer::SCreationParams params = {}; + params.size = BufferSize; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + m_outputStorageBuffer = m_device->createBuffer(std::move(params)); + if (!m_outputStorageBuffer) + logFail("Failed to create a GPU Buffer of size %d!\n", params.size); + + m_outputStorageBuffer->setObjectDebugName("ResultData output buffer"); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = m_outputStorageBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + m_allocation = m_device->allocate(reqs, m_outputStorageBuffer.get(), nbl::video::IDeviceMemoryAllocation::EMAF_NONE); + if (!m_allocation.isValid()) + logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(m_outputStorageBuffer->getBoundMemory().memory == m_allocation.memory.get()); + smart_refctd_ptr pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE, { &dsLayout.get(),1 }); + + m_ds = pool->createDescriptorSet(std::move(dsLayout)); + { + IGPUDescriptorSet::SDescriptorInfo info[1]; + info[0].desc = smart_refctd_ptr(m_outputStorageBuffer); + info[0].info.buffer = { .offset = 0,.size = BufferSize }; + IGPUDescriptorSet::SWriteDescriptorSet writes[1] = { + {.dstSet = m_ds.get(),.binding = 0,.arrayElement = 0,.count = 1,.info = info} + }; + m_device->updateDescriptorSets(writes, {}); + } + 
} + + if (!m_allocation.memory->map({ 0ull,m_allocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ)) + logFail("Failed to map the Device Memory!\n"); + + // if the mapping is not coherent the range needs to be invalidated to pull in new data for the CPU's caches + const ILogicalDevice::MappedMemoryRange memoryRange(m_allocation.memory.get(), 0ull, m_allocation.memory->getAllocationSize()); + if (!m_allocation.memory->getMemoryPropertyFlags().hasFlags(IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + m_device->invalidateMappedMemoryRanges(1, &memoryRange); +#endif } // Create ImGUI @@ -336,6 +391,15 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; if (m_solidAngleViewFramebuffer) { +#if DEBUG_DATA + asset::SBufferRange range + { + .offset = 0, + .size = m_outputStorageBuffer->getSize(), + .buffer = m_outputStorageBuffer + }; + cb->fillBuffer(range, 0u); +#endif auto creationParams = m_solidAngleViewFramebuffer->getCreationParameters(); cb->beginDebugMarker("Draw Circle View Frame"); { @@ -361,11 +425,17 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR auto pipeline = m_visualizationPipeline; cb->bindGraphicsPipeline(pipeline.get()); cb->pushConstants(pipeline->getLayout(), hlsl::ShaderStage::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); - //cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 3, 1, &ds); + cb->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, pipeline->getLayout(), 0, 1, &m_ds.get()); ext::FullScreenTriangle::recordDrawCall(cb); } cb->endRenderPass(); cb->endDebugMarker(); + +#if DEBUG_DATA + m_device->waitIdle(); + std::memcpy(&m_GPUOutResulData, static_cast(m_allocation.memory->getMappedPointer()), sizeof(ResultData)); + m_device->waitIdle(); +#endif } // draw main view if (m_mainViewFramebuffer) @@ -557,6 +627,8 @@ class SolidAngleVisualizer final : 
public MonoWindowApplication, public BuiltinR { if (interface.move) camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + else + camera.mouseKeysUp(); for (const auto& e : events) // here capture { @@ -713,6 +785,13 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR cb->setViewport(0u, 1u, &viewport); } +#if DEBUG_DATA + ~SolidAngleVisualizer() override + { + m_allocation.memory->unmap(); + } +#endif + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers constexpr static inline uint32_t MaxFramesInFlight = 3u; constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT; @@ -721,13 +800,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes constexpr static inline auto MaxImGUITextures = 2u + MaxFramesInFlight; - constexpr static inline float32_t4x4 OBBModelMatrixDefault - { - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 3.0f, 1.0f - }; + static inline ResultData m_GPUOutResulData; // smart_refctd_ptr m_scene; smart_refctd_ptr m_solidAngleRenderpass; @@ -737,6 +810,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR smart_refctd_ptr m_mainViewFramebuffer; smart_refctd_ptr m_visualizationPipeline; // + nbl::video::IDeviceMemoryAllocator::SAllocation m_allocation = {}; + smart_refctd_ptr m_outputStorageBuffer; + smart_refctd_ptr m_ds = nullptr; smart_refctd_ptr m_semaphore; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; @@ -794,7 +870,6 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // transformParams.useWindow = true; ImGui::Text("Camera"); - bool viewDirty = false; if (ImGui::RadioButton("LH", isLH)) 
isLH = true; @@ -827,13 +902,11 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); - viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f); - if (viewDirty || firstFrame) + if (firstFrame) { camera.setPosition(cameraIntialPosition); camera.setTarget(cameraInitialTarget); - camera.setBackupUpVector(cameraInitialUp); camera.setUpVector(cameraInitialUp); camera.recomputeViewMatrix(); @@ -909,45 +982,35 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR if (ImGui::IsKeyPressed(ImGuiKey_End)) { - m_OBBModelMatrix = OBBModelMatrixDefault; + m_TRS = TRS{}; } - static struct { - float32_t4x4 view, projection, model; - } imguizmoM16InOut; + static struct + { + float32_t4x4 view, projection, model; + } imguizmoM16InOut; - ImGuizmo::SetID(0u); + ImGuizmo::SetID(0u); - // TODO: camera will return hlsl::float32_tMxN - auto view = *reinterpret_cast(camera.getViewMatrix().pointer()); - imguizmoM16InOut.view = hlsl::transpose(getMatrix3x4As4x4(view)); + // TODO: camera will return hlsl::float32_tMxN + auto view = *reinterpret_cast(camera.getViewMatrix().pointer()); + imguizmoM16InOut.view = hlsl::transpose(getMatrix3x4As4x4(view)); - // TODO: camera will return hlsl::float32_tMxN - imguizmoM16InOut.projection = hlsl::transpose(*reinterpret_cast(camera.getProjectionMatrix().pointer())); - imguizmoM16InOut.model = m_OBBModelMatrix; + // TODO: camera will return hlsl::float32_tMxN + imguizmoM16InOut.projection = hlsl::transpose(*reinterpret_cast(camera.getProjectionMatrix().pointer())); + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); - { if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates imguizmoM16InOut.projection[1][1] *= -1.f; // 
https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ transformParams.editTransformDecomposition = true; mainViewTransformReturnInfo = EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &imguizmoM16InOut.model[0][0], transformParams); + move = mainViewTransformReturnInfo.allowCameraMovement; - // TODO: camera stops when cursor hovers gizmo, but we also want to stop when gizmo is being used - move = (ImGui::IsMouseDown(ImGuiMouseButton_Left) || mainViewTransformReturnInfo.isGizmoWindowHovered) && (!mainViewTransformReturnInfo.isGizmoBeingUsed); - + ImGuizmo::DecomposeMatrixToComponents(&imguizmoM16InOut.model[0][0], &m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x); + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &imguizmoM16InOut.model[0][0]); } - - // to Nabla + update camera & model matrices - // TODO: make it more nicely, extract: - // - Position by computing inverse of the view matrix and grabbing its translation - // - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position - // But then set the view matrix this way anyway, because up-vector may not be compatible - //const auto& view = camera.getViewMatrix(); - //const_cast(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) - m_OBBModelMatrix = imguizmoM16InOut.model; - // object meta display //{ // ImGui::Begin("Object"); @@ -964,12 +1027,193 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImVec2 contentRegionSize = ImGui::GetContentRegionAvail(); solidAngleViewTransformReturnInfo.sceneResolution = 
uint16_t2(static_cast(contentRegionSize.x), static_cast(contentRegionSize.y)); - solidAngleViewTransformReturnInfo.isGizmoBeingUsed = false; // not used in this view - solidAngleViewTransformReturnInfo.isGizmoWindowHovered = false; // not used in this view + solidAngleViewTransformReturnInfo.allowCameraMovement = false; // not used in this view ImGui::Image({ renderColorViewDescIndices[ERV_SOLID_ANGLE_VIEW] }, contentRegionSize); ImGui::End(); } + // Show data coming from GPU +#if DEBUG_DATA + { + if (ImGui::Begin("Result Data")) + { + auto drawColorField = [&](const char* fieldName, uint32_t index) + { + ImGui::Text("%s: %u", fieldName, index); + + if (index >= 27) + { + ImGui::SameLine(); + ImGui::Text(""); + return; + } + + const auto& c = colorLUT[index]; // uses the combined LUT we made earlier + + ImGui::SameLine(); + + // Color preview button + ImGui::ColorButton( + fieldName, + ImVec4(c.r, c.g, c.b, 1.0f), + 0, + ImVec2(20, 20) + ); + + ImGui::SameLine(); + ImGui::Text("%s", colorNames[index]); + }; + + // Vertices + if (ImGui::CollapsingHeader("Vertices", ImGuiTreeNodeFlags_DefaultOpen)) + { + for (uint32_t i = 0; i < 6; ++i) + { + if (i < m_GPUOutResulData.silhouetteVertexCount) + { + ImGui::Text("corners[%u]", i); + ImGui::SameLine(); + drawColorField(":", m_GPUOutResulData.vertices[i]); + ImGui::SameLine(); + static const float32_t3 constCorners[8] = { + float32_t3(-1, -1, -1), float32_t3(1, -1, -1), float32_t3(-1, 1, -1), float32_t3(1, 1, -1), + float32_t3(-1, -1, 1), float32_t3(1, -1, 1), float32_t3(-1, 1, 1), float32_t3(1, 1, 1) + }; + float32_t3 vertexLocation = constCorners[m_GPUOutResulData.vertices[i]]; + ImGui::Text(" : (%.3f, %.3f, %.3f", vertexLocation.x, vertexLocation.y, vertexLocation.z); + } + else + { + ImGui::Text("corners[%u] :: ", i); + ImGui::SameLine(); + ImGui::ColorButton( + "", + ImVec4(0.0f, 0.0f, 0.0f, 0.0f), + 0, + ImVec2(20, 20) + ); + ImGui::SameLine(); + ImGui::Text(""); + + } + + } + } + + if (ImGui::CollapsingHeader("Color 
LUT Map")) + { + for (int i = 0; i < 27; i++) + drawColorField(" ", i); + } + + ImGui::Separator(); + + // Silhouette info + drawColorField("silhouetteIndex", m_GPUOutResulData.silhouetteIndex); + + ImGui::Text("silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); + ImGui::Text("silhouette Clipped VertexCount: %u", m_GPUOutResulData.clippedVertexCount); + ImGui::Text("Silhouette Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? "true" : "false"); + + { + float32_t3 xAxis = m_OBBModelMatrix[0].xyz; + float32_t3 yAxis = m_OBBModelMatrix[1].xyz; + float32_t3 zAxis = m_OBBModelMatrix[2].xyz; + + float32_t3 nx = normalize(xAxis); + float32_t3 ny = normalize(yAxis); + float32_t3 nz = normalize(zAxis); + + const float epsilon = 1e-4; + bool hasSkew = false; + if (abs(dot(nx, ny)) > epsilon || abs(dot(nx, nz)) > epsilon || abs(dot(ny, nz)) > epsilon) + hasSkew = true; + ImGui::Text("Matrix Has Skew: %s", hasSkew ? "true" : "false"); + } + + static bool modalShown = false; + static uint32_t lastSilhouetteIndex = ~0u; + + // Reset modal flag if silhouette configuration changed + if (m_GPUOutResulData.silhouetteIndex != lastSilhouetteIndex) + { + modalShown = false; + lastSilhouetteIndex = m_GPUOutResulData.silhouetteIndex; + } + + if (!m_GPUOutResulData.edgeVisibilityMismatch) + { + // Reset flag when mismatch is cleared + modalShown = false; + } + if (m_GPUOutResulData.edgeVisibilityMismatch && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown) // 13 means we're inside the cube, so don't care + { + // Open modal popup only once per configuration + ImGui::OpenPopup("Edge Visibility Mismatch Warning"); + modalShown = true; + } + + // Modal popup + if (ImGui::BeginPopupModal("Edge Visibility Mismatch Warning", NULL, ImGuiWindowFlags_AlwaysAutoResize)) + { + ImGui::TextColored(ImVec4(1.0f, 0.5f, 0.0f, 1.0f), "Warning: Edge Visibility Mismatch Detected!"); + ImGui::Separator(); + + ImGui::Text("The silhouette lookup table (LUT) does not match the 
computed edge visibility."); + ImGui::Text("This indicates the pre-computed silhouette data may be incorrect."); + ImGui::Spacing(); + + // Show configuration info + ImGui::TextWrapped("Configuration Index: %u", m_GPUOutResulData.silhouetteIndex); + ImGui::TextWrapped("Region: (%d, %d, %d)", + m_GPUOutResulData.region.x, + m_GPUOutResulData.region.y, + m_GPUOutResulData.region.z); + ImGui::Spacing(); + + ImGui::Text("Mismatched Vertices (bitmask): 0x%08X", m_GPUOutResulData.edgeVisibilityMismatch); + + // Show which specific vertices are mismatched + ImGui::Text("Vertices involved in mismatched edges:"); + ImGui::Indent(); + for (int i = 0; i < 8; i++) + { + if (m_GPUOutResulData.edgeVisibilityMismatch & (1u << i)) + { + ImGui::BulletText("Vertex %d", i); + } + } + ImGui::Unindent(); + ImGui::Spacing(); + + if (ImGui::Button("OK", ImVec2(120, 0))) + { + ImGui::CloseCurrentPopup(); + } + + ImGui::EndPopup(); + } + + ImGui::Separator(); + + // Region (uint32_t3) + ImGui::Text("region: (%u, %u, %u)", + m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); + + ImGui::Separator(); + + // Silhouette mask printed in binary + char buf[33]; + for (int i = 0; i < 32; i++) + buf[i] = (m_GPUOutResulData.silhouette & (1u << (31 - i))) ? 
'1' : '0'; + buf[32] = '\0'; + + ImGui::Text("silhouette: 0x%08X", m_GPUOutResulData.silhouette); + ImGui::Text("binary: %s", buf); + } + ImGui::End(); + } +#endif // view matrices editor { ImGui::Begin("Matrices"); @@ -995,6 +1239,32 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Separator(); }; + static RandomSampler rng(69); // Initialize RNG with seed + if (ImGui::Button("Randomize Translation")) + { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + } + ImGui::SameLine(); + + if (ImGui::Button("Randomize Rotation")) + { + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + } + ImGui::SameLine(); + + if (ImGui::Button("Randomize Scale")) + { + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + } + + ImGui::SameLine(); + if (ImGui::Button("Randomize All")) + { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + } + addMatrixTable("Model Matrix", "ModelMatrixTable", 4, 4, &m_OBBModelMatrix[0][0]); addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, camera.getViewMatrix().pointer()); addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, camera.getProjectionMatrix().pointer(), false); @@ -1071,6 +1341,8 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::End(); } ImGui::End(); + + ImGuizmo::RecomposeMatrixFromComponents(&m_TRS.translation.x, &m_TRS.rotation.x, &m_TRS.scale.x, &m_OBBModelMatrix[0][0]); } smart_refctd_ptr imGUI; @@ -1085,15 +1357,22 @@ class 
SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR }; SubAllocatedDescriptorSet::value_type renderColorViewDescIndices[E_RENDER_VIEWS::Count] = { SubAllocatedDescriptorSet::invalid_value, SubAllocatedDescriptorSet::invalid_value }; // - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + Camera camera = Camera(cameraIntialPosition, cameraInitialTarget, core::matrix4SIMD(), 1, 1, nbl::core::vectorSIMDf(0.0f, 0.0f, 1.0f)); // mutables - float32_t4x4 m_OBBModelMatrix = OBBModelMatrixDefault; + struct TRS // Source of truth + { + float32_t3 translation{ 0.0f, 0.0f, 3.0f }; + float32_t3 rotation{ 0.0f }; // MUST stay orthonormal + float32_t3 scale{ 1.0f }; + } m_TRS; + float32_t4x4 m_OBBModelMatrix; // always overwritten from TRS //std::string_view objectName; TransformRequestParams transformParams; TransformReturnInfo mainViewTransformReturnInfo; TransformReturnInfo solidAngleViewTransformReturnInfo; + const static inline core::vectorSIMDf cameraIntialPosition{ -3.0f, 6.0f, 3.0f }; const static inline core::vectorSIMDf cameraInitialTarget{ 0.f, 0.0f, 3.f }; const static inline core::vectorSIMDf cameraInitialUp{ 0.f, 0.f, 1.f }; diff --git a/common/include/nbl/examples/cameras/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp index e5f077e46..c61f93333 100644 --- a/common/include/nbl/examples/cameras/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -302,6 +302,11 @@ class Camera lastVirtualUpTimeStamp = nextPresentationTimeStamp; } + // TODO: temporary but a good fix for the camera events when mouse stops dragging gizmo + void mouseKeysUp() + { + mouseDown = false; + } private: inline void initDefaultKeysMap() { mapKeysToWASD(); } From 2e306fc96bfae85a9669ad552751cece33d1b383 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Thu, 18 Dec 2025 01:10:56 +0300 Subject: [PATCH 55/57] better (still not perfect) manual inverse of rotation matrix --- 
.../hlsl/SolidAngleVis.frag.hlsl | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index cd291dbd2..bf58e3231 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -228,21 +228,13 @@ void computeCubeGeo() float3x3 upper3x3 = (float3x3)pc.modelMatrix; #if 1 - // Compute reciprocal scales - float3 rcpScales = rsqrt(float3( - dot(upper3x3[0], upper3x3[0]), - dot(upper3x3[1], upper3x3[1]), - dot(upper3x3[2], upper3x3[2]) - )); - - // Build inverse-rotation-only matrix - float3x3 invRot; - invRot[0] = upper3x3[0] * rcpScales.x; - invRot[1] = upper3x3[1] * rcpScales.y; - invRot[2] = upper3x3[2] * rcpScales.z; - - // Project center into OBB local space - float3 normalizedProj = mul(invRot, obbCenter); +float3 rcpScales = rsqrt(float3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]) +)); + +float3 normalizedProj = mul(transpose(upper3x3), obbCenter) * rcpScales; #else float3 normalizedProj = mul(inverse(upper3x3), obbCenter); #endif From 12486d4670f0453722351814996d91f198a16749 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Thu, 18 Dec 2025 02:24:41 +0300 Subject: [PATCH 56/57] Fixed faster inverse of rotation matrix, thanks Matt! 
--- .../hlsl/SolidAngleVis.frag.hlsl | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index bf58e3231..01d166aac 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -223,21 +223,20 @@ void computeCubeGeo() computeCubeGeo(); - float3 obbCenter = mul(pc.modelMatrix, float4(0, 0, 0, 1)).xyz; + float4x3 columnModel = transpose(pc.modelMatrix); - float3x3 upper3x3 = (float3x3)pc.modelMatrix; + float3 obbCenter = columnModel[3].xyz; -#if 1 -float3 rcpScales = rsqrt(float3( - dot(upper3x3[0], upper3x3[0]), - dot(upper3x3[1], upper3x3[1]), - dot(upper3x3[2], upper3x3[2]) -)); + float3x3 upper3x3 = (float3x3)columnModel; + + float3 rcpScales = rcp(float3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]) + )); + + float3 normalizedProj = mul(upper3x3, obbCenter) * rcpScales; -float3 normalizedProj = mul(transpose(upper3x3), obbCenter) * rcpScales; -#else - float3 normalizedProj = mul(inverse(upper3x3), obbCenter); -#endif int3 region = int3( normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 
2 : 1), From 1961a898fd0a91c8e4d5c1a3fcb02df9142e8388 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sat, 20 Dec 2025 10:18:48 +0300 Subject: [PATCH 57/57] Fast clipping, less branches, also - More debug data going to imgui - Little bit of shader code refactoring - "Revert to last" button to go back to last random transformation of the OBB - Added getVertexZNeg() and getVertex() preprocessor branches for faster versions --- .../app_resources/hlsl/Drawing.hlsl | 122 ++-- .../hlsl/SolidAngleVis.frag.hlsl | 639 ++++++++++-------- .../app_resources/hlsl/common.hlsl | 42 +- 72_SolidAngleVisualizer/main.cpp | 90 ++- 4 files changed, 532 insertions(+), 361 deletions(-) diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl index c3cb5befa..f3f1b4e96 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/Drawing.hlsl @@ -16,79 +16,124 @@ float2 sphereToCircle(float3 spherePoint) } } -float4 drawGreatCircleArc(float3 fragPos, float3 points[2], int visibility, float aaWidth) +float drawGreatCircleArc(float3 fragPos, float3 points[2], float aaWidth, float width = 0.01f) { - if (visibility == 0) return float4(0,0,0,0); - float3 v0 = normalize(points[0]); float3 v1 = normalize(points[1]); float3 p = normalize(fragPos); - + float3 arcNormal = normalize(cross(v0, v1)); float dist = abs(dot(p, arcNormal)); - + float dotMid = dot(v0, v1); bool onArc = (dot(p, v0) >= dotMid) && (dot(p, v1) >= dotMid); - - if (!onArc) return float4(0,0,0,0); - + + if (!onArc) + return 0.0f; + float avgDepth = (length(points[0]) + length(points[1])) * 0.5f; float depthScale = 3.0f / avgDepth; - - float baseWidth = (visibility == 1) ? 0.01f : 0.005f; - float width = min(baseWidth * depthScale, 0.02f); - + + width = min(width * depthScale, 0.02f); float alpha = 1.0f - smoothstep(width - aaWidth, width + aaWidth, dist); - - float4 edgeColor = (visibility == 1) ? 
- float4(0.0f, 0.5f, 1.0f, 1.0f) : - float4(1.0f, 0.0f, 0.0f, 1.0f); - - float intensity = (visibility == 1) ? 1.0f : 0.5f; - return edgeColor * alpha * intensity; + + return alpha; } float4 drawHiddenEdges(float3 spherePos, uint32_t silEdgeMask, float aaWidth) { - float4 color = float4(0,0,0,0); + float4 color = 0; float3 hiddenEdgeColor = float3(0.1, 0.1, 0.1); - + + NBL_UNROLL for (int i = 0; i < 12; i++) { - if ((silEdgeMask & (1u << i)) == 0) + // skip silhouette edges + if (silEdgeMask & (1u << i)) + continue; + + int2 edge = allEdges[i]; + + float3 v0 = normalize(getVertex(edge.x)); + float3 v1 = normalize(getVertex(edge.y)); + + bool neg0 = v0.z < 0.0f; + bool neg1 = v1.z < 0.0f; + + // fully hidden + if (neg0 && neg1) + continue; + + float3 p0 = v0; + float3 p1 = v1; + + // clip if needed + if (neg0 ^ neg1) { - int2 edge = allEdges[i]; - float3 edgePoints[2] = { corners[edge.x], corners[edge.y] }; - float4 edgeContribution = drawGreatCircleArc(spherePos, edgePoints, 1, aaWidth); - color += float4(hiddenEdgeColor * edgeContribution.a, edgeContribution.a); + float t = v0.z / (v0.z - v1.z); + float3 clip = normalize(lerp(v0, v1, t)); + + p0 = neg0 ? clip : v0; + p1 = neg1 ? 
clip : v1; } + + float3 pts[2] = {p0, p1}; + float4 c = drawGreatCircleArc(spherePos, pts, aaWidth, 0.005f); + color += float4(hiddenEdgeColor * c.a, c.a); } + return color; } float4 drawCorners(float3 spherePos, float2 p, float aaWidth) { - float4 color = float4(0,0,0,0); + float4 color = 0; + + float dotSize = 0.02f; + float innerDotSize = dotSize * 0.5f; + for (int i = 0; i < 8; i++) { - float3 corner3D = normalize(corners[i]); + float3 corner3D = normalize(getVertex(i)); float2 cornerPos = sphereToCircle(corner3D); + float dist = length(p - cornerPos); - float dotSize = 0.02f; - float dotAlpha = 1.0f - smoothstep(dotSize - aaWidth, dotSize + aaWidth, dist); - if (dotAlpha > 0.0f) + + // outer dot + float outerAlpha = 1.0f - smoothstep(dotSize - aaWidth, + dotSize + aaWidth, + dist); + + if (outerAlpha <= 0.0f) + continue; + + float3 dotColor = colorLUT[i]; + color += float4(dotColor * outerAlpha, outerAlpha); + + // ------------------------------------------------- + // inner black dot for hidden corners + // ------------------------------------------------- + if (corner3D.z < 0.0f) { - float3 dotColor = colorLUT[i]; - color += float4(dotColor * dotAlpha, dotAlpha); + float innerAlpha = 1.0f - smoothstep(innerDotSize - aaWidth, + innerDotSize + aaWidth, + dist); + + // ensure it stays inside the outer dot + innerAlpha *= outerAlpha; + + float3 innerColor = float3(0.0, 0.0, 0.0); + color -= float4(innerAlpha.xxx, 0.0f); } } + return color; } float4 drawRing(float2 p, float aaWidth) { float positionLength = length(p); - float ringWidth = 0.002f; + float ringWidth = 0.003f; float ringDistance = abs(positionLength - CIRCLE_RADIUS); float ringAlpha = 1.0f - smoothstep(ringWidth - aaWidth, ringWidth + aaWidth, ringDistance); return ringAlpha * float4(1, 1, 1, 1); @@ -114,10 +159,12 @@ int getEdgeVisibility(int edgeIdx) bool visible2 = isFaceVisible(faceCenters[faces.y], n_world_f2); // Silhouette: exactly one face visible - if (visible1 != visible2) return 1; + if 
(visible1 != visible2) + return 1; // Inner edge: both faces visible - if (visible1 && visible2) return 2; + if (visible1 && visible2) + return 2; // Hidden edge: both faces hidden return 0; @@ -162,11 +209,10 @@ void validateEdgeVisibility(uint32_t sil, int vertexCount, uint32_t generatedSil } } } - + // Simple Write (assuming all fragments calculate the same result) InterlockedOr(DebugDataBuffer[0].edgeVisibilityMismatch, mismatchAccumulator); } #endif - #endif // _DEBUG_HLSL_ diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl index 01d166aac..d7ceed943 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/SolidAngleVis.frag.hlsl @@ -15,351 +15,438 @@ static const float CIRCLE_RADIUS = 0.5f; // --- Geometry Utils --- static const float3 constCorners[8] = { - float3(-1, -1, -1), float3(1, -1, -1), float3(-1, 1, -1), float3(1, 1, -1), - float3(-1, -1, 1), float3(1, -1, 1), float3(-1, 1, 1), float3(1, 1, 1) -}; + float3(-1, -1, -1), float3(1, -1, -1), float3(-1, 1, -1), float3(1, 1, -1), + float3(-1, -1, 1), float3(1, -1, 1), float3(-1, 1, 1), float3(1, 1, 1)}; static const int2 allEdges[12] = { - {0, 1}, {2, 3}, {4, 5}, {6, 7}, // X axis - {0, 2}, {1, 3}, {4, 6}, {5, 7}, // Y axis - {0, 4}, {1, 5}, {2, 6}, {3, 7} // Z axis + {0, 1}, {2, 3}, {4, 5}, {6, 7}, // X axis + {0, 2}, + {1, 3}, + {4, 6}, + {5, 7}, // Y axis + {0, 4}, + {1, 5}, + {2, 6}, + {3, 7} // Z axis }; // Adjacency of edges to faces // Corrected Adjacency of edges to faces static const int2 edgeToFaces[12] = { - // Edge Index: | allEdges[i] | Shared Faces: - - /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) - /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0) - /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1) - /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1) - - /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0) - /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0) - /* 6 
(4-6) */ {2, 1}, // X- (2) and Z+ (1) - /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1) - - /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4) - /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4) - /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5) - /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) + // Edge Index: | allEdges[i] | Shared Faces: + + /* 0 (0-1) */ {4, 0}, // Y- (4) and Z- (0) + /* 1 (2-3) */ {5, 0}, // Y+ (5) and Z- (0) + /* 2 (4-5) */ {4, 1}, // Y- (4) and Z+ (1) + /* 3 (6-7) */ {5, 1}, // Y+ (5) and Z+ (1) + + /* 4 (0-2) */ {2, 0}, // X- (2) and Z- (0) + /* 5 (1-3) */ {3, 0}, // X+ (3) and Z- (0) + /* 6 (4-6) */ {2, 1}, // X- (2) and Z+ (1) + /* 7 (5-7) */ {3, 1}, // X+ (3) and Z+ (1) + + /* 8 (0-4) */ {2, 4}, // X- (2) and Y- (4) + /* 9 (1-5) */ {3, 4}, // X+ (3) and Y- (4) + /* 10 (2-6) */ {2, 5}, // X- (2) and Y+ (5) + /* 11 (3-7) */ {3, 5} // X+ (3) and Y+ (5) }; static float3 corners[8]; static float3 faceCenters[6] = { - float3(0,0,0), float3(0,0,0), float3(0,0,0), - float3(0,0,0), float3(0,0,0), float3(0,0,0) -}; + float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), + float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0)}; static const float3 localNormals[6] = { - float3(0, 0, -1), // Face 0 (Z-) - float3(0, 0, 1), // Face 1 (Z+) - float3(-1, 0, 0), // Face 2 (X-) - float3(1, 0, 0), // Face 3 (X+) - float3(0, -1, 0), // Face 4 (Y-) - float3(0, 1, 0) // Face 5 (Y+) + float3(0, 0, -1), // Face 0 (Z-) + float3(0, 0, 1), // Face 1 (Z+) + float3(-1, 0, 0), // Face 2 (X-) + float3(1, 0, 0), // Face 3 (X+) + float3(0, -1, 0), // Face 4 (Y-) + float3(0, 1, 0) // Face 5 (Y+) }; - // TODO: unused, remove later // Vertices are ordered CCW relative to the camera view. 
static const int silhouettes[27][7] = { - {6, 1, 3, 2, 6, 4, 5}, // 0: Black - {6, 2, 6, 4, 5, 7, 3}, // 1: White - {6, 0, 4, 5, 7, 3, 2}, // 2: Gray - {6, 1, 3, 7, 6, 4, 5,}, // 3: Red - {4, 4, 5, 7, 6, -1, -1}, // 4: Green - {6, 0, 4, 5, 7, 6, 2}, // 5: Blue - {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow - {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta - {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan - {6, 1, 3, 2, 6, 7, 5}, // 9: Orange - {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange - {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange - {4, 1, 3, 7, 5, -1, -1}, // 12: Pink - {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink - {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose - {6, 0, 1, 3, 7, 5, 4}, // 15: Purple - {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple - {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo - {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green - {6, 0, 2, 6, 7, 3, 1}, // 19: Lime - {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green - {6, 0, 2, 3, 7, 5, 1}, // 21: Navy - {4, 0, 2, 3, 1, -1, -1}, // 22: Sky Blue - {6, 0, 4, 6, 2, 3, 1}, // 23: Teal - {6, 0, 2, 3, 7, 5, 4}, // 24: Brown - {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige - {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown + {6, 1, 3, 2, 6, 4, 5}, // 0: Black + {6, 2, 6, 4, 5, 7, 3}, // 1: White + {6, 0, 4, 5, 7, 3, 2}, // 2: Gray + {6, 1, 3, 7, 6, 4, 5}, // 3: Red + {4, 4, 5, 7, 6, -1, -1}, // 4: Green + {6, 0, 4, 5, 7, 6, 2}, // 5: Blue + {6, 0, 1, 3, 7, 6, 4}, // 6: Yellow + {6, 0, 1, 5, 7, 6, 4}, // 7: Magenta + {6, 0, 1, 5, 7, 6, 2}, // 8: Cyan + {6, 1, 3, 2, 6, 7, 5}, // 9: Orange + {4, 2, 6, 7, 3, -1, -1}, // 10: Light Orange + {6, 0, 4, 6, 7, 3, 2}, // 11: Dark Orange + {4, 1, 3, 7, 5, -1, -1}, // 12: Pink + {6, 0, 4, 6, 7, 3, 2}, // 13: Light Pink + {4, 0, 4, 6, 2, -1, -1}, // 14: Deep Rose + {6, 0, 1, 3, 7, 5, 4}, // 15: Purple + {4, 0, 1, 5, 4, -1, -1}, // 16: Light Purple + {6, 0, 1, 5, 4, 6, 2}, // 17: Indigo + {6, 0, 2, 6, 7, 5, 1}, // 18: Dark Green + {6, 0, 2, 6, 7, 3, 1}, // 19: Lime + {6, 0, 4, 6, 7, 3, 1}, // 20: Forest Green + {6, 0, 2, 3, 7, 5, 1}, // 21: Navy + {4, 
0, 2, 3, 1, -1, -1}, // 22: Sky Blue + {6, 0, 4, 6, 2, 3, 1}, // 23: Teal + {6, 0, 2, 3, 7, 5, 4}, // 24: Brown + {6, 0, 2, 3, 1, 5, 4}, // 25: Tan/Beige + {6, 1, 5, 4, 6, 2, 3} // 26: Dark Brown }; // Binary packed silhouettes static const uint32_t binSilhouettes[27] = { - 0b11000000000000101100110010011001, - 0b11000000000000011111101100110010, - 0b11000000000000010011111101100000, - 0b11000000000000101100110111011001, - 0b10000000000000000000110111101100, - 0b11000000000000010110111101100000, - 0b11000000000000100110111011001000, - 0b11000000000000100110111101001000, - 0b11000000000000010110111101001000, - 0b11000000000000101111110010011001, - 0b10000000000000000000011111110010, - 0b11000000000000010011111110100000, - 0b10000000000000000000101111011001, - 0b11000000000000010011111110100000, - 0b10000000000000000000010110100000, - 0b11000000000000100101111011001000, - 0b10000000000000000000100101001000, - 0b11000000000000010110100101001000, - 0b11000000000000001101111110010000, - 0b11000000000000001011111110010000, - 0b11000000000000001011111110100000, - 0b11000000000000001101111011010000, - 0b10000000000000000000001011010000, - 0b11000000000000001011010110100000, - 0b11000000000000100101111011010000, - 0b11000000000000100101001011010000, - 0b11000000000000011010110100101001, + 0b11000000000000101100110010011001, + 0b11000000000000011111101100110010, + 0b11000000000000010011111101100000, + 0b11000000000000101100110111011001, + 0b10000000000000000000110111101100, + 0b11000000000000010110111101100000, + 0b11000000000000100110111011001000, + 0b11000000000000100110111101001000, + 0b11000000000000010110111101001000, + 0b11000000000000101111110010011001, + 0b10000000000000000000011111110010, + 0b11000000000000010011111110100000, + 0b10000000000000000000101111011001, + 0b11000000000000010011111110100000, + 0b10000000000000000000010110100000, + 0b11000000000000100101111011001000, + 0b10000000000000000000100101001000, + 0b11000000000000010110100101001000, + 
0b11000000000000001101111110010000, + 0b11000000000000001011111110010000, + 0b11000000000000001011111110100000, + 0b11000000000000001101111011010000, + 0b10000000000000000000001011010000, + 0b11000000000000001011010110100000, + 0b11000000000000100101111011010000, + 0b11000000000000100101001011010000, + 0b11000000000000011010110100101001, }; int getSilhouetteVertex(uint32_t packedSil, int index) { - return (packedSil >> (3 * index)) & 0x7; + return (packedSil >> (3 * index)) & 0x7; } // Get silhouette size int getSilhouetteSize(uint32_t sil) { - return (sil >> 29) & 0x7; - + return (sil >> 29) & 0x7; } // Check if vertex has negative z bool getVertexZNeg(int vertexIdx) { - return normalize(corners[vertexIdx]).z < 0.0f; +#if FAST + float3 localPos = float3( + (vertexIdx & 1) ? 1.0f : -1.0f, + (vertexIdx & 2) ? 1.0f : -1.0f, + (vertexIdx & 4) ? 1.0f : -1.0f); + + float transformedZ = dot(pc.modelMatrix[2].xyz, localPos) + pc.modelMatrix[2].w; + return transformedZ < 0.0f; +#else + return corners[vertexIdx].z < 0.0f; +#endif } -#include "Drawing.hlsl" +float3 getVertex(int vertexIdx) +{ +#if FAST + // Reconstruct local cube corner from index bits + float sx = (vertexIdx & 1) ? 1.0f : -1.0f; + float sy = (vertexIdx & 2) ? 1.0f : -1.0f; + float sz = (vertexIdx & 4) ? 
1.0f : -1.0f; + + float4x3 model = transpose(pc.modelMatrix); + + // Transform to world + // Full position, not just Z like getVertexZNeg + return model[0].xyz * sx + + model[1].xyz * sy + + model[2].xyz * sz + + model[3].xyz; + // return mul(pc.modelMatrix, float4(sx, sy, sz, 1.0f)); +#else + return corners[vertexIdx]; +#endif +} +#include "Drawing.hlsl" -void setDebugData(uint32_t sil, int3 region, int configIndex, uint32_t clippedVertexCount) +void setDebugData(uint32_t sil, int3 region, int configIndex) { #if DEBUG_DATA - DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); - DebugDataBuffer[0].region = uint3(region); - DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); - DebugDataBuffer[0].clippedVertexCount = clippedVertexCount; - for (int i = 0; i < 6; i++) - { - DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); - } - DebugDataBuffer[0].silhouette = sil; + DebugDataBuffer[0].silhouetteVertexCount = uint32_t(getSilhouetteSize(sil)); + DebugDataBuffer[0].region = uint3(region); + DebugDataBuffer[0].silhouetteIndex = uint32_t(configIndex); + for (int i = 0; i < 6; i++) + { + DebugDataBuffer[0].vertices[i] = uint32_t(getSilhouetteVertex(sil, i)); + } + DebugDataBuffer[0].silhouette = sil; #endif } float2 toCircleSpace(float2 uv) { - float2 p = uv * 2.0f - 1.0f; - float aspect = pc.viewport.z / pc.viewport.w; - p.x *= aspect; - return p; + float2 p = uv * 2.0f - 1.0f; + float aspect = pc.viewport.z / pc.viewport.w; + p.x *= aspect; + return p; } -uint32_t packSilhouette(const int s[7]) +uint32_t packSilhouette(const int s[7]) { - uint32_t packed = 0; - int size = s[0] & 0x7; // 3 bits for size - - // Pack vertices LSB-first (vertex1 in lowest 3 bits above size) - for (int i = 1; i <= 6; ++i) { - int v = s[i]; - if (v < 0) v = 0; // replace unused vertices with 0 - packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1) - } - - // Put size in the MSB (bits 29-31 for a 32-bit uint, leaving 29 
bits for vertices) - packed |= (size & 0x7) << 29; - - return packed; + uint32_t packed = 0; + int size = s[0] & 0x7; // 3 bits for size + + // Pack vertices LSB-first (vertex1 in lowest 3 bits above size) + for (int i = 1; i <= 6; ++i) + { + int v = s[i]; + if (v < 0) + v = 0; // replace unused vertices with 0 + packed |= (v & 0x7) << (3 * (i - 1)); // vertex i-1 shifted by 3*(i-1) + } + + // Put size in the MSB (bits 29-31 for a 32-bit uint, leaving 29 bits for vertices) + packed |= (size & 0x7) << 29; + + return packed; } void computeCubeGeo() { - for (int i = 0; i < 8; i++) - for (int i = 0; i < 8; i++) - { - float3 localPos = constCorners[i]; - float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; - corners[i] = worldPos.xyz; - faceCenters[i / 4] += worldPos / 4.0f; - faceCenters[2 + i % 2] += worldPos / 4.0f; - faceCenters[4 + (i / 2) % 2] += worldPos / 4.0f; - } + for (int i = 0; i < 8; i++) + { + float3 localPos = constCorners[i]; + float3 worldPos = mul(pc.modelMatrix, float4(localPos, 1.0f)).xyz; + corners[i] = worldPos.xyz; + faceCenters[i / 4] += worldPos / 4.0f; + faceCenters[2 + i % 2] += worldPos / 4.0f; + faceCenters[4 + (i / 2) % 2] += worldPos / 4.0f; + } +} + +// Helper to draw an edge with proper color mapping +float4 drawEdge(int originalEdgeIdx, float3 pts[2], float3 spherePos, float aaWidth, float width = 0.01f) +{ + float4 edgeContribution = drawGreatCircleArc(spherePos, pts, aaWidth, width); + return float4(colorLUT[originalEdgeIdx] * edgeContribution.a, edgeContribution.a); +}; + +float4 drawSilhouette(uint32_t vertexCount, uint32_t sil, float3 spherePos, float aaWidth) +{ + float4 color = 0; + + // Build clip mask (z < 0) + uint32_t clipMask = 0u; + NBL_UNROLL + for (int i = 0; i < 4; i++) + clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; + + if (vertexCount == 6) + { + NBL_UNROLL + for (int i = 4; i < 6; i++) + clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 
1u : 0u) << i; + } + + int clipCount = countbits(clipMask); + + // Early exit if fully clipped + if (clipCount == vertexCount) + return color; + + // No clipping needed - fast path + if (clipCount == 0) + { + for (int i = 0; i < vertexCount; i++) + { + int i0 = i; + int i1 = (i + 1) % vertexCount; + + float3 v0 = getVertex(getSilhouetteVertex(sil, i0)); + float3 v1 = getVertex(getSilhouetteVertex(sil, i1)); + float3 pts[2] = {v0, v1}; + + color += drawEdge(i1, pts, spherePos, aaWidth); + } + return color; + } + + // Rotate clip mask so positives come first + uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); + bool wrapAround = ((clipMask & 1u) != 0u) && + ((clipMask & (1u << (vertexCount - 1))) != 0u); + int rotateAmount = wrapAround + ? firstbitlow(invertedMask) // -> First POSITIVE + : firstbithigh(clipMask) + 1; // -> First vertex AFTER last negative + + uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); + uint32_t rotatedSil = rotr(sil, rotateAmount * 3, vertexCount * 3); + + int positiveCount = vertexCount - clipCount; + + // ALWAYS compute both clip points + int lastPosIdx = positiveCount - 1; + int firstNegIdx = positiveCount; + float3 vLastPos = getVertex(getSilhouetteVertex(rotatedSil, lastPosIdx)); + float3 vFirstNeg = getVertex(getSilhouetteVertex(rotatedSil, firstNegIdx)); + float t = vLastPos.z / (vLastPos.z - vFirstNeg.z); + float3 clipA = lerp(vLastPos, vFirstNeg, t); + + float3 vLastNeg = getVertex(getSilhouetteVertex(rotatedSil, vertexCount - 1)); + float3 vFirstPos = getVertex(getSilhouetteVertex(rotatedSil, 0)); + t = vLastNeg.z / (vLastNeg.z - vFirstPos.z); + float3 clipB = lerp(vLastNeg, vFirstPos, t); + + // Draw positive edges + NBL_UNROLL + for (int i = 0; i < positiveCount; i++) + { + + float3 v0 = getVertex(getSilhouetteVertex(rotatedSil, i)); + bool useClipA = (i == positiveCount - 1); + float3 v1 = useClipA ? 
clipA : getVertex(getSilhouetteVertex(rotatedSil, (i + 1) % vertexCount)); + + float3 pts[2] = {v0, v1}; + color += drawEdge(i + 1, pts, spherePos, aaWidth); + } + + // NP edge + if (clipCount > 0 && clipCount < vertexCount) + { + float3 vFirst = getVertex(getSilhouetteVertex(rotatedSil, 0)); + float3 npPts[2] = {clipB, vFirst}; + color += drawEdge(0, npPts, spherePos, aaWidth); + } + + // Horizon arc + if (clipCount > 0 && clipCount < vertexCount) + { + float3 arcPts[2] = {clipA, clipB}; + color += drawEdge(23, arcPts, spherePos, aaWidth, 0.6f); + } + +#if DEBUG_DATA + DebugDataBuffer[0].clipMask = clipMask; + DebugDataBuffer[0].clipCount = clipCount; + { + int transitions = 0; + for (int i = 0; i < vertexCount; i++) + { + bool a = (rotatedClipMask >> i) & 1u; + bool b = (rotatedClipMask >> ((i + 1) % vertexCount)) & 1u; + if (a != b) + transitions++; + } + // transitions must be 0 or 2 + DebugDataBuffer[0].MoreThanTwoBitTransitions = transitions > 2; + DebugDataBuffer[0].rotatedClipMask = rotatedClipMask; + DebugDataBuffer[0].rotateAmount = rotateAmount; + DebugDataBuffer[0].positiveVertCount = positiveCount; + DebugDataBuffer[0].wrapAround = (uint32_t)wrapAround; + DebugDataBuffer[0].rotatedSil = rotatedSil; + } +#endif + return color; } [[vk::location(0)]] float32_t4 main(SVertexAttributes vx) : SV_Target0 { - float4 color = float4(0, 0, 0, 0); - float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); - float2 p = toCircleSpace(vx.uv); + float4 color = float4(0, 0, 0, 0); + for (int i = 0; i < 1; i++) + { + + float aaWidth = length(float2(ddx(vx.uv.x), ddy(vx.uv.y))); + float2 p = toCircleSpace(vx.uv); - float2 normalized = p / CIRCLE_RADIUS; - float r2 = dot(normalized, normalized); + float2 normalized = p / CIRCLE_RADIUS; + float r2 = dot(normalized, normalized); - float3 spherePos; - if (r2 <= 1.0f) - { - spherePos = float3(normalized.x, normalized.y, sqrt(1.0f - r2)); - } - else - { - float uv2Plus1 = r2 + 1.0f; - spherePos = float3(normalized.x * 2.0f, 
normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; - } - spherePos = normalize(spherePos); + float3 spherePos; + if (r2 <= 1.0f) + { + spherePos = float3(normalized.x, normalized.y, sqrt(1.0f - r2)); + } + else + { + float uv2Plus1 = r2 + 1.0f; + spherePos = float3(normalized.x * 2.0f, normalized.y * 2.0f, 1.0f - r2) / uv2Plus1; + } + spherePos = normalize(spherePos); - computeCubeGeo(); + computeCubeGeo(); - float4x3 columnModel = transpose(pc.modelMatrix); + float4x3 columnModel = transpose(pc.modelMatrix); - float3 obbCenter = columnModel[3].xyz; + float3 obbCenter = columnModel[3].xyz; - float3x3 upper3x3 = (float3x3)columnModel; + float3x3 upper3x3 = (float3x3)columnModel; - float3 rcpScales = rcp(float3( - dot(upper3x3[0], upper3x3[0]), - dot(upper3x3[1], upper3x3[1]), - dot(upper3x3[2], upper3x3[2]) - )); + float3 rcpSqScales = rcp(float3( + dot(upper3x3[0], upper3x3[0]), + dot(upper3x3[1], upper3x3[1]), + dot(upper3x3[2], upper3x3[2]))); - float3 normalizedProj = mul(upper3x3, obbCenter) * rcpScales; + float3 normalizedProj = mul(upper3x3, obbCenter) * rcpSqScales; - int3 region = int3( - normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), - normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 2 : 1), - normalizedProj.z < -1.0f ? 0 : (normalizedProj.z > 1.0f ? 2 : 1) - ); - int configIndex = region.x + region.y * 3 + region.z * 9; + int3 region = int3( + normalizedProj.x < -1.0f ? 0 : (normalizedProj.x > 1.0f ? 2 : 1), + normalizedProj.y < -1.0f ? 0 : (normalizedProj.y > 1.0f ? 2 : 1), + normalizedProj.z < -1.0f ? 0 : (normalizedProj.z > 1.0f ? 
2 : 1)); - // uint32_t sil = packSilhouette(silhouettes[configIndex]); - uint32_t sil = binSilhouettes[configIndex]; + int configIndex = region.x + region.y * 3 + region.z * 9; - int vertexCount = getSilhouetteSize(sil); - bool longSilhouette = (vertexCount == 6); - uint32_t silEdgeMask = 0; + // uint32_t sil = packSilhouette(silhouettes[configIndex]); + uint32_t sil = binSilhouettes[configIndex]; + + int vertexCount = getSilhouetteSize(sil); + uint32_t silEdgeMask = 0; #if DEBUG_DATA - { - for (int i = 0; i < vertexCount; i++) - { - int vIdx = i % vertexCount; - int v1Idx = (i + 1) % vertexCount; - - int v0Corner = getSilhouetteVertex(sil, vIdx); - int v1Corner = getSilhouetteVertex(sil, v1Idx); - // Mark edge as part of silhouette - for (int e = 0; e < 12; e++) - { - int2 edge = allEdges[e]; - if ((edge.x == v0Corner && edge.y == v1Corner) || - (edge.x == v1Corner && edge.y == v0Corner)) - { - silEdgeMask |= (1u << e); - } - } - } - validateEdgeVisibility(sil, vertexCount, silEdgeMask); - } + { + for (int i = 0; i < vertexCount; i++) + { + int vIdx = i % vertexCount; + int v1Idx = (i + 1) % vertexCount; + + int v0Corner = getSilhouetteVertex(sil, vIdx); + int v1Corner = getSilhouetteVertex(sil, v1Idx); + // Mark edge as part of silhouette + for (int e = 0; e < 12; e++) + { + int2 edge = allEdges[e]; + if ((edge.x == v0Corner && edge.y == v1Corner) || + (edge.x == v1Corner && edge.y == v0Corner)) + { + silEdgeMask |= (1u << e); + } + } + } + validateEdgeVisibility(sil, vertexCount, silEdgeMask); + } #endif - // Build clip mask for vertices below horizon (z < 0) - uint32_t clipMask = 0u; - NBL_UNROLL - for (int i = 0; i < 6; i++) - { - if (i >= vertexCount) break; - clipMask |= (getVertexZNeg(getSilhouetteVertex(sil, i)) ? 1u : 0u) << i; - } - - int clipCount = countbits(clipMask); - - // Total clipped vertices - int clippedVertCount = vertexCount + (clipMask != 0u ? 
(2 - clipCount) : 0); - - // Find rotation amount to place positive vertices first - int rotateAmount = 0; - if (clipMask != 0u) - { - uint32_t invertedMask = ~clipMask & ((1u << vertexCount) - 1u); - bool wrapAround = ((clipMask & 1u) != 0u) && ((clipMask >> (vertexCount - 1)) & 1u); - - rotateAmount = wrapAround ? - ((firstbithigh(invertedMask) + 1) % vertexCount) : - firstbitlow(clipMask); - } - - // Rotate silhouette bits - uint32_t vertexBits = sil & 0x1FFFFFFF; - uint32_t rotatedVertexBits = rotr(vertexBits, rotateAmount * 3, vertexCount * 3); - uint32_t rotatedSil = (sil & 0xE0000000) | rotatedVertexBits; - - // Rotate the clip mask to match - uint32_t rotatedClipMask = rotr(clipMask, rotateAmount, vertexCount); - - // Draw clipped silhouette edges - for (int i = 0; i < clippedVertCount; i++) - { - int nextI = (i + 1) % clippedVertCount; - - int vIdx = i % vertexCount; - int v1Idx = nextI % vertexCount; - - // Extract clip bits directly - bool v0Clipped = (rotatedClipMask >> vIdx) & 1u; - bool v1Clipped = (rotatedClipMask >> v1Idx) & 1u; - - // Skip if both clipped - if (v0Clipped && v1Clipped) continue; - - int v0Corner = getSilhouetteVertex(rotatedSil, vIdx); - int v1Corner = getSilhouetteVertex(rotatedSil, v1Idx); - - float3 v0 = normalize(corners[v0Corner]); - float3 v1 = normalize(corners[v1Corner]); - - float3 points[2] = { corners[v0Corner], corners[v1Corner] }; - - // Clip using bit state - if (v0Clipped) - { - float t = v0.z / (v0.z - v1.z); - points[0] = normalize(lerp(corners[v0Corner], corners[v1Corner], t)); - } - else if (v1Clipped) - { - float t = v0.z / (v0.z - v1.z); - points[1] = normalize(lerp(corners[v0Corner], corners[v1Corner], t)); - } - - // Draw edge - float4 edgeContribution = drawGreatCircleArc(spherePos, points, 1, aaWidth); - color += float4(colorLUT[i] * edgeContribution.a, edgeContribution.a); - - } - - - setDebugData(sil, region, configIndex, clippedVertCount); - - color += drawHiddenEdges(spherePos, silEdgeMask, aaWidth); - 
color += drawCorners(spherePos, p, aaWidth); - color += drawRing(p, aaWidth); - - if (all(vx.uv >= float2(0.49f, 0.49f)) && all(vx.uv <= float2(0.51f, 0.51f))) - { - return float4(colorLUT[configIndex], 1.0f); - } - - return color; + + uint32_t positiveCount = 0; + color += drawSilhouette(vertexCount, sil, spherePos, aaWidth); + setDebugData(sil, region, configIndex); + + color += drawHiddenEdges(spherePos, silEdgeMask, aaWidth); + color += drawCorners(spherePos, p, aaWidth); + color += drawRing(p, aaWidth); + + if (all(vx.uv >= float2(0.49f, 0.49f)) && all(vx.uv <= float2(0.51f, 0.51f))) + { + return float4(colorLUT[configIndex], 1.0f); + } + } + + return color; } \ No newline at end of file diff --git a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl index 3c87a48bc..c8532e796 100644 --- a/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl +++ b/72_SolidAngleVisualizer/app_resources/hlsl/common.hlsl @@ -3,6 +3,7 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #define DEBUG_DATA 1 +#define FAST 1 namespace nbl { @@ -13,12 +14,19 @@ namespace nbl { uint32_t3 region; uint32_t silhouetteIndex; - + uint32_t silhouetteVertexCount; uint32_t silhouette; - uint32_t clippedVertexCount; + uint32_t positiveVertCount; uint32_t edgeVisibilityMismatch; + uint32_t clipMask; + uint32_t clipCount; + uint32_t rotatedSil; + uint32_t wrapAround; + uint32_t rotatedClipMask; + uint32_t rotateAmount; + uint32_t MoreThanTwoBitTransitions; uint32_t vertices[6]; }; @@ -29,24 +37,22 @@ namespace nbl }; static const float32_t3 colorLUT[27] = { - float32_t3(0, 0, 0), float32_t3(1, 1, 1), float32_t3(0.5, 0.5, 0.5), - float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), - float32_t3(1, 1, 0), float32_t3(1, 0, 1), float32_t3(0, 1, 1), - float32_t3(1, 0.5, 0), float32_t3(1, 0.65, 0), float32_t3(0.8, 0.4, 0), - float32_t3(1, 0.4, 0.7), float32_t3(1, 0.75, 0.8), float32_t3(0.7, 0.1, 0.3), - float32_t3(0.5, 0, 0.5), 
float32_t3(0.6, 0.4, 0.8), float32_t3(0.3, 0, 0.5), - float32_t3(0, 0.5, 0), float32_t3(0.5, 1, 0), float32_t3(0, 0.5, 0.25), - float32_t3(0, 0, 0.5), float32_t3(0.3, 0.7, 1), float32_t3(0, 0.4, 0.6), - float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1) - }; + float32_t3(0, 0, 0), float32_t3(1, 1, 1), float32_t3(0.5, 0.5, 0.5), + float32_t3(1, 0, 0), float32_t3(0, 1, 0), float32_t3(0, 0, 1), + float32_t3(1, 1, 0), float32_t3(1, 0, 1), float32_t3(0, 1, 1), + float32_t3(1, 0.5, 0), float32_t3(1, 0.65, 0), float32_t3(0.8, 0.4, 0), + float32_t3(1, 0.4, 0.7), float32_t3(1, 0.75, 0.8), float32_t3(0.7, 0.1, 0.3), + float32_t3(0.5, 0, 0.5), float32_t3(0.6, 0.4, 0.8), float32_t3(0.3, 0, 0.5), + float32_t3(0, 0.5, 0), float32_t3(0.5, 1, 0), float32_t3(0, 0.5, 0.25), + float32_t3(0, 0, 0.5), float32_t3(0.3, 0.7, 1), float32_t3(0, 0.4, 0.6), + float32_t3(0.6, 0.4, 0.2), float32_t3(0.8, 0.7, 0.3), float32_t3(0.4, 0.3, 0.1)}; #ifndef __HLSL_VERSION - static const char* colorNames[27] = {"Black", - "White", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", - "Orange", "Light Orange", "Dark Orange", "Pink", "Light Pink", "Deep Rose", "Purple", "Light Purple", - "Indigo", "Dark Green", "Lime", "Forest Green", "Navy", "Sky Blue", "Teal", "Brown", - "Tan/Beige", "Dark Brown" - }; + static const char *colorNames[27] = {"Black", + "White", "Gray", "Red", "Green", "Blue", "Yellow", "Magenta", "Cyan", + "Orange", "Light Orange", "Dark Orange", "Pink", "Light Pink", "Deep Rose", "Purple", "Light Purple", + "Indigo", "Dark Green", "Lime", "Forest Green", "Navy", "Sky Blue", "Teal", "Brown", + "Tan/Beige", "Dark Brown"}; #endif // __HLSL_VERSION } } diff --git a/72_SolidAngleVisualizer/main.cpp b/72_SolidAngleVisualizer/main.cpp index 1c52547af..64f4cb100 100644 --- a/72_SolidAngleVisualizer/main.cpp +++ b/72_SolidAngleVisualizer/main.cpp @@ -475,13 +475,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR 
instance.packedGeo = m_renderer->getGeometries().data(); // cube // +interface.gcIndex; m_renderer->render(cb, viewParams); // draw the cube/OBB - // TODO: a better way to get identity matrix - float32_t3x4 origin = { - 1.0f,0.0f,0.0f,0.0f, - 0.0f,1.0f,0.0f,0.0f, - 0.0f,0.0f,1.0f,0.0f - }; - memcpy(&instance.world, &origin, sizeof(instance.world)); + instance.world = float32_t3x4(1.0f); instance.packedGeo = m_renderer->getGeometries().data() + 2; // disk m_renderer->render(cb, viewParams); } @@ -1112,8 +1106,9 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR drawColorField("silhouetteIndex", m_GPUOutResulData.silhouetteIndex); ImGui::Text("silhouette Vertex Count: %u", m_GPUOutResulData.silhouetteVertexCount); - ImGui::Text("silhouette Clipped VertexCount: %u", m_GPUOutResulData.clippedVertexCount); + ImGui::Text("silhouette Positive VertexCount: %u", m_GPUOutResulData.positiveVertCount); ImGui::Text("Silhouette Mismatch: %s", m_GPUOutResulData.edgeVisibilityMismatch ? "true" : "false"); + ImGui::Text("More Than Two Bit Transitions: %s", m_GPUOutResulData.MoreThanTwoBitTransitions ? 
"true" : "false"); { float32_t3 xAxis = m_OBBModelMatrix[0].xyz; @@ -1141,12 +1136,12 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR lastSilhouetteIndex = m_GPUOutResulData.silhouetteIndex; } - if (!m_GPUOutResulData.edgeVisibilityMismatch) + if (!m_GPUOutResulData.edgeVisibilityMismatch || !m_GPUOutResulData.MoreThanTwoBitTransitions) { // Reset flag when mismatch is cleared modalShown = false; } - if (m_GPUOutResulData.edgeVisibilityMismatch && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown) // 13 means we're inside the cube, so don't care + if ((m_GPUOutResulData.edgeVisibilityMismatch || m_GPUOutResulData.MoreThanTwoBitTransitions) && m_GPUOutResulData.silhouetteIndex != 13 && !modalShown) // 13 means we're inside the cube, so don't care { // Open modal popup only once per configuration ImGui::OpenPopup("Edge Visibility Mismatch Warning"); @@ -1165,10 +1160,7 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR // Show configuration info ImGui::TextWrapped("Configuration Index: %u", m_GPUOutResulData.silhouetteIndex); - ImGui::TextWrapped("Region: (%d, %d, %d)", - m_GPUOutResulData.region.x, - m_GPUOutResulData.region.y, - m_GPUOutResulData.region.z); + ImGui::TextWrapped("Region: (%u, %u, %u)", m_GPUOutResulData.region.x, m_GPUOutResulData.region.y, m_GPUOutResulData.region.z); ImGui::Spacing(); ImGui::Text("Mismatched Vertices (bitmask): 0x%08X", m_GPUOutResulData.edgeVisibilityMismatch); @@ -1203,13 +1195,26 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR ImGui::Separator(); // Silhouette mask printed in binary - char buf[33]; - for (int i = 0; i < 32; i++) - buf[i] = (m_GPUOutResulData.silhouette & (1u << (31 - i))) ? 
'1' : '0'; - buf[32] = '\0'; - ImGui::Text("silhouette: 0x%08X", m_GPUOutResulData.silhouette); - ImGui::Text("binary: %s", buf); + + auto printBin = [](uint32_t bin, const char* name) + { + char buf[33]; + for (int i = 0; i < 32; i++) + buf[i] = (bin & (1u << (31 - i))) ? '1' : '0'; + buf[32] = '\0'; + ImGui::Text("%s: 0x%08X", name, bin); + ImGui::Text("binary: 0b%s", buf); + ImGui::Separator(); + }; + printBin(m_GPUOutResulData.silhouette, "Silhouette"); + printBin(m_GPUOutResulData.rotatedSil, "rotatedSilhouette"); + + printBin(m_GPUOutResulData.clipCount, "clipCount"); + printBin(m_GPUOutResulData.clipMask, "clipMask"); + printBin(m_GPUOutResulData.rotatedClipMask, "rotatedClipMask"); + printBin(m_GPUOutResulData.rotateAmount, "rotateAmount"); + printBin(m_GPUOutResulData.wrapAround, "wrapAround"); } ImGui::End(); } @@ -1240,29 +1245,56 @@ class SolidAngleVisualizer final : public MonoWindowApplication, public BuiltinR }; static RandomSampler rng(69); // Initialize RNG with seed + + // Helper function to check if cube intersects unit sphere at origin + auto isCubeOutsideUnitSphere = [](const float32_t3& translation, const float32_t3& scale) -> bool { + float cubeRadius = glm::length(scale) * 0.5f; + float distanceToCenter = glm::length(translation); + return (distanceToCenter - cubeRadius) > 1.0f; + }; + + static TRS lastTRS = {}; if (ImGui::Button("Randomize Translation")) { - m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); } ImGui::SameLine(); - if (ImGui::Button("Randomize Rotation")) { + lastTRS = m_TRS; // Backup before randomizing m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 
180.f), rng.nextFloat(-180.f, 180.f)); } ImGui::SameLine(); - if (ImGui::Button("Randomize Scale")) { - m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do { + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); } - - ImGui::SameLine(); + //ImGui::SameLine(); if (ImGui::Button("Randomize All")) { - m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); - m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); - m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + lastTRS = m_TRS; // Backup before randomizing + int attempts = 0; + do { + m_TRS.translation = float32_t3(rng.nextFloat(-3.f, 3.f), rng.nextFloat(-3.f, 3.f), rng.nextFloat(-1.f, 3.f)); + m_TRS.rotation = float32_t3(rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f), rng.nextFloat(-180.f, 180.f)); + m_TRS.scale = float32_t3(rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f), rng.nextFloat(0.5f, 2.0f)); + attempts++; + } while (!isCubeOutsideUnitSphere(m_TRS.translation, m_TRS.scale) && attempts < 100); + } + ImGui::SameLine(); + if (ImGui::Button("Revert to Last")) + { + m_TRS = lastTRS; // Restore backed-up TRS } addMatrixTable("Model Matrix", "ModelMatrixTable", 4, 4, &m_OBBModelMatrix[0][0]);