From 867b6b8a027616ed20a43b9d89c4c7c32335614a Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 13:57:18 +0100 Subject: [PATCH 001/140] Created `nbl::system::to_string` utility function --- examples_tests | 2 +- include/nbl/system/to_string.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 4ab1de2235..0995b6797a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 4ab1de2235365833db2d089259000bec2bcce3e3 +Subproject commit 0995b6797adc8c7bd1af9fded71098a035a04ffc diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 92888704c0..70ecfba211 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -2,6 +2,8 @@ #define _NBL_SYSTEM_TO_STRING_INCLUDED_ #include +#include +#include namespace nbl { @@ -19,6 +21,24 @@ struct to_string_helper } }; +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_uint64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_int64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + template struct to_string_helper> { @@ -39,6 +59,18 @@ struct to_string_helper> } }; +template +struct to_string_helper> +{ + using value_t = hlsl::morton::code; + static std::string __call(value_t value) + { + TestValueToTextConverter mortonCodeDataToTextConverter; + return mortonCodeDataToTextConverter(value.value); + } +}; + + } template From ac75675d228b86ab05935f16cd196f14f53d2d88 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 15:37:21 +0100 Subject: [PATCH 002/140] Removed from the `to_string` function specialization of types not present yet in the master branch Signed-off-by: Corey --- include/nbl/system/to_string.h | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git 
a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 70ecfba211..92888704c0 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -2,8 +2,6 @@ #define _NBL_SYSTEM_TO_STRING_INCLUDED_ #include -#include -#include namespace nbl { @@ -21,24 +19,6 @@ struct to_string_helper } }; -template<> -struct to_string_helper -{ - static std::string __call(const hlsl::emulated_uint64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - -template<> -struct to_string_helper -{ - static std::string __call(const hlsl::emulated_int64_t& value) - { - return std::to_string(static_cast(value)); - } -}; - template struct to_string_helper> { @@ -59,18 +39,6 @@ struct to_string_helper> } }; -template -struct to_string_helper> -{ - using value_t = hlsl::morton::code; - static std::string __call(value_t value) - { - TestValueToTextConverter mortonCodeDataToTextConverter; - return mortonCodeDataToTextConverter(value.value); - } -}; - - } template From 3e5cbdef01d6c8c275efc9ed3732db3c42b10646 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 16:04:55 +0100 Subject: [PATCH 003/140] Restored the removed `system::to_string` specializations Signed-off-by: Corey --- include/nbl/system/to_string.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 92888704c0..70ecfba211 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -2,6 +2,8 @@ #define _NBL_SYSTEM_TO_STRING_INCLUDED_ #include +#include +#include namespace nbl { @@ -19,6 +21,24 @@ struct to_string_helper } }; +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_uint64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + +template<> +struct to_string_helper +{ + static std::string __call(const hlsl::emulated_int64_t& value) + { + return std::to_string(static_cast(value)); + } +}; + 
template struct to_string_helper> { @@ -39,6 +59,18 @@ struct to_string_helper> } }; +template +struct to_string_helper> +{ + using value_t = hlsl::morton::code; + static std::string __call(value_t value) + { + TestValueToTextConverter mortonCodeDataToTextConverter; + return mortonCodeDataToTextConverter(value.value); + } +}; + + } template From 7871e1bed0d3d084dc3e8cc01bb7638dd0aa3906 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 1 Dec 2025 16:14:26 +0100 Subject: [PATCH 004/140] Fixes Signed-off-by: Corey --- include/nbl/system/to_string.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 70ecfba211..3169503a06 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -65,8 +65,7 @@ struct to_string_helper> using value_t = hlsl::morton::code; static std::string __call(value_t value) { - TestValueToTextConverter mortonCodeDataToTextConverter; - return mortonCodeDataToTextConverter(value.value); + return to_string_helper::__call(value.value); } }; From e97c681b830607b2b4e25b1f3576a07d2d432a3e Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 16:09:17 +0100 Subject: [PATCH 005/140] Updated examples_tests Signed-off-by: Corey --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 0995b6797a..cc4f871dce 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 0995b6797adc8c7bd1af9fded71098a035a04ffc +Subproject commit cc4f871dce0ccf56b54118c4e90ecf2b3107d19e From 8d773e5c0704e1ab0a7a71609061135cbe2927e0 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 17:08:31 +0100 Subject: [PATCH 006/140] Updated examples Signed-off-by: Corey --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index cc4f871dce..ab5e466db4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ 
-Subproject commit cc4f871dce0ccf56b54118c4e90ecf2b3107d19e +Subproject commit ab5e466db43ff94e748bae478d0c0e28a492dfc8 From 138fb9b610e6d72d024474c1494972ed88b501db Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 17:19:21 +0100 Subject: [PATCH 007/140] Updated examples Signed-off-by: Corey --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index ab5e466db4..c593979c42 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit ab5e466db43ff94e748bae478d0c0e28a492dfc8 +Subproject commit c593979c42627b49524690ea7a7717a2d7ca5fdf From d81a1998b8b4f782da31380218defb43602df048 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 19:44:12 +0100 Subject: [PATCH 008/140] Updated examples Signed-off-by: Corey --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index c593979c42..8114cb0740 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit c593979c42627b49524690ea7a7717a2d7ca5fdf +Subproject commit 8114cb0740323bbde03375c731bce34d6eeeb8d9 From e9dc2795ed716141592d60fa094c3782f05cdb48 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:15:57 +0100 Subject: [PATCH 009/140] get latest glm Signed-off-by: Corey --- 3rdparty/glm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/glm b/3rdparty/glm index 2d4c4b4dd3..8f6213d379 160000 --- a/3rdparty/glm +++ b/3rdparty/glm @@ -1 +1 @@ -Subproject commit 2d4c4b4dd31fde06cfffad7915c2b3006402322f +Subproject commit 8f6213d379a904f5ae910e09a114e066e25faf57 From 5e30ad9fdfb7eae76ec4f185e190d1f13be00caa Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Mon, 15 Dec 2025 20:32:48 +0100 Subject: [PATCH 010/140] Extended matrix Signed-off-by: Corey --- .../nbl/builtin/hlsl/cpp_compat/matrix.hlsl | 9 +++++++++ include/nbl/system/to_string.h | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git 
a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl index 1ee5edf275..712ce5e979 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/matrix.hlsl @@ -44,6 +44,15 @@ struct matrix final : private glm::mat { return glm::operator*(reinterpret_cast(rhs), lhs); } + + inline friend bool operator==(matrix const& lhs, matrix const& rhs) + { + return glm::operator==(reinterpret_cast(lhs), reinterpret_cast(rhs)); + } + inline friend bool operator!=(matrix const& lhs, matrix const& rhs) + { + return glm::operator!=(reinterpret_cast(lhs), reinterpret_cast(rhs)); + } }; #endif diff --git a/include/nbl/system/to_string.h b/include/nbl/system/to_string.h index 3169503a06..c055434fa4 100644 --- a/include/nbl/system/to_string.h +++ b/include/nbl/system/to_string.h @@ -59,6 +59,24 @@ struct to_string_helper> } }; +template +struct to_string_helper> +{ + static std::string __call(const hlsl::matrix& matrix) + { + std::stringstream output; + output << '\n'; + for (int i = 0; i < N; ++i) + { + output << "{ "; + for (int j = 0; j < M; ++j) + output << matrix[i][j] << ", "; + output << "}\n"; + } + return output.str(); + } +}; + template struct to_string_helper> { From 95170b993d1d6ad133ce03355495a8f0cb6610ff Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:28:22 +0100 Subject: [PATCH 011/140] update gli now Signed-off-by: Corey --- 3rdparty/gli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/gli b/3rdparty/gli index c4e6446d3b..2749a197e8 160000 --- a/3rdparty/gli +++ b/3rdparty/gli @@ -1 +1 @@ -Subproject commit c4e6446d3b646538026fd5a95533daed952878d4 +Subproject commit 2749a197e88f94858f4108732824b3790064f6ec From 252c777e6c539bf0398f40efcdf196e127f731f6 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 15 Dec 2025 20:32:32 +0100 Subject: [PATCH 012/140] fixed example 22 --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/examples_tests b/examples_tests index 8114cb0740..2d59279740 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 8114cb0740323bbde03375c731bce34d6eeeb8d9 +Subproject commit 2d5927974073dd3ada6a0a52134355d8022876a3 From d86b4cb765d8a5288abfdac514807cd5622e5975 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Wed, 17 Dec 2025 18:38:00 +0100 Subject: [PATCH 013/140] Implemented relative approx compare Signed-off-by: Corey --- include/nbl/builtin/hlsl/ieee754.hlsl | 18 ++++ .../hlsl/testing/relative_approx_compare.hlsl | 94 +++++++++++++++++++ .../hlsl/vector_utils/vector_traits.hlsl | 21 ++--- src/nbl/builtin/CMakeLists.txt | 2 + 4 files changed, 121 insertions(+), 14 deletions(-) create mode 100644 include/nbl/builtin/hlsl/testing/relative_approx_compare.hlsl diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index 29c48a79d1..a3930a362a 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -251,6 +251,24 @@ NBL_CONSTEXPR_FUNC T flipSignIfRHSNegative(T val, T flip) return impl::flipSignIfRHSNegative_helper::__call(val, flip); } +template ) +NBL_CONSTEXPR_FUNC bool isSubnormal(T val) +{ + const uint32_t biasedExponent = extractBiasedExponent(val); + const typename unsigned_integer_of_size::type mantissa = extractMantissa(val); + return biasedExponent == 0 && mantissa != 0u; +} + +template ) +NBL_CONSTEXPR_FUNC bool isZero(T val) +{ + using traits_t = traits; + using AsUint = typename unsigned_integer_of_size::type; + + const AsUint exponentAndMantissaMask = ~traits_t::signMask; + return !(ieee754::impl::bitCastToUintType(val) & exponentAndMantissaMask); +} + } } } diff --git a/include/nbl/builtin/hlsl/testing/relative_approx_compare.hlsl b/include/nbl/builtin/hlsl/testing/relative_approx_compare.hlsl new file mode 100644 index 0000000000..8d32780f93 --- /dev/null +++ b/include/nbl/builtin/hlsl/testing/relative_approx_compare.hlsl @@ -0,0 +1,94 @@ 
+#ifndef _NBL_BUILTIN_HLSL_TESTING_RELATIVE_APPROX_COMPARE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TESTING_RELATIVE_APPROX_COMPARE_INCLUDED_ + +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace testing +{ +namespace impl +{ + +template +struct RelativeApproxCompareHelper; + +template +NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeScalar) +struct RelativeApproxCompareHelper) > +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPoint) lhs, NBL_CONST_REF_ARG(FloatingPoint) rhs, const float64_t maxAllowedDifference) + { + const bool bothAreNaN = nbl::hlsl::isnan(lhs) && nbl::hlsl::isnan(rhs); + const bool bothAreInf = nbl::hlsl::isinf(lhs) && nbl::hlsl::isinf(rhs); + const bool bothHaveSameSign = nbl::hlsl::ieee754::extractSign(lhs) == nbl::hlsl::ieee754::extractSign(rhs); + const bool lhsIsSubnormalOrZero = ieee754::isSubnormal(lhs) || ieee754::isZero(lhs); + const bool rhsIsSubnormalOrZero = ieee754::isSubnormal(rhs) || ieee754::isZero(rhs); + + if (bothAreNaN) + return true; + if (bothAreInf && bothHaveSameSign) + return true; + if (lhsIsSubnormalOrZero && rhsIsSubnormalOrZero) + return true; + if (!lhsIsSubnormalOrZero && rhsIsSubnormalOrZero) + return false; + if (lhsIsSubnormalOrZero && !rhsIsSubnormalOrZero) + return false; + + return hlsl::max(hlsl::abs(lhs / rhs), hlsl::abs(rhs / lhs)) <= 1.f + maxAllowedDifference; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial) +struct RelativeApproxCompareHelper) > +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPointVector) lhs, NBL_CONST_REF_ARG(FloatingPointVector) rhs, const float64_t maxAllowedDifference) + { + using traits = nbl::hlsl::vector_traits; + for (uint32_t i = 0; i < traits::Dimension; ++i) + { + if (!RelativeApproxCompareHelper::__call(lhs[i], rhs[i], maxAllowedDifference)) + return false; + } + + return true; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Matricial && concepts::FloatingPointLikeScalar::scalar_type>) +struct 
RelativeApproxCompareHelper && concepts::FloatingPointLikeScalar::scalar_type>) > +{ + static bool __call(NBL_CONST_REF_ARG(FloatingPointMatrix) lhs, NBL_CONST_REF_ARG(FloatingPointMatrix) rhs, const float64_t maxAllowedDifference) + { + using traits = nbl::hlsl::matrix_traits; + for (uint32_t i = 0; i < traits::RowCount; ++i) + { + if (!RelativeApproxCompareHelper::__call(lhs[i], rhs[i], maxAllowedDifference)) + return false; + } + + return true; + } +}; + +} + +template +bool relativeApproxCompare(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs, const float64_t maxAllowedDifference) +{ + return impl::RelativeApproxCompareHelper::__call(lhs, rhs, maxAllowedDifference); +} + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/vector_utils/vector_traits.hlsl b/include/nbl/builtin/hlsl/vector_utils/vector_traits.hlsl index 652cabd7c7..95315f6e3c 100644 --- a/include/nbl/builtin/hlsl/vector_utils/vector_traits.hlsl +++ b/include/nbl/builtin/hlsl/vector_utils/vector_traits.hlsl @@ -18,20 +18,13 @@ struct vector_traits NBL_CONSTEXPR_STATIC_INLINE bool IsVector = false; }; -// i choose to implement it this way because of this DXC bug: https://github.com/microsoft/DirectXShaderCompiler/issues/7007 -#define DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(DIMENSION)\ -template \ -struct vector_traits >\ -{\ - using scalar_type = T;\ - NBL_CONSTEXPR_STATIC_INLINE uint32_t Dimension = DIMENSION;\ - NBL_CONSTEXPR_STATIC_INLINE bool IsVector = true;\ -};\ - -DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(1) -DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(2) -DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(3) -DEFINE_VECTOR_TRAITS_TEMPLATE_SPECIALIZATION(4) +template +struct vector_traits > +{ + using scalar_type = T; + NBL_CONSTEXPR_STATIC_INLINE uint32_t Dimension = N; + NBL_CONSTEXPR_STATIC_INLINE bool IsVector = true; +}; } } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 75cb681d36..6549a2b691 100644 ---
a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -357,5 +357,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") #morton codes LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") +#testing +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/relative_approx_compare.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From f01cee4490e5b83eb07ee32b08536f5f7eca8197 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 19 Dec 2025 12:43:16 +0100 Subject: [PATCH 014/140] Fixed bug in CStdoutLogger Signed-off-by: Corey --- include/nbl/system/CStdoutLogger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/system/CStdoutLogger.h b/include/nbl/system/CStdoutLogger.h index 24693edd61..a63b8cf567 100644 --- a/include/nbl/system/CStdoutLogger.h +++ b/include/nbl/system/CStdoutLogger.h @@ -15,7 +15,7 @@ class CStdoutLogger : public IThreadsafeLogger protected: virtual void threadsafeLog_impl(const std::string_view& fmt, E_LOG_LEVEL logLevel, va_list args) override { - printf(constructLogString(fmt, logLevel, args).data()); + printf("%s", constructLogString(fmt, logLevel, args).data()); fflush(stdout); } From c319660cf2169aee51087f5582bbbf96f270ca52 Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 19 Dec 2025 13:52:37 +0100 Subject: [PATCH 015/140] Every ILogger::log call now uses string literal as its first argument Signed-off-by: Corey --- src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp | 2 +- src/nbl/asset/interchange/CImageLoaderJPG.cpp | 2 +- src/nbl/asset/utils/CSPIRVIntrospector.cpp | 2 +- src/nbl/system/CColoredStdoutLoggerWin32.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp index d4b9a3e394..b538f75eb3 100644 --- a/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp +++ b/src/nbl/asset/interchange/CGraphicsPipelineLoaderMTL.cpp @@ -894,7 +894,7 @@ auto CGraphicsPipelineLoaderMTL::readMaterials(system::IFile* _file, const syste case 'f': // Tf - Transmitivity currMaterial->params.transmissionFilter = readRGB(); sprintf(tmpbuf, "%s, %s: Detected Tf parameter, it won't be used in generated shader - fallback to alpha=0.5 instead", _file->getFileName().string().c_str(), currMaterial->name.c_str()); - logger.log(tmpbuf, system::ILogger::ELL_WARNING); + logger.log("%s", system::ILogger::ELL_WARNING, tmpbuf); break; case 'r': // Tr, transparency = 1.0-d currMaterial->params.opacity = (1.f - readFloat()); diff --git a/src/nbl/asset/interchange/CImageLoaderJPG.cpp b/src/nbl/asset/interchange/CImageLoaderJPG.cpp index 45677ff5cf..1db5e16ac2 100644 --- a/src/nbl/asset/interchange/CImageLoaderJPG.cpp +++ b/src/nbl/asset/interchange/CImageLoaderJPG.cpp @@ -93,7 +93,7 @@ namespace jpeg std::string errMsg("JPEG FATAL ERROR in "); auto ctx = reinterpret_cast(cinfo->client_data); errMsg += ctx->filename; - ctx->logger.log(errMsg + temp1, system::ILogger::ELL_ERROR); + ctx->logger.log("%s", system::ILogger::ELL_ERROR, errMsg + temp1); } /* Initialize source. 
This is called by jpeg_read_header() before any diff --git a/src/nbl/asset/utils/CSPIRVIntrospector.cpp b/src/nbl/asset/utils/CSPIRVIntrospector.cpp index 4ac78066a7..818fbc584b 100644 --- a/src/nbl/asset/utils/CSPIRVIntrospector.cpp +++ b/src/nbl/asset/utils/CSPIRVIntrospector.cpp @@ -1054,7 +1054,7 @@ void CSPIRVIntrospector::CStageIntrospectionData::debugPrint(system::ILogger* lo } } - logger->log(debug.str() + '\n'); + logger->log("%s", system::ILogger::ELL_DEBUG, debug.str() + '\n'); } } \ No newline at end of file diff --git a/src/nbl/system/CColoredStdoutLoggerWin32.cpp b/src/nbl/system/CColoredStdoutLoggerWin32.cpp index e664ae84bc..f2690a81b4 100644 --- a/src/nbl/system/CColoredStdoutLoggerWin32.cpp +++ b/src/nbl/system/CColoredStdoutLoggerWin32.cpp @@ -15,7 +15,7 @@ CColoredStdoutLoggerWin32::CColoredStdoutLoggerWin32(core::bitflag void CColoredStdoutLoggerWin32::threadsafeLog_impl(const std::string_view& fmt, E_LOG_LEVEL logLevel, va_list args) { SetConsoleTextAttribute(m_native_console, getConsoleColor(logLevel)); - printf(constructLogString(fmt, logLevel, args).data()); + printf("%s", constructLogString(fmt, logLevel, args).data()); fflush(stdout); SetConsoleTextAttribute(m_native_console, 15); // restore to white } From d3c1e84eef5072db96da6a20ff05f661070ce98c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 4 Feb 2025 14:16:42 +0700 Subject: [PATCH 016/140] initial example --- examples_tests | 2 +- .../hlsl/sampling/box_muller_transform.hlsl | 27 +++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 6 ++--- 3 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl diff --git a/examples_tests b/examples_tests index 2d59279740..b171724bb0 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2d5927974073dd3ada6a0a52134355d8022876a3 +Subproject commit b171724bb0db3bf6f144d6eb077e95ddea806cbd diff --git 
a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl new file mode 100644 index 0000000000..efa8d66e2b --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -0,0 +1,27 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_BOX_MULLER_TRANSFORM_INCLUDED_ +#define _NBL_BUILTIN_HLSL_BOX_MULLER_TRANSFORM_INCLUDED_ + +#include "nbl/builtin/hlsl/math/functions.hlsl" +#include "nbl/builtin/hlsl/numbers.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +template +vector boxMullerTransform(vector xi, T stddev) +{ + T sinPhi, cosPhi; + nbl::hlsl::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); + return vector(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev; +} + +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 6549a2b691..5ca0f3593f 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -256,9 +256,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") #sampling LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted_spheres.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/quotient_and_pdf.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform_spheres.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform.hlsl") # 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl") # From babda462a2b631b6ff6bffa1402a3e1467bd1a92 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 6 Feb 2025 16:03:31 +0700 Subject: [PATCH 017/140] use bxdf creation params struct Signed-off-by: Corey --- examples_tests | 2 +- include/nbl/builtin/hlsl/bxdf/common.hlsl | 127 +++- include/nbl/builtin/hlsl/bxdf/reflection.hlsl | 708 ++++++++++++++++++ .../nbl/builtin/hlsl/bxdf/transmission.hlsl | 604 +++++++++++++++ 4 files changed, 1419 insertions(+), 22 deletions(-) diff --git a/examples_tests b/examples_tests index b171724bb0..5a5fbfe55a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit b171724bb0db3bf6f144d6eb077e95ddea806cbd +Subproject commit 5a5fbfe55aa4cf062c562f19507ba30de085b7a6 diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl index ebad0a925c..1a8e6d3086 100644 --- a/include/nbl/builtin/hlsl/bxdf/common.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl @@ -1097,27 +1097,11 @@ NBL_CONCEPT_END( #include } -#define NBL_CONCEPT_NAME MicrofacetBRDF -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) -#define NBL_CONCEPT_TPLT_PRM_NAMES (T) -#define NBL_CONCEPT_PARAM_0 (bxdf, T) -#define NBL_CONCEPT_PARAM_1 (aniso, typename T::anisotropic_interaction_type) -#define NBL_CONCEPT_PARAM_2 (u, vector) -#define NBL_CONCEPT_PARAM_3 (anisocache, typename T::anisocache_type) -NBL_CONCEPT_BEGIN(4) -#define bxdf NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define aniso NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 -#define u NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 -#define anisocache NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 -NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(impl::microfacet_bxdf_common, T)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((bxdf.generate(aniso,u,anisocache)), ::nbl::hlsl::is_same_v, typename T::sample_type)) -); -#undef anisocache -#undef u -#undef aniso -#undef bxdf -#include +// unified param struct 
for calls to BxDF::eval, BxDF::pdf, BxDF::quotient_and_pdf +template) +struct SBxDFParams +{ + using this_t = SBxDFParams; #define NBL_CONCEPT_NAME MicrofacetBSDF #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) @@ -1185,6 +1169,107 @@ NBL_CONCEPT_END( #undef bxdf #include + template && surface_interactions::Isotropic && IsotropicMicrofacetCache) + static this_t create(LightSample _sample, Iso interaction, Cache cache, BxDFClampMode clamp = BCM_NONE) + { + this_t retval; + retval.NdotH = cache.NdotH; + retval.NdotH2 = cache.NdotH2; + retval.NdotV = clamp == BCM_ABS ? abs(interaction.NdotV) : + clamp == BCM_MAX ? max(interaction.NdotV, 0.0) : + interaction.NdotV; + retval.uNdotV = interaction.NdotV; + retval.NdotV2 = interaction.NdotV2; + retval.NdotL = clamp == BCM_ABS ? abs(_sample.NdotL) : + clamp == BCM_MAX ? max(_sample.NdotL, 0.0) : + _sample.NdotL; + retval.uNdotL = _sample.NdotL; + retval.NdotL2 = _sample.NdotL2; + retval.VdotH = cache.VdotH; + retval.LdotH = cache.LdotH; + retval.VdotL = _sample.VdotL; + retval.is_aniso = false; + return retval; + } + + template && surface_interactions::Anisotropic && AnisotropicMicrofacetCache) + static SBxDFParams create(LightSample _sample, Aniso interaction, Cache cache, BxDFClampMode clamp = BCM_NONE) + { + this_t retval; + retval.NdotH = cache.NdotH; + retval.NdotH2 = cache.NdotH2; + retval.NdotV = clamp == BCM_ABS ? abs(interaction.NdotV) : + clamp == BCM_MAX ? max(interaction.NdotV, 0.0) : + interaction.NdotV; + retval.uNdotV = interaction.NdotV; + retval.NdotV2 = interaction.NdotV2; + retval.NdotL = clamp == BCM_ABS ? abs(_sample.NdotL) : + clamp == BCM_MAX ? 
max(_sample.NdotL, 0.0) : + _sample.NdotL; + retval.uNdotL = _sample.NdotL; + retval.NdotL2 = _sample.NdotL2; + retval.VdotH = cache.VdotH; + retval.LdotH = cache.LdotH; + retval.VdotL = _sample.VdotL; + + retval.is_aniso = true; + retval.TdotH2 = cache.TdotH * cache.TdotH; + retval.BdotH2 = cache.BdotH * cache.BdotH; + retval.TdotL2 = _sample.TdotL * _sample.TdotL; + retval.BdotL2 = _sample.BdotL * _sample.BdotL; + retval.TdotV2 = interaction.TdotV * interaction.TdotV; + retval.BdotV2 = interaction.BdotV * interaction.BdotV; + return retval; + } + + Scalar getMaxNdotV() { return max(uNdotV, 0.0); } + Scalar getAbsNdotV() { return abs(uNdotV); } + + Scalar getMaxNdotL() { return max(uNdotL, 0.0); } + Scalar getAbsNdotL() { return abs(uNdotL); } + + // iso + Scalar NdotH; + Scalar NdotH2; + Scalar NdotV; + Scalar NdotV2; + Scalar NdotL; + Scalar NdotL2; + Scalar VdotH; + Scalar LdotH; + Scalar VdotL; + + // aniso + bool is_aniso; + Scalar TdotH2; + Scalar BdotH2; + Scalar TdotL2; + Scalar BdotL2; + Scalar TdotV2; + Scalar BdotV2; + + // original, unclamped + Scalar uNdotL; + Scalar uNdotV; +}; + +// unified param struct for calls to BxDF::create +template) +struct SBxDFCreationParams +{ + bool is_aniso; + Scalar A; + vector Axy; + Spectrum ior0; + Spectrum ior1; + Scalar eta; + Spectrum eta2; + Spectrum luminosityContributionHint; +}; + +// fresnel stuff +namespace impl +{ template NBL_BOOL_CONCEPT MicrofacetBxDF = MicrofacetBRDF || MicrofacetBSDF; template diff --git a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl index c5d4b019c8..f9c90a1373 100644 --- a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl @@ -19,6 +19,714 @@ namespace hlsl // After Clang-HLSL introduces https://en.cppreference.com/w/cpp/language/namespace_alias // namespace brdf = bxdf::reflection; +// for information why we don't check the relation between `V` and `L` or `N` and `H`, see comments for 
`nbl::hlsl::transmission::cos_quotient_and_pdf` +template && is_floating_point_v) +quotient_and_pdf cos_quotient_and_pdf() +{ + return quotient_and_pdf::create(SpectralBins(1.f), numeric_limits::infinity); +} + +// basic bxdfs +template && surface_interactions::Isotropic && surface_interactions::Anisotropic) +struct SLambertianBxDF +{ + using this_t = SLambertianBxDF; + using scalar_type = typename LightSample::scalar_type; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + using isotropic_type = Iso; + using anisotropic_type = Aniso; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using params_t = SBxDFParams; + + static this_t create() + { + this_t retval; + // nothing here, just keeping in convention with others + return retval; + } + + static this_t create(SBxDFCreationParams params) + { + return create(); + } + + scalar_type __eval_pi_factored_out(scalar_type maxNdotL) + { + return maxNdotL; + } + + scalar_type eval(NBL_CONST_REF_ARG(params_t) params) + { + return __eval_pi_factored_out(params.NdotL) * numbers::inv_pi; + } + + sample_type generate_wo_clamps(anisotropic_type interaction, vector u) + { + ray_dir_info_type L; + L.direction = projected_hemisphere_generate(u); + return sample_type::createFromTangentSpace(interaction.getTangentSpaceV(), L, interaction.getFromTangentSpace()); + } + + sample_type generate(anisotropic_type interaction, vector u) + { + return generate_wo_clamps(interaction, u); + } + + scalar_type pdf(NBL_CONST_REF_ARG(params_t) params) + { + return projected_hemisphere_pdf(params.NdotL); + } + + quotient_pdf_type quotient_and_pdf(NBL_CONST_REF_ARG(params_t) params) + { + scalar_type _pdf; + scalar_type q = projected_hemisphere_quotient_and_pdf(_pdf, params.NdotL); + return quotient_pdf_type::create((spectral_type)(q), _pdf); + } +}; + + +template && surface_interactions::Isotropic && surface_interactions::Anisotropic) +struct SOrenNayarBxDF +{ + 
using this_t = SOrenNayarBxDF; + using scalar_type = typename LightSample::scalar_type; + using vector2_type = vector; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + + using isotropic_type = Iso; + using anisotropic_type = Aniso; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using params_t = SBxDFParams; + + static this_t create(scalar_type A) + { + this_t retval; + retval.A = A; + return retval; + } + + static this_t create(SBxDFCreationParams params) + { + return create(params.A); + } + + scalar_type __rec_pi_factored_out_wo_clamps(scalar_type VdotL, scalar_type maxNdotL, scalar_type maxNdotV) + { + scalar_type A2 = A * 0.5; + vector2_type AB = vector2_type(1.0, 0.0) + vector2_type(-0.5, 0.45) * vector2_type(A2, A2) / vector2_type(A2 + 0.33, A2 + 0.09); + scalar_type C = 1.0 / max(maxNdotL, maxNdotV); + + scalar_type cos_phi_sin_theta = max(VdotL - maxNdotL * maxNdotV, 0.0); + return (AB.x + AB.y * cos_phi_sin_theta * C); + } + + scalar_type eval(params_t params) + { + return params.NdotL * numbers::inv_pi * __rec_pi_factored_out_wo_clamps(params.VdotL, params.NdotL, params.NdotV); + } + + sample_type generate_wo_clamps(anisotropic_type interaction, vector2_type u) + { + ray_dir_info_type L; + L.direction = projected_hemisphere_generate(u); + return sample_type::createFromTangentSpace(interaction.getTangentSpaceV(), L, interaction.getFromTangentSpace()); + } + + sample_type generate(anisotropic_type interaction, vector2_type u) + { + return generate_wo_clamps(interaction, u); + } + + scalar_type pdf(params_t params) + { + return projected_hemisphere_pdf(params.NdotL); + } + + quotient_pdf_type quotient_and_pdf(params_t params) + { + scalar_type _pdf; + projected_hemisphere_quotient_and_pdf(_pdf, params.NdotL); + scalar_type q = __rec_pi_factored_out_wo_clamps(params.VdotL, params.NdotL, params.NdotV); + return quotient_pdf_type::create((spectral_type)(q), _pdf); + } + + 
scalar_type A; +}; + + +// microfacet bxdfs + +// do not use: untested and known to be of poor quality +template && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) +struct SBlinnPhongBxDF +{ + using this_t = SBlinnPhongBxDF; + using scalar_type = typename LightSample::scalar_type; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + using vector2_type = vector; + using vector3_type = vector; + using matrix2x3_type = matrix; + using params_t = SBxDFParams; + + using isotropic_type = typename IsoCache::isotropic_type; + using anisotropic_type = typename AnisoCache::anisotropic_type; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using isocache_type = IsoCache; + using anisocache_type = AnisoCache; + + static this_t create(vector2_type n, spectral_type ior0, spectral_type ior1) + { + this_t retval; + retval.n = n; + retval.ior0 = ior0; + retval.ior1 = ior1; + return retval; + } + + template + static T phong_exp_to_alpha2(T n) + { + return 2.0 / (n + 2.0); + } + + template + static T alpha2_to_phong_exp(T a2) + { + return 2.0 / a2 - 2.0; + } + + template // this or specialize?
+ scalar_type __eval_DG_wo_clamps(params_t params, vector2_type a2) + { + if (aniso) + { + ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(params.NdotH, 1.0 / (1.0 - params.NdotH2), params.TdotH2, params.BdotH2, n.x, n.y); + ndf::BlinnPhong blinn_phong; + scalar_type DG = blinn_phong(ndfparams); + if (any>(a2 > (vector2_type)numeric_limits::min)) + { + smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(a2.x, a2.y, params.TdotV2, params.BdotV2, params.NdotV2, params.TdotL2, params.BdotL2, params.NdotL2, 0); + smith::Beckmann beckmann; + DG *= beckmann.correlated(smithparams); + } + return DG; + } + else + { + ndf::SIsotropicParams ndfparams = ndf::SIsotropicParams::create(n, params.NdotH, params.NdotH2); + ndf::BlinnPhong blinn_phong; + scalar_type NG = blinn_phong(ndfparams); + if (any>(a2 > (vector2_type)numeric_limits::min)) + { + smith::SIsotropicParams smithparams = smith::SIsotropicParams::create(a2.x, params.NdotV2, params.NdotL2, 0); + smith::Beckmann beckmann; + NG *= beckmann.correlated(smithparams); + } + return NG; + } + } + + template + vector3_type __eval_wo_clamps(params_t params) + { + scalar_type scalar_part; + if (aniso) + { + vector2_type a2 = phong_exp_to_alpha2(n); + scalar_part = __eval_DG_wo_clamps(params, a2); + } + else + { + vector2_type a2 = (vector2_type)phong_exp_to_alpha2(n); + scalar_part = __eval_DG_wo_clamps(params, a2); + } + ndf::microfacet_to_light_measure_transform,ndf::REFLECT_BIT> microfacet_transform = ndf::microfacet_to_light_measure_transform,ndf::REFLECT_BIT>::create(scalar_part, params.NdotV); + return fresnelConductor(ior0, ior1, params.VdotH) * microfacet_transform(); + } + + vector3_type eval(sample_type _sample, isotropic_type interaction, isocache_type cache) + { + if (interaction.NdotV > numeric_limits::min) + { + params_t params = params_t::template create(_sample, interaction, cache); + return __eval_wo_clamps(params); + } + else + return (vector3_type)0.0; + } + + 
vector3_type eval(sample_type _sample, anisotropic_type interaction, anisocache_type cache) + { + if (interaction.NdotV > numeric_limits::min) + { + params_t params = params_t::template create(_sample, interaction, cache); + return __eval_wo_clamps(params); + } + else + return (vector3_type)0.0; + } + + vector3_type generate(vector2_type u, scalar_type n) + { + scalar_type phi = 2.0 * numbers::pi * u.y; + scalar_type cosTheta = pow(u.x, 1.0/(n+1.0)); + scalar_type sinTheta = sqrt(1.0 - cosTheta * cosTheta); + scalar_type cosPhi = cos(phi); + scalar_type sinPhi = sin(phi); + return vector3_type(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta); + } + + sample_type generate(anisotropic_type interaction, vector2_type u, NBL_REF_ARG(anisocache_type) cache) + { + const vector3_type H = generate(u, n.x); + const vector3_type localV = interaction.getTangentSpaceV(); + + cache = anisocache_type::create(localV, H); + ray_dir_info_type localL; + localL.direction = math::reflect(localV, H, cache.VdotH); + + return sample_type::createFromTangentSpace(localV, localL, interaction.getFromTangentSpace()); + } + + // where pdf? 
+ + vector2_type n; + spectral_type ior0, ior1; +}; + +template && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) +struct SBeckmannBxDF +{ + using this_t = SBeckmannBxDF; + using scalar_type = typename LightSample::scalar_type; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + using vector2_type = vector; + using vector3_type = vector; + using matrix2x3_type = matrix; + using params_t = SBxDFParams; + + using isotropic_type = typename IsoCache::isotropic_type; + using anisotropic_type = typename AnisoCache::anisotropic_type; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using isocache_type = IsoCache; + using anisocache_type = AnisoCache; + + // iso + static this_t create(scalar_type A, spectral_type ior0, spectral_type ior1) + { + this_t retval; + retval.A = vector2_type(A,A); + retval.ior0 = ior0; + retval.ior1 = ior1; + return retval; + } + + // aniso + static this_t create(scalar_type ax, scalar_type ay, spectral_type ior0, spectral_type ior1) + { + this_t retval; + retval.A = vector2_type(ax,ay); + retval.ior0 = ior0; + retval.ior1 = ior1; + return retval; + } + + static this_t create(SBxDFCreationParams params) + { + if (params.is_aniso) + return create(params.Axy.x, params.Axy.y, params.ior0, params.ior1); + else + return create(params.A, params.ior0, params.ior1); + } + + scalar_type __eval_DG_wo_clamps(params_t params) + { + if (params.is_aniso) + { + const scalar_type ax2 = A.x*A.x; + const scalar_type ay2 = A.y*A.y; + ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(A.x, A.y, ax2, ay2, params.TdotH2, params.BdotH2, params.NdotH2); + ndf::Beckmann beckmann_ndf; + scalar_type NG = beckmann_ndf(ndfparams); + if (any>(A > (vector2_type)numeric_limits::min)) + { + smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(ax2, ay2, params.TdotV2, params.BdotV2, params.NdotV2, params.TdotL2, params.BdotL2, params.NdotL2, 0); + 
smith::Beckmann beckmann_smith; + NG *= beckmann_smith.correlated(smithparams); + } + return NG; + } + else + { + scalar_type a2 = A.x*A.x; + ndf::SIsotropicParams ndfparams = ndf::SIsotropicParams::create(a2, params.NdotH, params.NdotH2); + ndf::Beckmann beckmann_ndf; + scalar_type NG = beckmann_ndf(ndfparams); + if (a2 > numeric_limits::min) + { + smith::SIsotropicParams smithparams = smith::SIsotropicParams::create(a2, params.NdotV2, params.NdotL2, 0); + smith::Beckmann beckmann_smith; + NG *= beckmann_smith.correlated(smithparams); + } + return NG; + } + } + + spectral_type eval(params_t params) + { + if (params.uNdotV > numeric_limits::min) + { + scalar_type scalar_part = __eval_DG_wo_clamps(params); + ndf::microfacet_to_light_measure_transform,ndf::REFLECT_BIT> microfacet_transform = ndf::microfacet_to_light_measure_transform,ndf::REFLECT_BIT>::create(scalar_part, params.uNdotV); + return fresnelConductor(ior0, ior1, params.VdotH) * microfacet_transform(); + } + else + return (spectral_type)0.0; + } + + vector3_type __generate(vector3_type localV, vector2_type u) + { + //stretch + vector3_type V = nbl::hlsl::normalize(vector3_type(A.x * localV.x, A.y * localV.y, localV.z)); + + vector2_type slope; + if (V.z > 0.9999)//V.z=NdotV=cosTheta in tangent space + { + scalar_type r = sqrt(-log(1.0 - u.x)); + scalar_type sinPhi = sin(2.0 * numbers::pi * u.y); + scalar_type cosPhi = cos(2.0 * numbers::pi * u.y); + slope = (vector2_type)r * vector2_type(cosPhi,sinPhi); + } + else + { + scalar_type cosTheta = V.z; + scalar_type sinTheta = sqrt(1.0 - cosTheta * cosTheta); + scalar_type tanTheta = sinTheta / cosTheta; + scalar_type cotTheta = 1.0 / tanTheta; + + scalar_type a = -1.0; + scalar_type c = erf(cosTheta); + scalar_type sample_x = max(u.x, 1.0e-6); + scalar_type theta = acos(cosTheta); + scalar_type fit = 1.0 + theta * (-0.876 + theta * (0.4265 - 0.0594*theta)); + scalar_type b = c - (1.0 + c) * pow(1.0-sample_x, fit); + + scalar_type normalization = 1.0 / (1.0 + 
c + numbers::inv_sqrtpi * tanTheta * exp(-cosTheta*cosTheta)); + + const int ITER_THRESHOLD = 10; + const float MAX_ACCEPTABLE_ERR = 1.0e-5; + int it = 0; + float value=1000.0; + while (++it < ITER_THRESHOLD && nbl::hlsl::abs(value) > MAX_ACCEPTABLE_ERR) + { + if (!(b >= a && b <= c)) + b = 0.5 * (a + c); + + float invErf = erfInv(b); + value = normalization * (1.0 + b + numbers::inv_sqrtpi * tanTheta * exp(-invErf * invErf)) - sample_x; + float derivative = normalization * (1.0 - invErf * cosTheta); + + if (value > 0.0) + c = b; + else + a = b; + + b -= value/derivative; + } + // TODO: investigate if we can replace these two erf^-1 calls with a box muller transform + slope.x = erfInv(b); + slope.y = erfInv(2.0 * max(u.y, 1.0e-6) - 1.0); + } + + scalar_type sinTheta = sqrt(1.0 - V.z*V.z); + scalar_type cosPhi = sinTheta==0.0 ? 1.0 : clamp(V.x/sinTheta, -1.0, 1.0); + scalar_type sinPhi = sinTheta==0.0 ? 0.0 : clamp(V.y/sinTheta, -1.0, 1.0); + //rotate + scalar_type tmp = cosPhi*slope.x - sinPhi*slope.y; + slope.y = sinPhi*slope.x + cosPhi*slope.y; + slope.x = tmp; + + //unstretch + slope = vector2_type(A.x,A.y)*slope; + + return nbl::hlsl::normalize(vector3_type(-slope, 1.0)); + } + + sample_type generate(anisotropic_type interaction, vector2_type u, NBL_REF_ARG(anisocache_type) cache) + { + const vector3_type localV = interaction.getTangentSpaceV(); + const vector3_type H = __generate(localV, u); + + cache = anisocache_type::create(localV, H); + ray_dir_info_type localL; + localL.direction = math::reflect(localV, H, cache.VdotH); + + return sample_type::createFromTangentSpace(localV, localL, interaction.getFromTangentSpace()); + } + + scalar_type pdf(params_t params, NBL_REF_ARG(scalar_type) onePlusLambda_V) + { + scalar_type ndf, lambda; + if (params.is_aniso) + { + ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(A.x, A.y, A.x*A.x, A.y*A.y, params.TdotH2, params.BdotH2, params.NdotH2); + ndf::Beckmann beckmann_ndf; + ndf = 
beckmann_ndf(ndfparams); + + smith::Beckmann beckmann_smith; + const scalar_type c2 = beckmann_smith.C2(params.TdotV2, params.BdotV2, params.NdotV2, A.x*A.x, A.y*A.y); /* squared roughness, consistent with the isotropic branch (a2), __eval_DG_wo_clamps and the transmission Beckmann pdf */ + lambda = beckmann_smith.Lambda(c2); + } + else + { + scalar_type a2 = A.x*A.x; + ndf::SIsotropicParams ndfparams = ndf::SIsotropicParams::create(a2, params.NdotH, params.NdotH2); + ndf::Beckmann beckmann_ndf; + ndf = beckmann_ndf(ndfparams); + + smith::Beckmann beckmann_smith; + lambda = beckmann_smith.Lambda(params.NdotV2, a2); + } + + return smith::VNDF_pdf_wo_clamps >(ndf, lambda, params.uNdotV, onePlusLambda_V); + } + + scalar_type pdf(params_t params) + { + scalar_type dummy; + return pdf(params, dummy); + } + + quotient_pdf_type quotient_and_pdf(params_t params) + { + scalar_type onePlusLambda_V; + scalar_type _pdf = pdf(params, onePlusLambda_V); + + smith::Beckmann beckmann_smith; + spectral_type quo = (spectral_type)0.0; + if (params.uNdotL > numeric_limits::min && params.uNdotV > numeric_limits::min) + { + scalar_type G2_over_G1; + if (params.is_aniso) + { + smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(A.x*A.x, A.y*A.y, params.TdotV2, params.BdotV2, params.NdotV2, params.TdotL2, params.BdotL2, params.NdotL2, onePlusLambda_V); + G2_over_G1 = beckmann_smith.G2_over_G1(smithparams); + } + else + { + smith::SIsotropicParams smithparams = smith::SIsotropicParams::create(A.x*A.x, params.NdotV2, params.NdotL2, onePlusLambda_V); + G2_over_G1 = beckmann_smith.G2_over_G1(smithparams); + } + const spectral_type reflectance = fresnelConductor(ior0, ior1, params.VdotH); + quo = reflectance * G2_over_G1; + } + + return quotient_pdf_type::create(quo, _pdf); + } + + vector2_type A; + spectral_type ior0, ior1; +}; + +template && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) +struct SGGXBxDF +{ + using this_t = SGGXBxDF; + using scalar_type = typename LightSample::scalar_type; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + using vector2_type = vector; +
using vector3_type = vector; + using matrix2x3_type = matrix; + using params_t = SBxDFParams; + + using isotropic_type = typename IsoCache::isotropic_type; + using anisotropic_type = typename AnisoCache::anisotropic_type; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using isocache_type = IsoCache; + using anisocache_type = AnisoCache; + + // iso + static this_t create(scalar_type A, spectral_type ior0, spectral_type ior1) + { + this_t retval; + retval.A = vector2_type(A,A); + retval.ior0 = ior0; + retval.ior1 = ior1; + return retval; + } + + // aniso + static this_t create(scalar_type ax, scalar_type ay, spectral_type ior0, spectral_type ior1) + { + this_t retval; + retval.A = vector2_type(ax,ay); + retval.ior0 = ior0; + retval.ior1 = ior1; + return retval; + } + + static this_t create(SBxDFCreationParams params) + { + if (params.is_aniso) + return create(params.Axy.x, params.Axy.y, params.ior0, params.ior1); + else + return create(params.A, params.ior0, params.ior1); + } + + scalar_type __eval_DG_wo_clamps(params_t params) + { + if (params.is_aniso) + { + const scalar_type ax2 = A.x*A.x; + const scalar_type ay2 = A.y*A.y; + ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(A.x, A.y, ax2, ay2, params.TdotH2, params.BdotH2, params.NdotH2); + ndf::GGX ggx_ndf; + scalar_type NG = ggx_ndf(ndfparams); + if (any>(A > (vector2_type)numeric_limits::min)) + { + smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(ax2, ay2, params.NdotV, params.TdotV2, params.BdotV2, params.NdotV2, params.NdotL, params.TdotL2, params.BdotL2, params.NdotL2); + smith::GGX ggx_smith; + NG *= ggx_smith.correlated_wo_numerator(smithparams); + } + return NG; + } + else + { + scalar_type a2 = A.x*A.x; + ndf::SIsotropicParams ndfparams = ndf::SIsotropicParams::create(a2, params.NdotH, params.NdotH2); + ndf::GGX ggx_ndf; + scalar_type NG = ggx_ndf(ndfparams); + if (a2 > numeric_limits::min) 
+ { + smith::SIsotropicParams smithparams = smith::SIsotropicParams::create(a2, params.NdotV, params.NdotV2, params.NdotL, params.NdotL2); + smith::GGX ggx_smith; + NG *= ggx_smith.correlated_wo_numerator(smithparams); + } + return NG; + } + } + + spectral_type eval(params_t params) + { + if (params.uNdotL > numeric_limits::min && params.uNdotV > numeric_limits::min) + { + scalar_type scalar_part = __eval_DG_wo_clamps(params); + ndf::microfacet_to_light_measure_transform,ndf::REFLECT_BIT> microfacet_transform = ndf::microfacet_to_light_measure_transform,ndf::REFLECT_BIT>::create(scalar_part, params.NdotL); + return fresnelConductor(ior0, ior1, params.VdotH) * microfacet_transform(); + } + else + return (spectral_type)0.0; + } + + vector3_type __generate(vector3_type localV, vector2_type u) + { + vector3_type V = nbl::hlsl::normalize(vector3_type(A.x*localV.x, A.y*localV.y, localV.z));//stretch view vector so that we're sampling as if roughness=1.0 + + scalar_type lensq = V.x*V.x + V.y*V.y; + vector3_type T1 = lensq > 0.0 ? 
vector3_type(-V.y, V.x, 0.0) * rsqrt(lensq) : vector3_type(1.0,0.0,0.0); + vector3_type T2 = cross(V,T1); + + scalar_type r = sqrt(u.x); + scalar_type phi = 2.0 * numbers::pi * u.y; + scalar_type t1 = r * cos(phi); + scalar_type t2 = r * sin(phi); + scalar_type s = 0.5 * (1.0 + V.z); + t2 = (1.0 - s)*sqrt(1.0 - t1*t1) + s*t2; + + //reprojection onto hemisphere + //TODO try it without the max(), not sure if -t1*t1-t2*t2>-1.0 + vector3_type H = t1*T1 + t2*T2 + sqrt(max(0.0, 1.0-t1*t1-t2*t2))*V; + //unstretch + return nbl::hlsl::normalize(vector3_type(A.x*H.x, A.y*H.y, H.z)); + } + + sample_type generate(anisotropic_type interaction, vector2_type u, NBL_REF_ARG(anisocache_type) cache) + { + const vector3_type localV = interaction.getTangentSpaceV(); + const vector3_type H = __generate(localV, u); + + cache = anisocache_type::create(localV, H); + ray_dir_info_type localL; + localL.direction = math::reflect(localV, H, cache.VdotH); + + return sample_type::createFromTangentSpace(localV, localL, interaction.getFromTangentSpace()); + } + + scalar_type pdf(params_t params) + { + scalar_type ndf, G1_over_2NdotV; + if (params.is_aniso) + { + const scalar_type ax2 = A.x*A.x; + const scalar_type ay2 = A.y*A.y; + ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(A.x, A.y, ax2, ay2, params.TdotH2, params.BdotH2, params.NdotH2); + ndf::GGX ggx_ndf; + ndf = ggx_ndf(ndfparams); + + smith::GGX ggx_smith; + const scalar_type devsh_v = ggx_smith.devsh_part(params.TdotV2, params.BdotV2, params.NdotV2, ax2, ay2); + G1_over_2NdotV = ggx_smith.G1_wo_numerator(params.uNdotV, devsh_v); + } + else + { + const scalar_type a2 = A.x*A.x; + ndf::SIsotropicParams ndfparams = ndf::SIsotropicParams::create(a2, params.NdotH, params.NdotH2); + ndf::GGX ggx_ndf; + ndf = ggx_ndf(ndfparams); + + smith::GGX ggx_smith; + const scalar_type devsh_v = ggx_smith.devsh_part(params.NdotV2, a2, 1.0-a2); + G1_over_2NdotV = ggx_smith.G1_wo_numerator(params.uNdotV, devsh_v); + } + return
smith::VNDF_pdf_wo_clamps(ndf, G1_over_2NdotV); + } + + quotient_pdf_type quotient_and_pdf(params_t params) + { + scalar_type _pdf = pdf(params); + + spectral_type quo = (spectral_type)0.0; + if (params.uNdotL > numeric_limits::min && params.uNdotV > numeric_limits::min) + { + scalar_type G2_over_G1; + smith::GGX ggx_smith; + if (params.is_aniso) + { + const scalar_type ax2 = A.x*A.x; + const scalar_type ay2 = A.y*A.y; + smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(ax2, ay2, params.uNdotV, params.TdotV2, params.BdotV2, params.NdotV2, params.uNdotL, params.TdotL2, params.BdotL2, params.NdotL2); + G2_over_G1 = ggx_smith.G2_over_G1(smithparams); + } + else + { + const scalar_type a2 = A.x*A.x; + smith::SIsotropicParams smithparams = smith::SIsotropicParams::create(a2, params.uNdotV, params.NdotV2, params.uNdotL, params.NdotL2); + G2_over_G1 = ggx_smith.G2_over_G1(smithparams); + } + const spectral_type reflectance = fresnelConductor(ior0, ior1, params.VdotH); + quo = reflectance * G2_over_G1; + } + + return quotient_pdf_type::create(quo, _pdf); + } + + vector2_type A; + spectral_type ior0, ior1; +}; + +} +} } } diff --git a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl index b5b6e101c1..f34d962b93 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl @@ -20,6 +20,610 @@ namespace hlsl // After Clang-HLSL introduces https://en.cppreference.com/w/cpp/language/namespace_alias // namespace bsdf = bxdf::transmission; +// Why don't we check that the incoming and outgoing directions equal each other +// (or similar for other delta distributions such as reflect, or smooth [thin] dielectrics): +// - The `quotient_and_pdf` functions are meant to be used with MIS and RIS +// - Our own generator can never pick an improbable path, so no checking necessary +// - For other generators the estimator will be 
`f_BSDF*f_Light*f_Visibility*clampedCos(theta)/(1+(p_BSDF^alpha+p_otherNonChosenGenerator^alpha+...)/p_ChosenGenerator^alpha)` +// therefore when `p_BSDF` equals `nbl_glsl_FLT_INF` it will drive the overall MIS estimator for the other generators to 0 so no checking necessary +template && is_floating_point_v) +quotient_and_pdf cos_quotient_and_pdf() +{ + return quotient_and_pdf::create(SpectralBins(1.f), numeric_limits::infinity); +} + +// basic bxdf +template && surface_interactions::Isotropic && surface_interactions::Anisotropic) +struct SLambertianBxDF +{ + using this_t = SLambertianBxDF; + using scalar_type = typename LightSample::scalar_type; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + using isotropic_type = Iso; + using anisotropic_type = Aniso; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using params_t = SBxDFParams; + + static this_t create() + { + this_t retval; + // nothing here, just keeping convention with others + return retval; + } + + static this_t create(SBxDFCreationParams params) + { + return create(); + } + + scalar_type __eval_pi_factored_out(scalar_type absNdotL) + { + return absNdotL; + } + + scalar_type eval(params_t params) + { + return __eval_pi_factored_out(params.NdotL) * numbers::inv_pi * 0.5; + } + + sample_type generate_wo_clamps(anisotropic_type interaction, vector u) + { + ray_dir_info_type L; + L.direction = projected_sphere_generate(u); + return sample_type::createFromTangentSpace(interaction.getTangentSpaceV(), L, interaction.getFromTangentSpace()); + } + + sample_type generate(anisotropic_type interaction, vector u) + { + return generate_wo_clamps(interaction, u); + } + + scalar_type pdf(params_t params) + { + return projected_sphere_pdf(params.NdotL); + } + + quotient_pdf_type quotient_and_pdf(params_t params) + { + scalar_type _pdf; + scalar_type q = projected_sphere_quotient_and_pdf(_pdf, params.NdotL); + return 
quotient_pdf_type::create((spectral_type)(q), _pdf); + } +}; + + +// microfacet bxdfs +template && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) +struct SSmoothDielectricBxDF +{ + using this_t = SSmoothDielectricBxDF; + using scalar_type = typename LightSample::scalar_type; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + using vector3_type = vector; + using params_t = SBxDFParams; + + using isotropic_type = typename IsoCache::isotropic_type; + using anisotropic_type = typename AnisoCache::anisotropic_type; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using isocache_type = IsoCache; + using anisocache_type = AnisoCache; + + static this_t create(scalar_type eta) + { + this_t retval; + retval.eta = eta; + return retval; + } + + static this_t create(SBxDFCreationParams params) + { + return create(params.eta); + } + + spectral_type eval(params_t params) + { + return (spectral_type)0; + } + + sample_type __generate_wo_clamps(vector3_type V, vector3_type T, vector3_type B, vector3_type N, bool backside, scalar_type NdotV, scalar_type absNdotV, scalar_type NdotV2, NBL_REF_ARG(vector3_type) u, scalar_type rcpOrientedEta, scalar_type orientedEta2, scalar_type rcpOrientedEta2, NBL_REF_ARG(bool) transmitted) + { + const scalar_type reflectance = fresnelDielectric_common(orientedEta2, absNdotV); + + scalar_type rcpChoiceProb; + transmitted = math::partitionRandVariable(reflectance, u.z, rcpChoiceProb); + + ray_dir_info_type L; + L.direction = math::reflectRefract(transmitted, V, N, backside, NdotV, NdotV2, rcpOrientedEta, rcpOrientedEta2); + return sample_type::create(L, nbl::hlsl::dot(V, L.direction), T, B, N); + } + + sample_type generate_wo_clamps(anisotropic_type interaction, NBL_REF_ARG(vector) u) + { + scalar_type orientedEta, rcpOrientedEta; + const bool backside = math::getOrientedEtas(orientedEta, rcpOrientedEta, interaction.NdotV, eta); + bool dummy; + return 
__generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV, + interaction.NdotV, interaction.NdotV*interaction.NdotV, u, rcpOrientedEta, orientedEta*orientedEta, rcpOrientedEta*rcpOrientedEta, dummy); + } + + sample_type generate(anisotropic_type interaction, NBL_REF_ARG(vector) u) + { + scalar_type orientedEta, rcpOrientedEta; + const bool backside = math::getOrientedEtas(orientedEta, rcpOrientedEta, interaction.NdotV, eta); + bool dummy; + return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV, + nbl::hlsl::abs(interaction.NdotV), interaction.NdotV*interaction.NdotV, u, rcpOrientedEta, orientedEta*orientedEta, rcpOrientedEta*rcpOrientedEta, dummy); + } + + // eval and pdf return 0 because smooth dielectric/conductor BxDFs are dirac delta distributions, model perfectly specular objects that scatter light to only one outgoing direction + scalar_type pdf(params_t params) + { + return 0; + } + + quotient_pdf_type quotient_and_pdf(params_t params) + { + const bool transmitted = isTransmissionPath(params.uNdotV, params.uNdotL); + + scalar_type dummy, rcpOrientedEta; + const bool backside = math::getOrientedEtas(dummy, rcpOrientedEta, params.NdotV, eta); + + const scalar_type _pdf = numeric_limits::infinity; + scalar_type quo = transmitted ? 
rcpOrientedEta : 1.0; + return quotient_pdf_type::create((spectral_type)(quo), _pdf); + } + + scalar_type eta; +}; + +template && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) +struct SSmoothDielectricBxDF +{ + using this_t = SSmoothDielectricBxDF; + using scalar_type = typename LightSample::scalar_type; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + using vector3_type = vector; + using params_t = SBxDFParams; + + using isotropic_type = typename IsoCache::isotropic_type; + using anisotropic_type = typename AnisoCache::anisotropic_type; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using isocache_type = IsoCache; + using anisocache_type = AnisoCache; + + static this_t create(spectral_type eta2, spectral_type luminosityContributionHint) + { + this_t retval; + retval.eta2 = eta2; + retval.luminosityContributionHint = luminosityContributionHint; + return retval; + } + + static this_t create(SBxDFCreationParams params) + { + return create(params.eta2, params.luminosityContributionHint); + } + + spectral_type eval(params_t params) + { + return (spectral_type)0; + } + + // usually `luminosityContributionHint` would be the Rec.709 luma coefficients (the Y row of the RGB to CIE XYZ matrix) + // it's basically a set of weights that determine how much each spectral channel's reflectance contributes to the single reflect-vs-transmit choice probability + // assert(1.0==luminosityContributionHint.r+luminosityContributionHint.g+luminosityContributionHint.b); + // `remainderMetadata` is a variable through which the generator function returns byproducts of sample generation that would otherwise have to be redundantly calculated in `quotient_and_pdf` + sample_type __generate_wo_clamps(vector3_type V, vector3_type T, vector3_type B, vector3_type N, scalar_type NdotV, scalar_type absNdotV, NBL_REF_ARG(vector3_type) u, spectral_type eta2, spectral_type luminosityContributionHint, NBL_REF_ARG(spectral_type) remainderMetadata) + { + // we will only ever intersect from the outside + const spectral_type
reflectance = thindielectricInfiniteScatter(fresnelDielectric_common(eta2,absNdotV)); + + // we are only allowed one choice for the entire ray, so make the probability a weighted sum + const scalar_type reflectionProb = nbl::hlsl::dot(reflectance, luminosityContributionHint); + + scalar_type rcpChoiceProb; + const bool transmitted = math::partitionRandVariable(reflectionProb, u.z, rcpChoiceProb); + remainderMetadata = (transmitted ? ((spectral_type)(1.0) - reflectance) : reflectance) * rcpChoiceProb; + + ray_dir_info_type L; + L.direction = (transmitted ? (vector3_type)(0.0) : N * 2.0f * NdotV) - V; + return sample_type::create(L, nbl::hlsl::dot(V, L.direction), T, B, N); + } + + sample_type generate_wo_clamps(anisotropic_type interaction, NBL_REF_ARG(vector) u) + { + vector3_type dummy; + return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, interaction.NdotV, interaction.NdotV, u, eta2, luminosityContributionHint, dummy); + } + + sample_type generate(anisotropic_type interaction, NBL_REF_ARG(vector) u) + { + vector3_type dummy; + return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, interaction.NdotV, nbl::hlsl::abs(interaction.NdotV), u, eta2, luminosityContributionHint, dummy); + } + + scalar_type pdf(params_t params) + { + return 0; + } + + quotient_pdf_type quotient_and_pdf(params_t params) // isotropic + { + const bool transmitted = isTransmissionPath(params.uNdotV, params.uNdotL); + const spectral_type reflectance = thindielectricInfiniteScatter(fresnelDielectric_common(eta2, params.NdotV)); + const spectral_type sampleValue = transmitted ? 
((spectral_type)(1.0) - reflectance) : reflectance; + + const scalar_type sampleProb = nbl::hlsl::dot(sampleValue,luminosityContributionHint); + + const scalar_type _pdf = numeric_limits::infinity; + return quotient_pdf_type::create((spectral_type)(sampleValue / sampleProb), _pdf); + } + + spectral_type eta2; + spectral_type luminosityContributionHint; +}; + +template && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) +struct SBeckmannDielectricBxDF +{ + using this_t = SBeckmannDielectricBxDF; + using scalar_type = typename LightSample::scalar_type; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + using vector2_type = vector; + using vector3_type = vector; + using matrix3x3_type = matrix; + using params_t = SBxDFParams; + + using isotropic_type = typename IsoCache::isotropic_type; + using anisotropic_type = typename AnisoCache::anisotropic_type; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using isocache_type = IsoCache; + using anisocache_type = AnisoCache; + + static this_t create(scalar_type eta, scalar_type A) + { + this_t retval; + retval.eta = eta; + retval.A = vector2_type(A, A); + return retval; + } + + static this_t create(scalar_type eta, scalar_type ax, scalar_type ay) + { + this_t retval; + retval.eta = eta; + retval.A = vector2_type(ax, ay); + return retval; + } + + static this_t create(SBxDFCreationParams params) + { + if (params.is_aniso) + return create(params.eta, params.Axy.x, params.Axy.y); + else + return create(params.eta, params.A); + } + + spectral_type eval(params_t params) + { + scalar_type orientedEta, dummy; + const bool backside = math::getOrientedEtas(orientedEta, dummy, params.VdotH, eta); + const scalar_type orientedEta2 = orientedEta * orientedEta; + + const scalar_type VdotHLdotH = params.VdotH * params.LdotH; + const bool transmitted = VdotHLdotH < 0.0; + + spectral_type dummyior; + reflection::SBeckmannBxDF beckmann; + if 
(params.is_aniso) + beckmann = reflection::SBeckmannBxDF::create(A.x, A.y, dummyior, dummyior); + else + beckmann = reflection::SBeckmannBxDF::create(A.x, dummyior, dummyior); + const scalar_type scalar_part = beckmann.__eval_DG_wo_clamps(params); + + ndf::microfacet_to_light_measure_transform,ndf::REFLECT_REFRACT_BIT> microfacet_transform = + ndf::microfacet_to_light_measure_transform,ndf::REFLECT_REFRACT_BIT>::create(scalar_part,params.NdotV,transmitted,params.VdotH,params.LdotH,VdotHLdotH,orientedEta); + return (spectral_type)fresnelDielectric_common(orientedEta2, nbl::hlsl::abs(params.VdotH)) * microfacet_transform(); + } + + sample_type __generate_wo_clamps(vector3_type localV, bool backside, vector3_type H, matrix3x3_type m, NBL_REF_ARG(vector3_type) u, scalar_type rcpOrientedEta, scalar_type orientedEta2, scalar_type rcpOrientedEta2, NBL_REF_ARG(anisocache_type) cache) + { + const scalar_type localVdotH = nbl::hlsl::dot(localV,H); + const scalar_type reflectance = fresnelDielectric_common(orientedEta2,nbl::hlsl::abs(localVdotH)); + + scalar_type rcpChoiceProb; + bool transmitted = math::partitionRandVariable(reflectance, u.z, rcpChoiceProb); + + cache = anisocache_type::create(localV, H); + + const scalar_type VdotH = cache.VdotH; + cache.LdotH = transmitted ? math::reflectRefract_computeNdotT(VdotH < 0.0, VdotH * VdotH, rcpOrientedEta2) : VdotH; + ray_dir_info_type localL; + localL.direction = math::reflectRefract_impl(transmitted, localV, H, VdotH, cache.LdotH, rcpOrientedEta); + + return sample_type::createFromTangentSpace(localV, localL, m); + } + + sample_type generate(anisotropic_type interaction, NBL_REF_ARG(vector3_type) u, NBL_REF_ARG(anisocache_type) cache) + { + const vector3_type localV = interaction.getTangentSpaceV(); + + scalar_type orientedEta, rcpOrientedEta; + const bool backside = math::getOrientedEtas(orientedEta, rcpOrientedEta, interaction.NdotV, eta); + + const vector3_type upperHemisphereV = backside ? 
-localV : localV; + + spectral_type dummyior; + reflection::SBeckmannBxDF beckmann = reflection::SBeckmannBxDF::create(A.x, A.y, dummyior, dummyior); + const vector3_type H = beckmann.__generate(upperHemisphereV, u.xy); + + return __generate_wo_clamps(localV, backside, H, interaction.getFromTangentSpace(), u, rcpOrientedEta, orientedEta*orientedEta, rcpOrientedEta*rcpOrientedEta, cache); + } + + sample_type generate(anisotropic_type interaction, NBL_REF_ARG(vector3_type) u) + { + anisocache_type dummycache; + return generate(interaction, u, dummycache); + } + + scalar_type pdf(params_t params, NBL_REF_ARG(scalar_type) onePlusLambda_V) + { + scalar_type orientedEta, dummy; + const bool backside = math::getOrientedEtas(orientedEta, dummy, params.VdotH, eta); + const scalar_type orientedEta2 = orientedEta * orientedEta; + + const scalar_type VdotHLdotH = params.VdotH * params.LdotH; + const bool transmitted = VdotHLdotH < 0.0; + + const scalar_type reflectance = fresnelDielectric_common(orientedEta2, nbl::hlsl::abs(params.VdotH)); + + scalar_type ndf, lambda; + if (params.is_aniso) + { + const scalar_type ax2 = A.x*A.x; + const scalar_type ay2 = A.y*A.y; + ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(A.x, A.y, ax2, ay2, params.TdotH2, params.BdotH2, params.NdotH2); + ndf::Beckmann beckmann_ndf; + ndf = beckmann_ndf(ndfparams); + + smith::Beckmann beckmann_smith; + scalar_type c2 = beckmann_smith.C2(params.TdotV2, params.BdotV2, params.NdotV2, ax2, ay2); + lambda = beckmann_smith.Lambda(c2); + } + else + { + const scalar_type a2 = A.x*A.x; + ndf::SIsotropicParams ndfparams = ndf::SIsotropicParams::create(a2, params.NdotH, params.NdotH2); + ndf::Beckmann beckmann_ndf; + ndf = beckmann_ndf(ndfparams); + + smith::Beckmann beckmann_smith; + lambda = beckmann_smith.Lambda(params.NdotV2, a2); + } + + return smith::VNDF_pdf_wo_clamps >(ndf,lambda,params.NdotV,transmitted,params.VdotH,params.LdotH,VdotHLdotH,orientedEta,reflectance,onePlusLambda_V); + } 
+ + scalar_type pdf(params_t params) + { + scalar_type dummy; + return pdf(params, dummy); + } + + quotient_pdf_type quotient_and_pdf(params_t params) + { + scalar_type onePlusLambda_V; + scalar_type _pdf = pdf(params, onePlusLambda_V); + + scalar_type quo; + if (params.is_aniso) + { + smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(A.x*A.x, A.y*A.y, params.TdotV2, params.BdotV2, params.NdotV2, params.TdotL2, params.BdotL2, params.NdotL2, onePlusLambda_V); + smith::Beckmann beckmann_smith; + quo = beckmann_smith.G2_over_G1(smithparams); + } + else + { + smith::SIsotropicParams smithparams = smith::SIsotropicParams::create(A.x*A.x, params.NdotV2, params.NdotL2, onePlusLambda_V); + smith::Beckmann beckmann_smith; + quo = beckmann_smith.G2_over_G1(smithparams); + } + + return quotient_pdf_type::create((spectral_type)(quo), _pdf); + } + + vector2_type A; + scalar_type eta; +}; + +template && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) +struct SGGXDielectricBxDF +{ + using this_t = SGGXDielectricBxDF; + using scalar_type = typename LightSample::scalar_type; + using ray_dir_info_type = typename LightSample::ray_dir_info_type; + using vector2_type = vector; + using vector3_type = vector; + using matrix3x3_type = matrix; + using params_t = SBxDFParams; + + using isotropic_type = typename IsoCache::isotropic_type; + using anisotropic_type = typename AnisoCache::anisotropic_type; + using sample_type = LightSample; + using spectral_type = Spectrum; + using quotient_pdf_type = quotient_and_pdf; + using isocache_type = IsoCache; + using anisocache_type = AnisoCache; + + static this_t create(scalar_type eta, scalar_type A) + { + this_t retval; + retval.eta = eta; + retval.A = vector2_type(A, A); + return retval; + } + + static this_t create(scalar_type eta, scalar_type ax, scalar_type ay) + { + this_t retval; + retval.eta = eta; + retval.A = vector2_type(ax, ay); + return retval; + } + + static this_t create(SBxDFCreationParams params) + { + 
if (params.is_aniso) + return create(params.eta, params.Axy.x, params.Axy.y); + else + return create(params.eta, params.A); + } + + spectral_type eval(params_t params) + { + scalar_type orientedEta, dummy; + const bool backside = math::getOrientedEtas(orientedEta, dummy, params.VdotH, eta); + const scalar_type orientedEta2 = orientedEta * orientedEta; + + const scalar_type VdotHLdotH = params.VdotH * params.LdotH; + const bool transmitted = VdotHLdotH < 0.0; + + scalar_type NG_already_in_reflective_dL_measure; + if (params.is_aniso) + { + spectral_type dummyior; + reflection::SGGXBxDF ggx = reflection::SGGXBxDF::create(A.x, A.y, dummyior, dummyior); + NG_already_in_reflective_dL_measure = ggx.__eval_DG_wo_clamps(params); + } + else + { + spectral_type dummyior; + reflection::SGGXBxDF ggx = reflection::SGGXBxDF::create(A.x, dummyior, dummyior); + NG_already_in_reflective_dL_measure = ggx.__eval_DG_wo_clamps(params); + } + + ndf::microfacet_to_light_measure_transform,ndf::REFLECT_REFRACT_BIT> microfacet_transform = + ndf::microfacet_to_light_measure_transform,ndf::REFLECT_REFRACT_BIT>::create(NG_already_in_reflective_dL_measure,params.NdotL,transmitted,params.VdotH,params.LdotH,VdotHLdotH,orientedEta); + return (spectral_type)fresnelDielectric_common(orientedEta2, nbl::hlsl::abs(params.VdotH)) * microfacet_transform(); + } + + sample_type __generate_wo_clamps(vector3_type localV, bool backside, vector3_type H, matrix3x3_type m, NBL_REF_ARG(vector3_type) u, scalar_type rcpOrientedEta, scalar_type orientedEta2, scalar_type rcpOrientedEta2, NBL_REF_ARG(anisocache_type) cache) + { + const scalar_type localVdotH = nbl::hlsl::dot(localV,H); + const scalar_type reflectance = fresnelDielectric_common(orientedEta2,nbl::hlsl::abs(localVdotH)); + + scalar_type rcpChoiceProb; + bool transmitted = math::partitionRandVariable(reflectance, u.z, rcpChoiceProb); + + cache = anisocache_type::create(localV, H); + + const scalar_type VdotH = cache.VdotH; + cache.LdotH = transmitted ? 
math::reflectRefract_computeNdotT(VdotH < 0.0, VdotH * VdotH, rcpOrientedEta2) : VdotH; + ray_dir_info_type localL; + localL.direction = math::reflectRefract_impl(transmitted, localV, H, VdotH, cache.LdotH, rcpOrientedEta); + + return sample_type::createFromTangentSpace(localV, localL, m); + } + + sample_type generate(anisotropic_type interaction, NBL_REF_ARG(vector3_type) u, NBL_REF_ARG(anisocache_type) cache) + { + const vector3_type localV = interaction.getTangentSpaceV(); + + scalar_type orientedEta, rcpOrientedEta; + const bool backside = math::getOrientedEtas(orientedEta, rcpOrientedEta, interaction.NdotV, eta); + + const vector3_type upperHemisphereV = backside ? -localV : localV; + + spectral_type dummyior; + reflection::SGGXBxDF ggx = reflection::SGGXBxDF::create(A.x, A.y, dummyior, dummyior); + const vector3_type H = ggx.__generate(upperHemisphereV, u.xy); + + return __generate_wo_clamps(localV, backside, H, interaction.getFromTangentSpace(), u, rcpOrientedEta, orientedEta*orientedEta, rcpOrientedEta*rcpOrientedEta, cache); + } + + sample_type generate(anisotropic_type interaction, NBL_REF_ARG(vector3_type) u) + { + anisocache_type dummycache; + return generate(interaction, u, dummycache); + } + + scalar_type pdf(params_t params) + { + scalar_type orientedEta, dummy; + const bool backside = math::getOrientedEtas(orientedEta, dummy, params.VdotH, eta); + const scalar_type orientedEta2 = orientedEta * orientedEta; + + const scalar_type VdotHLdotH = params.VdotH * params.LdotH; + const bool transmitted = VdotHLdotH < 0.0; + + const scalar_type reflectance = fresnelDielectric_common(orientedEta2, nbl::hlsl::abs(params.VdotH)); + + scalar_type ndf, devsh_v; + if (params.is_aniso) + { + const scalar_type ax2 = A.x*A.x; + const scalar_type ay2 = A.y*A.y; + + ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(A.x, A.y, ax2, ay2, params.TdotH2, params.BdotH2, params.NdotH2); + ndf::GGX ggx_ndf; + ndf = ggx_ndf(ndfparams); + + smith::GGX ggx_smith; 
+ devsh_v = ggx_smith.devsh_part(params.TdotV2, params.BdotV2, params.NdotV2, ax2, ay2); + } + else + { + const scalar_type a2 = A.x*A.x; + ndf::SIsotropicParams ndfparams = ndf::SIsotropicParams::create(a2, params.NdotH, params.NdotH2); + ndf::GGX ggx_ndf; + ndf = ggx_ndf(ndfparams); + + smith::GGX ggx_smith; + devsh_v = ggx_smith.devsh_part(params.NdotV2, a2, 1.0-a2); + } + + smith::GGX ggx_smith; + const scalar_type lambda = ggx_smith.G1_wo_numerator(params.NdotV, devsh_v); + return smith::VNDF_pdf_wo_clamps(ndf, lambda, params.NdotV, transmitted, params.VdotH, params.LdotH, VdotHLdotH, orientedEta, reflectance); + } + + quotient_pdf_type quotient_and_pdf(params_t params) + { + const scalar_type ax2 = A.x*A.x; + const scalar_type ay2 = A.y*A.y; + + scalar_type _pdf = pdf(params); + + smith::GGX ggx_smith; + scalar_type quo; + if (params.is_aniso) + { + smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(ax2, ay2, params.NdotV, params.TdotV2, params.BdotV2, params.NdotV2, params.NdotL, params.TdotL2, params.BdotL2, params.NdotL2); + quo = ggx_smith.G2_over_G1(smithparams); + } + else + { + smith::SIsotropicParams smithparams = smith::SIsotropicParams::create(ax2, params.NdotV, params.NdotV2, params.NdotL, params.NdotL2); + quo = ggx_smith.G2_over_G1(smithparams); + } + + return quotient_pdf_type::create((spectral_type)(quo), _pdf); + } + + vector2_type A; + scalar_type eta; +}; + +} +} } } From 462a9b32519421171252ba86bab04a727de2fcc2 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 7 Feb 2025 14:37:25 +0700 Subject: [PATCH 018/140] triangle and rectangle shapes Signed-off-by: Corey --- examples_tests | 2 +- .../nbl/builtin/hlsl/shapes/rectangle.hlsl | 51 ++++++++++ include/nbl/builtin/hlsl/shapes/triangle.hlsl | 99 ++++++++++++++----- src/nbl/builtin/CMakeLists.txt | 2 +- 4 files changed, 129 insertions(+), 25 deletions(-) create mode 100644 include/nbl/builtin/hlsl/shapes/rectangle.hlsl diff --git a/examples_tests b/examples_tests 
index 5a5fbfe55a..85e67ad0c4 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5a5fbfe55aa4cf062c562f19507ba30de085b7a6 +Subproject commit 85e67ad0c4012d7d8d2014489327036d89b0bf57 diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl new file mode 100644 index 0000000000..854a326aaf --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl @@ -0,0 +1,51 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SHAPES_RECTANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_RECTANGLE_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ + +template +struct SphericalRectangle +{ + using scalar_type = T; + using vector3_type = vector; + using vector4_type = vector; + using matrix3x3_type = matrix; + + static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N) + { + matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, isotropic_type::N)); + return nbl::hlsl::mul(TBN, rectangleOrigin - observer); + } + + scalar_type solidAngleOfRectangle(NBL_CONST_REF_ARG(vector3_type) r0, NBL_CONST_REF_ARG(vector) rectangleExtents) + { + const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); + const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); + const vector4_type cosGamma = vec4( + -n_z[0] * n_z[1], + -n_z[1] * n_z[2], + -n_z[2] * n_z[3], + -n_z[3] * n_z[0] + ); + return math::getSumofArccosABCD(cosGamma[0], cosGamma[1], cosGamma[2], cosGamma[3]) - 2 * numbers::pi; + } +} + +} +} +} + +#endif 
diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index 4677b0e155..f7ce67a1c9 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -6,8 +6,8 @@ #define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ #include -#include -#include +#include +#include namespace nbl { @@ -16,28 +16,81 @@ namespace hlsl namespace shapes { -namespace util +template +struct SphericalTriangle { - // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. - template - vector compInternalAngle(NBL_CONST_REF_ARG(vector) e0, NBL_CONST_REF_ARG(vector) e1, NBL_CONST_REF_ARG(vector) e2) - { - // Calculate this triangle's weight for each of its three m_vertices - // start by calculating the lengths of its sides - const float_t a = hlsl::dot(e0, e0); - const float_t asqrt = hlsl::sqrt(a); - const float_t b = hlsl::dot(e1, e1); - const float_t bsqrt = hlsl::sqrt(b); - const float_t c = hlsl::dot(e2, e2); - const float_t csqrt = hlsl::sqrt(c); - - const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); - const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); - const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); - // use them to find the angle at each vertex - return vector(angle0, angle1, angle2); - } -} + using scalar_type = T; + using vector3_type = vector; + + static SphericalTriangle create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, NBL_CONST_REF_ARG(vector3_type) origin) + { + SphericalTriangle retval; + retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin); + retval.vertex1 = nbl::hlsl::normalize(vertex1 - origin); + retval.vertex2 = nbl::hlsl::normalize(vertex2 - origin); + return retval; + } + + bool pyramidAngles(NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides) + { + cos_sides = vector3_type(nbl::hlsl::dot(vertex1, 
vertex2), nbl::hlsl::dot(vertex2, vertex0), nbl::hlsl::dot(vertex0, vertex1)); + csc_sides = 1.0 / nbl::hlsl::sqrt((vector3_type)(1.f) - cos_sides * cos_sides); + return nbl::hlsl::any(csc_sides >= (vector3_type)(numeric_limits::max)); + } + + scalar_type solidAngleOfTriangle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices, NBL_REF_ARG(scalar_type) cos_a, NBL_REF_ARG(scalar_type) cos_c, NBL_REF_ARG(scalar_type) csc_b, NBL_REF_ARG(scalar_type) csc_c) + { + vector3_type cos_sides,csc_sides; + if (pyramidAngles(cos_sides, csc_sides)) + return 0.f; + + // these variables might eventually get optimized out + cos_a = cos_sides[0]; + cos_c = cos_sides[2]; + csc_b = csc_sides[1]; + csc_c = csc_sides[2]; + + // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI + cos_vertices = clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
+ sin_vertices = sqrt((vector3_type)1.f - cos_vertices * cos_vertices); + + return math::getArccosSumofABC_minus_PI(cos_vertices[0], cos_vertices[1], cos_vertices[2], sin_vertices[0], sin_vertices[1], sin_vertices[2]); + } + + scalar_type solidAngleOfTriangle() + { + vector3_type dummy0,dummy1; + scalar_type dummy2,dummy3,dummy4,dummy5; + return solidAngleOfTriangle(dummy0,dummy1,dummy2,dummy3,dummy4,dummy5); + } + + scalar_type projectedSolidAngleOfTriangle(NBL_CONST_REF_ARG(vector3_type) receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) + { + if (pyramidAngles(cos_sides, csc_sides)) + return 0.f; + + vector3_type awayFromEdgePlane0 = nbl::hlsl::cross(vertex1, vertex2) * csc_sides[0]; + vector3_type awayFromEdgePlane1 = nbl::hlsl::cross(vertex2, vertex0) * csc_sides[1]; + vector3_type awayFromEdgePlane2 = nbl::hlsl::cross(vertex0, vertex1) * csc_sides[2]; + + // useless here but could be useful somewhere else + cos_vertices[0] = nbl::hlsl::dot(awayFromEdgePlane1, awayFromEdgePlane2); + cos_vertices[1] = nbl::hlsl::dot(awayFromEdgePlane2, awayFromEdgePlane0); + cos_vertices[2] = nbl::hlsl::dot(awayFromEdgePlane0, awayFromEdgePlane1); + // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works + cos_vertices = nbl::hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); + + matrix mat = + const vector3_type externalProducts = nbl::hlsl::abs(nbl::hlsl::transpose(awayFromEdgePlane) * receiverNormal); + + const vector3_type pyramidAngles = acos(cos_sides); + return nbl::hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); + } + + vector3_type vertex0; + vector3_type vertex1; + vector3_type vertex2; +}; } } diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 5ca0f3593f..882618879a 100644 --- 
a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -253,7 +253,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/ellipse.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/line.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/rectangle.hlsl") #sampling LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl") From 7d5d70c892cd45492949ea6bf61b88650a408ca0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 10 Feb 2025 16:58:22 +0700 Subject: [PATCH 019/140] more sampling methods Signed-off-by: Corey --- examples_tests | 2 +- .../nbl/builtin/hlsl/sampling/bilinear.hlsl | 61 ++++++++ include/nbl/builtin/hlsl/sampling/linear.hlsl | 45 ++++++ .../projected_spherical_triangle.hlsl | 94 +++++++++++++ .../hlsl/sampling/spherical_triangle.hlsl | 132 ++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 4 + 6 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 include/nbl/builtin/hlsl/sampling/bilinear.hlsl create mode 100644 include/nbl/builtin/hlsl/sampling/linear.hlsl create mode 100644 include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl create mode 100644 include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl diff --git a/examples_tests b/examples_tests index 85e67ad0c4..2c500b1e06 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 85e67ad0c4012d7d8d2014489327036d89b0bf57 +Subproject commit 2c500b1e06e3e83b2a427bf0aa1ef27878467e0b diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl new file mode 100644 index 0000000000..1d5f9a91e8 --- /dev/null +++ 
b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -0,0 +1,61 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_BILINEAR_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_BILINEAR_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct Bilinear +{ + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + + static Bilinear create(NBL_CONST_REF_ARG(vector4_type) bilinearCoeffs) + { + Bilinear retval; + retval.bilinearCoeffs = bilinearCoeffs; + return retval; + } + + vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u) + { + const vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); + Linear lineary = Linear::create(twiceAreasUnderXCurve); + const scalar_type warpedY = lineary.generate(u.y); // `u` is a const reference, so the warped coordinates are kept in locals + + const vector2_type ySliceEndPoints = vector2_type(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[2], warpedY), nbl::hlsl::mix(bilinearCoeffs[1], bilinearCoeffs[3], warpedY)); + Linear linearx = Linear::create(ySliceEndPoints); + const scalar_type warpedX = linearx.generate(u.x); // x is warped along the slice selected by warpedY + + rcpPdf = (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]) / (4.0 * nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], warpedX)); + + return vector2_type(warpedX, warpedY); + } + + scalar_type pdf(NBL_CONST_REF_ARG(vector2_type) u) + { + return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]); + } + + vector4_type bilinearCoeffs; +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl new file mode 100644 index
0000000000..8b9b3fb058 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -0,0 +1,45 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_LINEAR_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_LINEAR_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct Linear +{ + using scalar_type = T; + using vector2_type = vector; + + static Linear create(NBL_CONST_REF_ARG(vector2_type) linearCoeffs) + { + Linear retval; + retval.linearCoeffs = linearCoeffs; + return retval; + } + + scalar_type generate(scalar_type u) + { + const scalar_type rcpDiff = 1.0 / (linearCoeffs[0] - linearCoeffs[1]); + const vector2_type squaredCoeffs = linearCoeffs * linearCoeffs; + return nbl::hlsl::abs(rcpDiff) < numeric_limits::max ? (linearCoeffs[0] - nbl::hlsl::sqrt(nbl::hlsl::mix(squaredCoeffs[0], squaredCoeffs[1], u))) * rcpDiff : u; // when the two coefficients are equal rcpDiff is not finite and `u` is returned unchanged + } + + vector2_type linearCoeffs; +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl new file mode 100644 index 0000000000..5832e9aab2 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -0,0 +1,94 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_PROJECTED_SPHERICAL_TRIANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_PROJECTED_SPHERICAL_TRIANGLE_INCLUDED_ + +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct ProjectedSphericalTriangle +{ + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + + static ProjectedSphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) + { + ProjectedSphericalTriangle retval; + retval.tri = tri; + return retval; + } + + vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF) + { + const scalar_type minimumProjSolidAngle = 0.0; + + matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); + const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), (vector3_type)minimumProjSolidAngle); + + return bxdfPdfAtVertex.yyxz; + } + + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u) + { + // pre-warp according to proj solid angle approximation + vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); + Bilinear bilinear = Bilinear::create(patch); + const vector2_type warpedU = bilinear.generate(rcpPdf, u); // `u` is a const reference, so the warped sample gets its own local + + // now warp the points onto a spherical triangle + const vector3_type L = tri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, warpedU); + rcpPdf *= solidAngle; + + return L; + } + + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u) + {
+ scalar_type cos_a, cos_c, csc_b, csc_c; + vector3_type cos_vertices, sin_vertices; + const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); + return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); + } + + scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + { + scalar_type pdf; + const vector2_type u = tri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); + + vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); + Bilinear bilinear = Bilinear::create(patch); + return pdf * bilinear.pdf(u); + } + + scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + { + scalar_type pdf; + const vector2_type u = tri.generateInverse(pdf, L); + + vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); + Bilinear bilinear = Bilinear::create(patch); + return pdf * bilinear.pdf(u); + } + + shapes::SphericalTriangle tri; // NOTE(review): generate()/generateInverse() are called on this member, but this series only defines them on sampling::SphericalTriangle - confirm the intended member type +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl new file mode 100644 index 0000000000..9501cdc3d1 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -0,0 +1,132 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_SPHERICAL_TRIANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_SPHERICAL_TRIANGLE_INCLUDED_ + +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct SphericalTriangle +{ + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + + static SphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) + { + SphericalTriangle retval; + retval.tri = tri; + return retval; + } + + vector3_type slerp_delta(NBL_CONST_REF_ARG(vector3_type) start, NBL_CONST_REF_ARG(vector3_type) preScaledWaypoint, scalar_type cosAngleFromStart) + { + vector3_type planeNormal = nbl::hlsl::cross(start,preScaledWaypoint); + + cosAngleFromStart *= 0.5; // half-angle identities: sqrt(0.5 -/+ 0.5*cos(angle)) gives sin/cos of angle/2 + const scalar_type sinAngle = nbl::hlsl::sqrt(0.5 - cosAngleFromStart); + const scalar_type cosAngle = nbl::hlsl::sqrt(0.5 + cosAngleFromStart); + + planeNormal *= sinAngle; + const vector3_type precompPart = nbl::hlsl::cross(planeNormal, start) * 2.0; + + return precompPart * cosAngle + nbl::hlsl::cross(planeNormal, precompPart); + } + + // WARNING: can and will return NAN if one or three of the triangle edges are near zero length + vector3_type generate(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector2_type) u) + { + scalar_type negSinSubSolidAngle,negCosSubSolidAngle; + math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); + + const scalar_type p = negCosSubSolidAngle * sin_vertices[0] - negSinSubSolidAngle * cos_vertices[0]; + const scalar_type q = -negSinSubSolidAngle * sin_vertices[0] - negCosSubSolidAngle * cos_vertices[0]; + + // TODO: we could optimize everything up to and including the first slerp, because precision
here is just godawful + scalar_type u_ = q - cos_vertices[0]; + scalar_type v_ = p + sin_vertices[0] * cos_c; + + // the slerps could probably be optimized by sidestepping `normalize` calls and accumulating scaling factors + vector3_type C_s = tri.vertex0; + if (csc_b < numeric_limits::max) + { + const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); + if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) + C_s += slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); + } + + vector3_type retval = tri.vertex1; + const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri.vertex1); + const scalar_type csc_b_s = 1.0 / nbl::hlsl::sqrt(1.0 - cosBC_s * cosBC_s); + if (csc_b_s < numeric_limits::max) + { + const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); + if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) + retval += slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); + } + return retval; + } + + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u) + { + scalar_type cos_a, cos_c, csc_b, csc_c; + vector3_type cos_vertices, sin_vertices; + + rcpPdf = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); + + return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); + } + + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) L) + { + pdf = 1.0 / solidAngle; + + const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri.vertex1); + const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s); + const scalar_type cos_b_ = nbl::hlsl::dot(L, tri.vertex0); + + const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * cos_c) * csc_a_ 
* csc_c; + const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 - cosB_ * cosB_); + + const scalar_type cosC_ = sin_vertices[0] * sinB_* cos_c - cos_vertices[0] * cosB_; + const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); + + const scalar_type subTriSolidAngleRatio = math::getArccosSumofABC_minus_PI(cos_vertices[0], cosB_, cosC_, sin_vertices[0], sinB_, sinC_) * pdf; + const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? subTriSolidAngleRatio : 0.0; + + const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_); + const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < asfloat(0x3f7fffff) ? cosBC_s : cos_c)); + + return vector2_type(u,v); + } + + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(vector3_type) L) + { + scalar_type cos_a, cos_c, csc_b, csc_c; + vector3_type cos_vertices, sin_vertices; + + const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); + + return generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); + } + + shapes::SphericalTriangle tri; +}; + +} +} +} + +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 882618879a..93ac09c52b 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -255,9 +255,13 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/rectangle.hlsl") #sampling +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/bilinear.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/sampling/cos_weighted.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_triangle.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/projected_spherical_triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform.hlsl") # LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl") From 15801af997c55f361383eda79079b5b4c0f75965 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 11 Feb 2025 14:03:51 +0700 Subject: [PATCH 020/140] spherical rectangle Signed-off-by: Corey --- examples_tests | 2 +- .../hlsl/sampling/spherical_rectangle.hlsl | 86 +++++++++++++++++++ .../nbl/builtin/hlsl/shapes/rectangle.hlsl | 15 +++- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 2 +- src/nbl/builtin/CMakeLists.txt | 1 + 5 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl diff --git a/examples_tests b/examples_tests index 2c500b1e06..e6a99165c1 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 2c500b1e06e3e83b2a427bf0aa1ef27878467e0b +Subproject commit e6a99165c1b153977192f9722381fc24f566c9ca diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl new file mode 100644 index 0000000000..83224bfabd --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -0,0 +1,86 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_SPHERICAL_RECTANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_SPHERICAL_RECTANGLE_INCLUDED_ + +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct SphericalRectangle +{ + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + + static SphericalRectangle create(NBL_CONST_REG_ARG(shapes::SphericalRectangle) rect) + { + SphericalRectangle retval; + retval.rect = rect; + return retval; + } + + vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S) + { + const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); + const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt(vector4_type(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); + const vector4_type cosGamma = vector4_type( + -n_z[0] * n_z[1], + -n_z[1] * n_z[2], + -n_z[2] * n_z[3], + -n_z[3] * n_z[0] + ); + + scalar_type p = math::getSumofArccosAB(cosGamma[0], cosGamma[1]); + scalar_type q = math::getSumofArccosAB(cosGamma[2], cosGamma[3]); + + const scalar_type k = 2 * numbers::pi - q; + const scalar_type b0 = n_z[0]; + const scalar_type b1 = n_z[2]; + S = p + q - 2 * numbers::pi; + + const scalar_type CLAMP_EPS = 1e-5f; + + // flip z axsis if rect.r0.z > 0 + const uint32_t zFlipMask = (asuint(rect.r0.z) ^ 0x80000000u) & 0x80000000u; + rect.r0.z = asfloat(asuint(rect.r0.z) ^ zFlipMask); + vector3_type r1 = rect.r0 + vector3_type(rectangleExtents.x, rectangleExtents.y, 0); + + const scalar_type au = uv.x * S + k; + const scalar_type fu = (nbl::hlsl::cos(au) * b0 - b1) / nbl::hlsl::sin(au); + const scalar_type cu_2 = nbl::hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] + const scalar_type cu = 
asfloat(asuint(1.0 / nbl::hlsl::sqrt(cu_2)) ^ (asuint(fu) & 0x80000000u)); + + scalar_type xu = -(cu * rect.r0.z) * 1.0 / nbl::hlsl::sqrt(1 - cu * cu); + xu = nbl::hlsl::clamp(xu, rect.r0.x, r1.x); // avoid Infs + const scalar_type d_2 = xu * xu + rect.r0.z * rect.r0.z; + const scalar_type d = nbl::hlsl::sqrt(d_2); + + const scalar_type h0 = rect.r0.y / nbl::hlsl::sqrt(d_2 + rect.r0.y * rect.r0.y); + const scalar_type h1 = r1.y / nbl::hlsl::sqrt(d_2 + r1.y * r1.y); + const scalar_type hv = h0 + uv.y * (h1 - h0), hv2 = hv * hv; + const scalar_type yv = (hv2 < 1 - CLAMP_EPS) ? (hv * d) / nbl::hlsl::sqrt(1 - hv2) : r1.y; + + return vector2_type((xu - rect.r0.x) / rectangleExtents.x, (yv - rect.r0.y) / rectangleExtents.y); + } + + shapes::SphericalRectangle rect; +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl index 854a326aaf..a61f23cafa 100644 --- a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl @@ -24,13 +24,22 @@ struct SphericalRectangle using vector4_type = vector; using matrix3x3_type = matrix; + static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis) + { + SphericalRectangle retval; + retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer); + return retval; + } + static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N) { + SphericalRectangle retval; matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, isotropic_type::N)); - return nbl::hlsl::mul(TBN, rectangleOrigin - observer); + retval.r0 = nbl::hlsl::mul(TBN, rectangleOrigin - observer); + return retval; } - scalar_type solidAngleOfRectangle(NBL_CONST_REF_ARG(vector3_type) r0, 
NBL_CONST_REF_ARG(vector) rectangleExtents) + scalar_type solidAngleOfRectangle(NBL_CONST_REF_ARG(vector) rectangleExtents) { const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); @@ -42,6 +51,8 @@ struct SphericalRectangle ); return math::getSumofArccosABCD(cosGamma[0], cosGamma[1], cosGamma[2], cosGamma[3]) - 2 * numbers::pi; } + + vector3_type r0; } } diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index f7ce67a1c9..59ba508596 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -49,7 +49,7 @@ struct SphericalTriangle cos_c = cos_sides[2]; csc_b = csc_sides[1]; csc_c = csc_sides[2]; - + // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI cos_vertices = clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
sin_vertices = sqrt((vector3_type)1.f - cos_vertices * cos_vertices); diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 93ac09c52b..58a75022cf 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -262,6 +262,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/projected_spherical_triangle.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/spherical_rectangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/uniform.hlsl") # LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ndarray_addressing.hlsl") From 7419893e9f146569a51adec47bc8e841de6697be Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 17 Feb 2025 16:58:42 +0700 Subject: [PATCH 021/140] fix aniso cache bug --- examples_tests | 2 +- include/nbl/builtin/hlsl/bxdf/common.hlsl | 49 +++++++++++------------ 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/examples_tests b/examples_tests index e6a99165c1..159d1533e8 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit e6a99165c1b153977192f9722381fc24f566c9ca +Subproject commit 159d1533e8d82e3c5e82165e8b79ea67c0f23111 diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl index 1a8e6d3086..d9a2620818 100644 --- a/include/nbl/builtin/hlsl/bxdf/common.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl @@ -759,7 +759,11 @@ struct SAnisotropicMicrofacetCache using scalar_type = typename IsoCache::scalar_type; using vector3_type = vector; using matrix3x3_type = matrix; - using monochrome_type = vector; + + using ray_dir_info_type = ray_dir_info::SBasic; + using anisotropic_type = surface_interactions::SAnisotropic; + using 
isocache_type = SIsotropicMicrofacetCache; + using sample_type = SLightSample; // always valid by construction static this_t createForReflection(const vector3_type tangentSpaceV, const vector3_type tangentSpaceH) @@ -819,11 +823,15 @@ struct SAnisotropicMicrofacetCache NBL_CONST_REF_ARG(fresnel::OrientedEtas) orientedEtas, NBL_REF_ARG(vector3_type) H ) { - this_t retval; - retval.iso_cache = isocache_type::create(V,L,N,orientedEtas,H); - retval.TdotH = nbl::hlsl::dot(T,H); - retval.BdotH = nbl::hlsl::dot(B,H); - return retval; + isocache_type iso = (isocache_type)retval; + const bool valid = isocache_type::compute(iso,transmitted,V,L,N,NdotL,VdotL,orientedEta,rcpOrientedEta,H); + retval = (this_t)iso; + if (valid) + { + retval.TdotH = nbl::hlsl::dot(T,H); + retval.BdotH = nbl::hlsl::dot(B,H); + } + return valid; } template && LightSample) static this_t create( @@ -832,27 +840,16 @@ struct SAnisotropicMicrofacetCache NBL_CONST_REF_ARG(fresnel::OrientedEtas) orientedEtas ) { - this_t retval; + isocache_type iso = (isocache_type)retval; vector3_type H; - retval.iso_cache = isocache_type::template create(interaction.isotropic,_sample,orientedEtas,H); - retval.TdotH = nbl::hlsl::dot(interaction.getT(),H); - retval.BdotH = nbl::hlsl::dot(interaction.getB(),H); - return retval; - } - static this_t createPartial( - const scalar_type VdotH, const scalar_type LdotH, const scalar_type NdotH, - bool transmitted, NBL_CONST_REF_ARG(fresnel::OrientedEtaRcps) rcpOrientedEta - ) - { - this_t retval; - retval.iso_cache.VdotH = VdotH; - retval.iso_cache.LdotH = LdotH; - retval.iso_cache.VdotL = hlsl::mix(scalar_type(2.0) * VdotH * VdotH - scalar_type(1.0), - VdotH * (VdotH * rcpOrientedEta.value[0] + LdotH) - rcpOrientedEta.value[0], transmitted); - assert(NdotH > scalar_type(0.0)); - retval.iso_cache.absNdotH = hlsl::abs(NdotH); - retval.iso_cache.NdotH2 = NdotH * NdotH; - return retval; + const bool valid = isocache_type::compute(iso,interaction,_sample,eta,H); + retval = 
(this_t)iso; + if (valid) + { + retval.TdotH = nbl::hlsl::dot(interaction.T,H); + retval.BdotH = nbl::hlsl::dot(interaction.B,H); + } + return valid; } void fillTangents(const vector3_type T, const vector3_type B, const vector3_type H) From cf2476d1b1b9c4f79bf368d424528252daeb1cfe Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 18 Feb 2025 15:24:42 +0700 Subject: [PATCH 022/140] init func to modify bxdf params directly Signed-off-by: Corey --- examples_tests | 2 +- include/nbl/builtin/hlsl/bxdf/common.hlsl | 9 +++-- include/nbl/builtin/hlsl/bxdf/reflection.hlsl | 34 +++++++++++++++--- .../nbl/builtin/hlsl/bxdf/transmission.hlsl | 36 ++++++++++++++++--- 4 files changed, 66 insertions(+), 15 deletions(-) diff --git a/examples_tests b/examples_tests index 159d1533e8..a7350db7d7 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 159d1533e8d82e3c5e82165e8b79ea67c0f23111 +Subproject commit a7350db7d7e422fa5086982b3327103c06cfbe44 diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl index d9a2620818..a0b1f6e82b 100644 --- a/include/nbl/builtin/hlsl/bxdf/common.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl @@ -1255,11 +1255,10 @@ template Axy; - Spectrum ior0; - Spectrum ior1; - Scalar eta; + vector A; // roughness + Spectrum ior0; // source ior + Spectrum ior1; // destination ior + Scalar eta; // in most cases, eta will be calculated from ior0 and ior1; see monochromeEta in pathtracer.hlsl Spectrum eta2; Spectrum luminosityContributionHint; }; diff --git a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl index f9c90a1373..c9a8c4d63a 100644 --- a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl @@ -52,6 +52,11 @@ struct SLambertianBxDF return create(); } + void init(SBxDFCreationParams params) + { + // do nothing + } + scalar_type __eval_pi_factored_out(scalar_type maxNdotL) { return maxNdotL; 
@@ -112,7 +117,12 @@ struct SOrenNayarBxDF static this_t create(SBxDFCreationParams params) { - return create(params.A); + return create(params.A.x); + } + + void init(SBxDFCreationParams params) + { + A = params.A.x; } scalar_type __rec_pi_factored_out_wo_clamps(scalar_type VdotL, scalar_type maxNdotL, scalar_type maxNdotV) @@ -343,9 +353,16 @@ struct SBeckmannBxDF static this_t create(SBxDFCreationParams params) { if (params.is_aniso) - return create(params.Axy.x, params.Axy.y, params.ior0, params.ior1); + return create(params.A.x, params.A.y, params.ior0, params.ior1); else - return create(params.A, params.ior0, params.ior1); + return create(params.A.x, params.ior0, params.ior1); + } + + void init(SBxDFCreationParams params) + { + A = params.A; + ior0 = params.ior0; + ior1 = params.ior1; } scalar_type __eval_DG_wo_clamps(params_t params) @@ -579,9 +596,16 @@ struct SGGXBxDF static this_t create(SBxDFCreationParams params) { if (params.is_aniso) - return create(params.Axy.x, params.Axy.y, params.ior0, params.ior1); + return create(params.A.x, params.A.y, params.ior0, params.ior1); else - return create(params.A, params.ior0, params.ior1); + return create(params.A.x, params.ior0, params.ior1); + } + + void init(SBxDFCreationParams params) + { + A = params.A; + ior0 = params.ior0; + ior1 = params.ior1; } scalar_type __eval_DG_wo_clamps(params_t params) diff --git a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl index f34d962b93..287392b7e9 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl @@ -58,6 +58,11 @@ struct SLambertianBxDF return create(); } + void init(SBxDFCreationParams params) + { + // do nothing + } + scalar_type __eval_pi_factored_out(scalar_type absNdotL) { return absNdotL; @@ -124,6 +129,11 @@ struct SSmoothDielectricBxDF return create(params.eta); } + void init(SBxDFCreationParams params) + { + eta = params.eta; + } + spectral_type 
eval(params_t params) { return (spectral_type)0; @@ -210,6 +220,12 @@ struct SSmoothDielectricBxDF return create(params.eta2, params.luminosityContributionHint); } + void init(SBxDFCreationParams params) + { + eta2 = params.eta2; + luminosityContributionHint = params.luminosityContributionHint; + } + spectral_type eval(params_t params) { return (spectral_type)0; @@ -307,9 +323,15 @@ struct SBeckmannDielectricBxDF static this_t create(SBxDFCreationParams params) { if (params.is_aniso) - return create(params.eta, params.Axy.x, params.Axy.y); + return create(params.eta, params.A.x, params.A.y); else - return create(params.eta, params.A); + return create(params.eta, params.A.x); + } + + void init(SBxDFCreationParams params) + { + A = params.A; + eta = params.eta; } spectral_type eval(params_t params) @@ -482,9 +504,15 @@ struct SGGXDielectricBxDF static this_t create(SBxDFCreationParams params) { if (params.is_aniso) - return create(params.eta, params.Axy.x, params.Axy.y); + return create(params.eta, params.A.x, params.A.y); else - return create(params.eta, params.A); + return create(params.eta, params.A.x); + } + + void init(SBxDFCreationParams params) + { + A = params.A; + eta = params.eta; } spectral_type eval(params_t params) From 535e18d32c585e0ed87bfa20c2b3404590487e56 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 20 Feb 2025 16:55:35 +0700 Subject: [PATCH 023/140] bug fixes --- examples_tests | 2 +- include/nbl/builtin/hlsl/bxdf/common.hlsl | 36 +-- include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl | 291 ++++++++++++++++++ include/nbl/builtin/hlsl/bxdf/reflection.hlsl | 8 +- .../nbl/builtin/hlsl/bxdf/transmission.hlsl | 9 +- .../hlsl/sampling/box_muller_transform.hlsl | 2 +- .../projected_spherical_triangle.hlsl | 12 +- .../hlsl/sampling/spherical_triangle.hlsl | 2 +- .../nbl/builtin/hlsl/shapes/rectangle.hlsl | 22 +- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 4 +- 10 files changed, 338 insertions(+), 50 deletions(-) create mode 100644 
include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl diff --git a/examples_tests b/examples_tests index a7350db7d7..159d1533e8 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit a7350db7d7e422fa5086982b3327103c06cfbe44 +Subproject commit 159d1533e8d82e3c5e82165e8b79ea67c0f23111 diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl index a0b1f6e82b..4e5ba09551 100644 --- a/include/nbl/builtin/hlsl/bxdf/common.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl @@ -1122,27 +1122,21 @@ NBL_CONCEPT_END( #undef bxdf #include -#define NBL_CONCEPT_NAME IsotropicMicrofacetBRDF -#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) -#define NBL_CONCEPT_TPLT_PRM_NAMES (T) -#define NBL_CONCEPT_PARAM_0 (bxdf, T) -#define NBL_CONCEPT_PARAM_1 (iso, typename T::isotropic_interaction_type) -#define NBL_CONCEPT_PARAM_2 (u, vector) -#define NBL_CONCEPT_PARAM_3 (isocache, typename T::isocache_type) -NBL_CONCEPT_BEGIN(4) -#define bxdf NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 -#define iso NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 -#define u NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 -#define isocache NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_3 -NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(impl::iso_microfacet_bxdf_common, T)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((bxdf.generate(iso,u,isocache)), ::nbl::hlsl::is_same_v, typename T::sample_type)) -); -#undef isocache -#undef u -#undef iso -#undef bxdf -#include + template && surface_interactions::Anisotropic) + static SBxDFParams create(LightSample _sample, Aniso interaction, BxDFClampMode clamp = BCM_NONE) + { + this_t retval; + retval.NdotV = clamp == BCM_ABS ? abs(interaction.NdotV) : + clamp == BCM_MAX ? max(interaction.NdotV, 0.0) : + interaction.NdotV; + retval.uNdotV = interaction.NdotV; + retval.NdotV2 = interaction.NdotV2; + retval.NdotL = clamp == BCM_ABS ? abs(_sample.NdotL) : + clamp == BCM_MAX ? 
max(_sample.NdotL, 0.0) : + _sample.NdotL; + retval.uNdotL = _sample.NdotL; + retval.NdotL2 = _sample.NdotL2; + retval.VdotL = _sample.VdotL; #define NBL_CONCEPT_NAME IsotropicMicrofacetBSDF #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) diff --git a/include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl b/include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl new file mode 100644 index 0000000000..5a6f6cdf26 --- /dev/null +++ b/include/nbl/builtin/hlsl/bxdf/geom_smith.hlsl @@ -0,0 +1,291 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_BXDF_GEOM_INCLUDED_ +#define _NBL_BUILTIN_HLSL_BXDF_GEOM_INCLUDED_ + +#include "nbl/builtin/hlsl/bxdf/ndf.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace bxdf +{ +namespace smith +{ + +template +typename NDF::scalar_type VNDF_pdf_wo_clamps(typename NDF::scalar_type ndf, typename NDF::scalar_type lambda_V, typename NDF::scalar_type maxNdotV, NBL_REF_ARG(typename NDF::scalar_type) onePlusLambda_V) +{ + onePlusLambda_V = 1.0 + lambda_V; + ndf::microfacet_to_light_measure_transform transform = ndf::microfacet_to_light_measure_transform::create(ndf / onePlusLambda_V, maxNdotV); + return transform(); +} + +template +typename NDF::scalar_type VNDF_pdf_wo_clamps(typename NDF::scalar_type ndf, typename NDF::scalar_type lambda_V, typename NDF::scalar_type absNdotV, bool transmitted, typename NDF::scalar_type VdotH, typename NDF::scalar_type LdotH, typename NDF::scalar_type VdotHLdotH, typename NDF::scalar_type orientedEta, typename NDF::scalar_type reflectance, NBL_REF_ARG(typename NDF::scalar_type) onePlusLambda_V) +{ + onePlusLambda_V = 1.0 + lambda_V; + ndf::microfacet_to_light_measure_transform transform + = ndf::microfacet_to_light_measure_transform::create((transmitted ? 
(1.0 - reflectance) : reflectance) * ndf / onePlusLambda_V, absNdotV, transmitted, VdotH, LdotH, VdotHLdotH, orientedEta); + return transform(); +} + +template) +T VNDF_pdf_wo_clamps(T ndf, T G1_over_2NdotV) +{ + return ndf * 0.5 * G1_over_2NdotV; +} + +template) +T FVNDF_pdf_wo_clamps(T fresnel_ndf, T G1_over_2NdotV, T absNdotV, bool transmitted, T VdotH, T LdotH, T VdotHLdotH, T orientedEta) +{ + T FNG = fresnel_ndf * G1_over_2NdotV; + T factor = 0.5; + if (transmitted) + { + const T VdotH_etaLdotH = (VdotH + orientedEta * LdotH); + // VdotHLdotH is negative under transmission, so this factor is negative + factor *= -2.0 * VdotHLdotH / (VdotH_etaLdotH * VdotH_etaLdotH); + } + return FNG * factor; +} + +template) +T VNDF_pdf_wo_clamps(T ndf, T G1_over_2NdotV, T absNdotV, bool transmitted, T VdotH, T LdotH, T VdotHLdotH, T orientedEta, T reflectance) +{ + T FN = (transmitted ? (1.0 - reflectance) : reflectance) * ndf; + return FVNDF_pdf_wo_clamps(FN, G1_over_2NdotV, absNdotV, transmitted, VdotH, LdotH, VdotHLdotH, orientedEta); +} + + +template) +struct SIsotropicParams +{ + using this_t = SIsotropicParams; + + static this_t create(T a2, T NdotV2, T NdotL2, T lambdaV_plus_one) // beckmann + { + this_t retval; + retval.a2 = a2; + retval.NdotV2 = NdotV2; + retval.NdotL2 = NdotL2; + retval.lambdaV_plus_one = lambdaV_plus_one; + return retval; + } + + static this_t create(T a2, T NdotV, T NdotV2, T NdotL, T NdotL2) // ggx + { + this_t retval; + retval.a2 = a2; + retval.NdotV = NdotV; + retval.NdotV2 = NdotV2; + retval.NdotL = NdotL; + retval.NdotL2 = NdotL2; + retval.one_minus_a2 = 1.0 - a2; + return retval; + } + + T a2; + T NdotV; + T NdotL; + T NdotV2; + T NdotL2; + T lambdaV_plus_one; + T one_minus_a2; +}; + +template) +struct SAnisotropicParams +{ + using this_t = SAnisotropicParams; + + static this_t create(T ax2, T ay2, T TdotV2, T BdotV2, T NdotV2, T TdotL2, T BdotL2, T NdotL2, T lambdaV_plus_one) // beckmann + { + this_t retval; + retval.ax2 = ax2; + 
retval.ay2 = ay2; + retval.TdotV2 = TdotV2; + retval.BdotV2 = BdotV2; + retval.NdotV2 = NdotV2; + retval.TdotL2 = TdotL2; + retval.BdotL2 = BdotL2; + retval.NdotL2 = NdotL2; + retval.lambdaV_plus_one = lambdaV_plus_one; + return retval; + } + + static this_t create(T ax2, T ay2, T NdotV, T TdotV2, T BdotV2, T NdotV2, T NdotL, T TdotL2, T BdotL2, T NdotL2) // ggx + { + this_t retval; + retval.ax2 = ax2; + retval.ay2 = ay2; + retval.NdotL = NdotL; + retval.NdotV = NdotV; + retval.TdotV2 = TdotV2; + retval.BdotV2 = BdotV2; + retval.NdotV2 = NdotV2; + retval.TdotL2 = TdotL2; + retval.BdotL2 = BdotL2; + retval.NdotL2 = NdotL2; + return retval; + } + + T ax2; + T ay2; + T NdotV; + T NdotL; + T TdotV2; + T BdotV2; + T NdotV2; + T TdotL2; + T BdotL2; + T NdotL2; + T lambdaV_plus_one; +}; + + +// beckmann +template) +struct Beckmann +{ + using scalar_type = T; + + scalar_type G1(scalar_type lambda) + { + return 1.0 / (1.0 + lambda); + } + + scalar_type C2(scalar_type NdotX2, scalar_type a2) + { + return NdotX2 / (a2 * (1.0 - NdotX2)); + } + + scalar_type C2(scalar_type TdotX2, scalar_type BdotX2, scalar_type NdotX2, scalar_type ax2, scalar_type ay2) + { + return NdotX2 / (TdotX2 * ax2 + BdotX2 * ay2); + } + + scalar_type Lambda(scalar_type c2) + { + scalar_type c = sqrt(c2); + scalar_type nom = 1.0 - 1.259 * c + 0.396 * c2; + scalar_type denom = 2.181 * c2 + 3.535 * c; + return nbl::hlsl::mix(0.0, nom / denom, c < 1.6); + } + + scalar_type Lambda(scalar_type NdotX2, scalar_type a2) + { + return Lambda(C2(NdotX2, a2)); + } + + scalar_type Lambda(scalar_type TdotX2, scalar_type BdotX2, scalar_type NdotX2, scalar_type ax2, scalar_type ay2) + { + return Lambda(C2(TdotX2, BdotX2, NdotX2, ax2, ay2)); + } + + scalar_type correlated(SIsotropicParams params) + { + scalar_type c2 = C2(params.NdotV2, params.a2); + scalar_type L_v = Lambda(c2); + c2 = C2(params.NdotL2, params.a2); + scalar_type L_l = Lambda(c2); + return G1(L_v + L_l); + } + + scalar_type correlated(SAnisotropicParams 
params) + { + scalar_type c2 = C2(params.TdotV2, params.BdotV2, params.NdotV2, params.ax2, params.ay2); + scalar_type L_v = Lambda(c2); + c2 = C2(params.TdotL2, params.BdotL2, params.NdotL2, params.ax2, params.ay2); + scalar_type L_l = Lambda(c2); + return G1(L_v + L_l); + } + + scalar_type G2_over_G1(SIsotropicParams params) + { + scalar_type lambdaL = Lambda(params.NdotL2, params.a2); + return params.lambdaV_plus_one / (params.lambdaV_plus_one + lambdaL); + } + + scalar_type G2_over_G1(SAnisotropicParams params) + { + scalar_type c2 = C2(params.TdotL2, params.BdotL2, params.NdotL2, params.ax2, params.ay2); + scalar_type lambdaL = Lambda(c2); + return params.lambdaV_plus_one / (params.lambdaV_plus_one + lambdaL); + } +}; + + +// ggx +template) +struct GGX +{ + using scalar_type = T; + + scalar_type devsh_part(scalar_type NdotX2, scalar_type a2, scalar_type one_minus_a2) + { + return sqrt(a2 + one_minus_a2 * NdotX2); + } + + scalar_type devsh_part(scalar_type TdotX2, scalar_type BdotX2, scalar_type NdotX2, scalar_type ax2, scalar_type ay2) + { + return sqrt(TdotX2 * ax2 + BdotX2 * ay2 + NdotX2); + } + + scalar_type G1_wo_numerator(scalar_type NdotX, scalar_type NdotX2, scalar_type a2, scalar_type one_minus_a2) + { + return 1.0 / (NdotX + devsh_part(NdotX2,a2,one_minus_a2)); + } + + scalar_type G1_wo_numerator(scalar_type NdotX, scalar_type TdotX2, scalar_type BdotX2, scalar_type NdotX2, scalar_type ax2, scalar_type ay2) + { + return 1.0 / (NdotX + devsh_part(TdotX2, BdotX2, NdotX2, ax2, ay2)); + } + + scalar_type G1_wo_numerator(scalar_type NdotX, scalar_type devsh_part) + { + return 1.0 / (NdotX + devsh_part); + } + + scalar_type correlated_wo_numerator(SIsotropicParams params) + { + scalar_type Vterm = params.NdotL * devsh_part(params.NdotV2, params.a2, params.one_minus_a2); + scalar_type Lterm = params.NdotV * devsh_part(params.NdotL2, params.a2, params.one_minus_a2); + return 0.5 / (Vterm + Lterm); + } + + scalar_type correlated_wo_numerator(SAnisotropicParams 
params) + { + scalar_type Vterm = params.NdotL * devsh_part(params.TdotV2, params.BdotV2, params.NdotV2, params.ax2, params.ay2); + scalar_type Lterm = params.NdotV * devsh_part(params.TdotL2, params.BdotL2, params.NdotL2, params.ax2, params.ay2); + return 0.5 / (Vterm + Lterm); + } + + scalar_type G2_over_G1(SIsotropicParams params) + { + scalar_type devsh_v = devsh_part(params.NdotV2, params.a2, params.one_minus_a2); + scalar_type G2_over_G1 = params.NdotL * (devsh_v + params.NdotV); // alternative `Vterm+NdotL*NdotV /// NdotL*NdotV could come as a parameter + G2_over_G1 /= params.NdotV * devsh_part(params.NdotL2, params.a2, params.one_minus_a2) + params.NdotL * devsh_v; + + return G2_over_G1; + } + + scalar_type G2_over_G1(SAnisotropicParams params) + { + scalar_type devsh_v = devsh_part(params.TdotV2, params.BdotV2, params.NdotV2, params.ax2, params.ay2); + scalar_type G2_over_G1 = params.NdotL * (devsh_v + params.NdotV); + G2_over_G1 /= params.NdotV * devsh_part(params.TdotL2, params.BdotL2, params.NdotL2, params.ax2, params.ay2) + params.NdotL * devsh_v; + + return G2_over_G1; + } + +}; + +} +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl index c9a8c4d63a..c64d0f3c4c 100644 --- a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl @@ -220,7 +220,7 @@ struct SBlinnPhongBxDF ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(params.NdotH, 1.0 / (1.0 - params.NdotH2), params.TdotH2, params.BdotH2, n.x, n.y); ndf::BlinnPhong blinn_phong; scalar_type DG = blinn_phong(ndfparams); - if (any>(a2 > (vector2_type)numeric_limits::min)) + if (any >(a2 > (vector2_type)numeric_limits::min)) { smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(a2.x, a2.y, params.TdotV2, params.BdotV2, params.NdotV2, params.TdotL2, params.BdotL2, params.NdotL2, 0); smith::Beckmann beckmann; @@ -233,7 
+233,7 @@ struct SBlinnPhongBxDF ndf::SIsotropicParams ndfparams = ndf::SIsotropicParams::create(n, params.NdotH, params.NdotH2); ndf::BlinnPhong blinn_phong; scalar_type NG = blinn_phong(ndfparams); - if (any>(a2 > (vector2_type)numeric_limits::min)) + if (any >(a2 > (vector2_type)numeric_limits::min)) { smith::SIsotropicParams smithparams = smith::SIsotropicParams::create(a2.x, params.NdotV2, params.NdotL2, 0); smith::Beckmann beckmann; @@ -374,7 +374,7 @@ struct SBeckmannBxDF ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(A.x, A.y, ax2, ay2, params.TdotH2, params.BdotH2, params.NdotH2); ndf::Beckmann beckmann_ndf; scalar_type NG = beckmann_ndf(ndfparams); - if (any>(A > (vector2_type)numeric_limits::min)) + if (any >(A > (vector2_type)numeric_limits::min)) { smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(ax2, ay2, params.TdotV2, params.BdotV2, params.NdotV2, params.TdotL2, params.BdotL2, params.NdotL2, 0); smith::Beckmann beckmann_smith; @@ -617,7 +617,7 @@ struct SGGXBxDF ndf::SAnisotropicParams ndfparams = ndf::SAnisotropicParams::create(A.x, A.y, ax2, ay2, params.TdotH2, params.BdotH2, params.NdotH2); ndf::GGX ggx_ndf; scalar_type NG = ggx_ndf(ndfparams); - if (any>(A > (vector2_type)numeric_limits::min)) + if (any >(A > (vector2_type)numeric_limits::min)) { smith::SAnisotropicParams smithparams = smith::SAnisotropicParams::create(ax2, ay2, params.NdotV, params.TdotV2, params.BdotV2, params.NdotV2, params.NdotL, params.TdotL2, params.BdotL2, params.NdotL2); smith::GGX ggx_smith; diff --git a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl index 287392b7e9..77ce658016 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl @@ -100,8 +100,11 @@ struct SLambertianBxDF // microfacet bxdfs -template && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) -struct SSmoothDielectricBxDF +template // 
NBL_FUNC_REQUIRES(Sample && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) // dxc won't let me put this in +struct SSmoothDielectricBxDF; + +template +struct SSmoothDielectricBxDF { using this_t = SSmoothDielectricBxDF; using scalar_type = typename LightSample::scalar_type; @@ -190,7 +193,7 @@ struct SSmoothDielectricBxDF scalar_type eta; }; -template && IsotropicMicrofacetCache && AnisotropicMicrofacetCache) +template struct SSmoothDielectricBxDF { using this_t = SSmoothDielectricBxDF; diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index efa8d66e2b..57a18589fd 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -24,4 +24,4 @@ vector boxMullerTransform(vector xi, T stddev) } } -#endif \ No newline at end of file +#endif diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index 5832e9aab2..945ca053b8 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -25,14 +25,14 @@ struct ProjectedSphericalTriangle using vector3_type = vector; using vector4_type = vector; - static ProjectedSphericalTriangle create(NBL_CONST_REG_ARG(shapes::SphericalTriangle) tri) + static ProjectedSphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) { ProjectedSphericalTriangle retval; retval.tri = tri; return retval; } - vector4_type computeBilinearPatch(NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool isBSDF) + vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF) { const scalar_type minimumProjSolidAngle = 0.0; @@ -42,7 +42,7 @@ struct ProjectedSphericalTriangle return bxdfPdfAtVertex.yyxz; } - vector3_type generate(NBL_REG_ARG(scalar_type) 
rcpPdf, scalar_type solidAngle, NBL_CONST_REG_ARG(vector3_type) cos_vertices, NBL_CONST_REG_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REG_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u) { // pre-warp according to proj solid angle approximation vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); @@ -56,7 +56,7 @@ struct ProjectedSphericalTriangle return L; } - vector3_type generate(NBL_REG_ARG(scalar_type) rcpPdf, NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REG_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; @@ -64,7 +64,7 @@ struct ProjectedSphericalTriangle return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); } - scalar_type pdf(scalar_type solidAngle, NBL_CONST_REG_ARG(vector3_type) cos_vertices, NBL_CONST_REG_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REG_ARG(vector3_type) L) + scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, 
NBL_CONST_REF_ARG(vector3_type) L) { scalar_type pdf; const vector2_type u = tri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); @@ -74,7 +74,7 @@ struct ProjectedSphericalTriangle return pdf * bilinear.pdf(u); } - scalar_type pdf(NBL_CONST_REG_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REG_ARG(vector3_type) L) + scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) { scalar_type pdf; const vector2_type u = tri.generateInverse(pdf, L); diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 9501cdc3d1..1d4fda454d 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -24,7 +24,7 @@ struct SphericalTriangle using vector2_type = vector; using vector3_type = vector; - static SphericalTriangle create(NBL_CONST_REG_ARG(shapes::SphericalTriangle) tri) + static SphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) { SphericalTriangle retval; retval.tri = tri; diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl index a61f23cafa..47d3927f31 100644 --- a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl @@ -16,25 +16,25 @@ namespace hlsl namespace shapes { -template +template struct SphericalRectangle { - using scalar_type = T; - using vector3_type = vector; - using vector4_type = vector; - using matrix3x3_type = matrix; + using scalar_type = Scalar; + using vector3_type = vector; + using vector4_type = vector; + using matrix3x3_type = matrix; - static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis) + static SphericalRectangle 
create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis) { - SphericalRectangle retval; + SphericalRectangle retval; retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer); return retval; } - static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N) + static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N) { - SphericalRectangle retval; - matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, isotropic_type::N)); + SphericalRectangle retval; + matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, N)); retval.r0 = nbl::hlsl::mul(TBN, rectangleOrigin - observer); return retval; } @@ -53,7 +53,7 @@ struct SphericalRectangle } vector3_type r0; -} +}; } } diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index 59ba508596..d904ed7246 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -80,8 +80,8 @@ struct SphericalTriangle // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works cos_vertices = nbl::hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); - matrix mat = - const vector3_type externalProducts = nbl::hlsl::abs(nbl::hlsl::transpose(awayFromEdgePlane) * receiverNormal); + matrix awayFromEdgePlane = matrix(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2); + const vector3_type externalProducts = nbl::hlsl::abs(/* transposed already */awayFromEdgePlane * 
receiverNormal); const vector3_type pyramidAngles = acos(cos_sides); return nbl::hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); From b45f0a8c76b4cbc2ec5332268db8a5acd9210565 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 21 Feb 2025 14:16:59 +0700 Subject: [PATCH 024/140] fix sampling bugs #2 Signed-off-by: Corey --- include/nbl/builtin/hlsl/sampling/bilinear.hlsl | 2 +- include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl | 2 +- include/nbl/builtin/hlsl/sampling/linear.hlsl | 4 ++-- .../builtin/hlsl/sampling/projected_spherical_triangle.hlsl | 1 + include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 1d5f9a91e8..3542e2dfef 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -31,7 +31,7 @@ struct Bilinear return retval; } - vector2_type generate(NBL_REG_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u) + vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u) { const vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); Linear lineary = Linear::create(twiceAreasUnderXCurve); diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index 57a18589fd..dcac2279be 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -17,7 +17,7 @@ template vector boxMullerTransform(vector xi, T stddev) { T sinPhi, cosPhi; - nbl::hlsl::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); + math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); return vector(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * 
nbl::hlsl::log(xi.x)) * stddev; } diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index 8b9b3fb058..12d445eefe 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -2,8 +2,8 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_BUILTIN_HLSL_SAMPLING_BILINEAR_INCLUDED_ -#define _NBL_BUILTIN_HLSL_SAMPLING_BILINEAR_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_LINEAR_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_LINEAR_INCLUDED_ #include #include diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index 945ca053b8..cfc96dc9cb 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace nbl diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index 83224bfabd..c42bf8e464 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -25,7 +25,7 @@ struct SphericalRectangle using vector3_type = vector; using vector4_type = vector; - static SphericalRectangle create(NBL_CONST_REG_ARG(shapes::SphericalRectangle) rect) + static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect) { SphericalRectangle retval; retval.rect = rect; From 9d1de1645e2d553d9437bf6dd697cbc54f771f5f Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 25 Feb 2025 16:57:01 +0700 Subject: [PATCH 025/140] temporary fix for dxc bug issue 7154 Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/common.hlsl | 70 ++++++++++------------- 
include/nbl/builtin/hlsl/limits.hlsl | 2 +- 2 files changed, 30 insertions(+), 42 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl index 4e5ba09551..e59a2c7de4 100644 --- a/include/nbl/builtin/hlsl/bxdf/common.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl @@ -374,19 +374,17 @@ NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) ((NBL_CONCEPT_REQ_TYPE)(T::vector3_type)) ((NBL_CONCEPT_REQ_TYPE)(T::matrix3x3_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.getL()), ::nbl::hlsl::is_same_v, typename T::ray_dir_info_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.getTdotL()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.getTdotL2()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.getBdotL()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.getBdotL2()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.getNdotL(clampMode)), ::nbl::hlsl::is_same_v, typename T::scalar_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.getNdotL2()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.isValid()), ::nbl::hlsl::is_same_v, bool)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::createFromTangentSpace(rdirinfo,frame)), ::nbl::hlsl::is_same_v, T)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::create(rdirinfo,pV)), ::nbl::hlsl::is_same_v, T)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::create(rdirinfo,pV,pV,pV)), ::nbl::hlsl::is_same_v, T)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::create(rdirinfo,pV,pV,pNdotL)), ::nbl::hlsl::is_same_v, T)) - // ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::template create >(pV,inter)), ::nbl::hlsl::is_same_v, T)) // NOTE: temporarily commented out due to dxc bug https://github.com/microsoft/DirectXShaderCompiler/issues/7154 + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.L), 
::nbl::hlsl::is_same_v, typename T::ray_dir_info_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.VdotL), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.TdotL), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.BdotL), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.NdotL), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.NdotL2), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::createFromTangentSpace(pV,rdirinfo,frame)), ::nbl::hlsl::is_same_v, T)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::create(rdirinfo,pVdotL,pV)), ::nbl::hlsl::is_same_v, T)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::create(rdirinfo,pVdotL,pV,pV,pV)), ::nbl::hlsl::is_same_v, T)) + //((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::template create(pV,iso)), ::nbl::hlsl::is_same_v, T)) // NOTE: temporarily commented out due to dxc bug https://github.com/microsoft/DirectXShaderCompiler/issues/7154 + //((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::template create(pV,aniso)), ::nbl::hlsl::is_same_v, T)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((_sample.getTangentSpaceL()), ::nbl::hlsl::is_same_v, typename T::vector3_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((T::createInvalid()), ::nbl::hlsl::is_same_v, T)) ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(ray_dir_info::Basic, typename T::ray_dir_info_type)) @@ -447,32 +445,22 @@ struct SLightSample return retval; } - static this_t create(NBL_CONST_REF_ARG(ray_dir_info_type) L, const vector3_type T, const vector3_type B, const scalar_type NdotL) - { - this_t retval; - - retval.L = L; - retval.TdotL = nbl::hlsl::dot(T,L.getDirection()); - retval.BdotL = nbl::hlsl::dot(B,L.getDirection()); - retval.NdotL = NdotL; - retval.NdotL2 = NdotL * NdotL; - - return retval; - } - - template) - static this_t create(const vector3_type L, NBL_CONST_REF_ARG(SurfaceInteraction) interaction) - { 
- const vector3_type V = interaction.V.getDirection(); - const scalar_type VdotL = nbl::hlsl::dot(V,L); - this_t retval; - NBL_IF_CONSTEXPR(surface_interactions::Anisotropic) - retval = create(L,interaction.T,interaction.B,interaction.N); - else - retval = create(L,interaction.N); - return retval; - } - + // overloads for surface_interactions, NOTE: temporarily commented out due to dxc bug https://github.com/microsoft/DirectXShaderCompiler/issues/7154 + // template + // static this_t create(NBL_CONST_REF_ARG(vector3_type) L, NBL_CONST_REF_ARG(surface_interactions::SIsotropic) interaction) + // { + // const vector3_type V = interaction.V.getDirection(); + // const scalar_type VdotL = nbl::hlsl::dot(V,L); + // return create(L, VdotL, interaction.N); + // } + // template + // static this_t create(NBL_CONST_REF_ARG(vector3_type) L, NBL_CONST_REF_ARG(surface_interactions::SAnisotropic) interaction) + // { + // const vector3_type V = interaction.V.getDirection(); + // const scalar_type VdotL = nbl::hlsl::dot(V,L); + // return create(L,VdotL,interaction.T,interaction.B,interaction.N); + // } + // vector3_type getTangentSpaceL() NBL_CONST_MEMBER_FUNC { return vector3_type(TdotL, BdotL, NdotL); @@ -1123,7 +1111,7 @@ NBL_CONCEPT_END( #include template && surface_interactions::Anisotropic) - static SBxDFParams create(LightSample _sample, Aniso interaction, BxDFClampMode clamp = BCM_NONE) + static this_t create(LightSample _sample, Aniso interaction, BxDFClampMode clamp = BCM_NONE) { this_t retval; retval.NdotV = clamp == BCM_ABS ? 
abs(interaction.NdotV) : @@ -1184,7 +1172,7 @@ NBL_CONCEPT_END( } template && surface_interactions::Anisotropic && AnisotropicMicrofacetCache) - static SBxDFParams create(LightSample _sample, Aniso interaction, Cache cache, BxDFClampMode clamp = BCM_NONE) + static this_t create(LightSample _sample, Aniso interaction, Cache cache, BxDFClampMode clamp = BCM_NONE) { this_t retval; retval.NdotH = cache.NdotH; diff --git a/include/nbl/builtin/hlsl/limits.hlsl b/include/nbl/builtin/hlsl/limits.hlsl index ebc6f931e1..fa9edc3bde 100644 --- a/include/nbl/builtin/hlsl/limits.hlsl +++ b/include/nbl/builtin/hlsl/limits.hlsl @@ -146,7 +146,7 @@ struct num_base : type_identity // (TODO) think about what this means for HLSL // identifies floating-point types that can represent the special value "quiet not-a-number" (NaN) - NBL_CONSTEXPR_STATIC_INLINE bool has_quiet_NaN = !is_integer; + NBL_CONSTEXPR_STATIC_INLINE bool has_quiet_NaN = !is_integer; // identifies floating-point types that can represent the special value "signaling not-a-number" (NaN) NBL_CONSTEXPR_STATIC_INLINE bool has_signaling_NaN = !is_integer; // identifies the denormalization style used by the floating-point type From 28fd99efccfe1298bbac0c07574e4793a0e5023b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 26 Feb 2025 16:55:23 +0700 Subject: [PATCH 026/140] some bug fixes again Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/reflection.hlsl | 31 +++++++++----- .../nbl/builtin/hlsl/bxdf/transmission.hlsl | 42 ++++++++++++------- .../nbl/builtin/hlsl/sampling/bilinear.hlsl | 3 +- .../hlsl/sampling/concentric_mapping.hlsl | 4 +- .../projected_spherical_triangle.hlsl | 12 +++--- .../hlsl/sampling/spherical_rectangle.hlsl | 2 +- .../nbl/builtin/hlsl/shapes/rectangle.hlsl | 2 +- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 2 +- 8 files changed, 61 insertions(+), 37 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl index 
c64d0f3c4c..dd2e4e60a8 100644 --- a/include/nbl/builtin/hlsl/bxdf/reflection.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection.hlsl @@ -16,8 +16,19 @@ namespace nbl namespace hlsl { -// After Clang-HLSL introduces https://en.cppreference.com/w/cpp/language/namespace_alias -// namespace brdf = bxdf::reflection; +// still need these? +template && surface_interactions::Isotropic && surface_interactions::Anisotropic && ray_dir_info::Basic && is_scalar_v) +LightSample cos_generate(NBL_CONST_REF_ARG(Iso) interaction) +{ + return LightSample(interaction.V.reflect(interaction.N,interaction.NdotV),interaction.NdotV,interaction.N); +} +template && surface_interactions::Isotropic && surface_interactions::Anisotropic && ray_dir_info::Basic && is_scalar_v) +LightSample cos_generate(NBL_CONST_REF_ARG(Aniso) interaction) +{ + return LightSample(interaction.V.reflect(interaction.N,interaction.NdotV),interaction.NdotV,interaction.T,interaction.B,interaction.N); +} // for information why we don't check the relation between `V` and `L` or `N` and `H`, see comments for `nbl::hlsl::transmission::cos_quotient_and_pdf` template && is_floating_point_v) @@ -429,14 +440,14 @@ struct SBeckmannBxDF scalar_type sinTheta = sqrt(1.0 - cosTheta * cosTheta); scalar_type tanTheta = sinTheta / cosTheta; scalar_type cotTheta = 1.0 / tanTheta; - + scalar_type a = -1.0; scalar_type c = erf(cosTheta); scalar_type sample_x = max(u.x, 1.0e-6); scalar_type theta = acos(cosTheta); scalar_type fit = 1.0 + theta * (-0.876 + theta * (0.4265 - 0.0594*theta)); scalar_type b = c - (1.0 + c) * pow(1.0-sample_x, fit); - + scalar_type normalization = 1.0 / (1.0 + c + numbers::inv_sqrtpi * tanTheta * exp(-cosTheta*cosTheta)); const int ITER_THRESHOLD = 10; @@ -463,7 +474,7 @@ struct SBeckmannBxDF slope.x = erfInv(b); slope.y = erfInv(2.0 * max(u.y, 1.0e-6) - 1.0); } - + scalar_type sinTheta = sqrt(1.0 - V.z*V.z); scalar_type cosPhi = sinTheta==0.0 ? 
1.0 : clamp(V.x/sinTheta, -1.0, 1.0); scalar_type sinPhi = sinTheta==0.0 ? 0.0 : clamp(V.y/sinTheta, -1.0, 1.0); @@ -482,7 +493,7 @@ struct SBeckmannBxDF { const vector3_type localV = interaction.getTangentSpaceV(); const vector3_type H = __generate(localV, u); - + cache = anisocache_type::create(localV, H); ray_dir_info_type localL; localL.direction = math::reflect(localV, H, cache.VdotH); @@ -546,7 +557,7 @@ struct SBeckmannBxDF const spectral_type reflectance = fresnelConductor(ior0, ior1, params.VdotH); quo = reflectance * G2_over_G1; } - + return quotient_pdf_type::create(quo, _pdf); } @@ -667,7 +678,7 @@ struct SGGXBxDF scalar_type t2 = r * sin(phi); scalar_type s = 0.5 * (1.0 + V.z); t2 = (1.0 - s)*sqrt(1.0 - t1*t1) + s*t2; - + //reprojection onto hemisphere //TODO try it wothout the max(), not sure if -t1*t1-t2*t2>-1.0 vector3_type H = t1*T1 + t2*T2 + sqrt(max(0.0, 1.0-t1*t1-t2*t2))*V; @@ -679,7 +690,7 @@ struct SGGXBxDF { const vector3_type localV = interaction.getTangentSpaceV(); const vector3_type H = __generate(localV, u); - + cache = anisocache_type::create(localV, H); ray_dir_info_type localL; localL.direction = math::reflect(localV, H, cache.VdotH); @@ -741,7 +752,7 @@ struct SGGXBxDF const spectral_type reflectance = fresnelConductor(ior0, ior1, params.VdotH); quo = reflectance * G2_over_G1; } - + return quotient_pdf_type::create(quo, _pdf); } diff --git a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl index 77ce658016..4087b715c3 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission.hlsl @@ -17,8 +17,18 @@ namespace nbl namespace hlsl { -// After Clang-HLSL introduces https://en.cppreference.com/w/cpp/language/namespace_alias -// namespace bsdf = bxdf::transmission; +template && surface_interactions::Isotropic && surface_interactions::Anisotropic && ray_dir_info::Basic && is_scalar_v) +LightSample cos_generate(NBL_CONST_REF_ARG(Iso) 
interaction) +{ + return LightSample(interaction.V.transmit(),-1.f,interaction.N); +} +template && surface_interactions::Isotropic && surface_interactions::Anisotropic && ray_dir_info::Basic && is_scalar_v) +LightSample cos_generate(NBL_CONST_REF_ARG(Aniso) interaction) +{ + return LightSample(interaction.V.transmit(),-1.f,interaction.T,interaction.B,interaction.N); +} // Why don't we check that the incoming and outgoing directions equal each other // (or similar for other delta distributions such as reflect, or smooth [thin] dielectrics): @@ -159,7 +169,7 @@ struct SSmoothDielectricBxDF scalar_type orientedEta, rcpOrientedEta; const bool backside = math::getOrientedEtas(orientedEta, rcpOrientedEta, interaction.NdotV, eta); bool dummy; - return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV, + return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV, interaction.NdotV, interaction.NdotV*interaction.NdotV, u, rcpOrientedEta, orientedEta*orientedEta, rcpOrientedEta*rcpOrientedEta, dummy); } @@ -168,7 +178,7 @@ struct SSmoothDielectricBxDF scalar_type orientedEta, rcpOrientedEta; const bool backside = math::getOrientedEtas(orientedEta, rcpOrientedEta, interaction.NdotV, eta); bool dummy; - return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV, + return __generate_wo_clamps(interaction.V.direction, interaction.T, interaction.B, interaction.N, backside, interaction.NdotV, nbl::hlsl::abs(interaction.NdotV), interaction.NdotV*interaction.NdotV, u, rcpOrientedEta, orientedEta*orientedEta, rcpOrientedEta*rcpOrientedEta, dummy); } @@ -181,7 +191,7 @@ struct SSmoothDielectricBxDF quotient_pdf_type quotient_and_pdf(params_t params) { const bool transmitted = isTransmissionPath(params.uNdotV, params.uNdotL); - + scalar_type dummy, rcpOrientedEta; const bool backside = 
math::getOrientedEtas(dummy, rcpOrientedEta, params.NdotV, eta); @@ -235,7 +245,7 @@ struct SSmoothDielectricBxDF } // usually `luminosityContributionHint` would be the Rec.709 luma coefficients (the Y row of the RGB to CIE XYZ matrix) - // its basically a set of weights that determine + // its basically a set of weights that determine // assert(1.0==luminosityContributionHint.r+luminosityContributionHint.g+luminosityContributionHint.b); // `remainderMetadata` is a variable which the generator function returns byproducts of sample generation that would otherwise have to be redundantly calculated `quotient_and_pdf` sample_type __generate_wo_clamps(vector3_type V, vector3_type T, vector3_type B, vector3_type N, scalar_type NdotV, scalar_type absNdotV, NBL_REF_ARG(vector3_type) u, spectral_type eta2, spectral_type luminosityContributionHint, NBL_REF_ARG(spectral_type) remainderMetadata) @@ -249,7 +259,7 @@ struct SSmoothDielectricBxDF scalar_type rcpChoiceProb; const bool transmitted = math::partitionRandVariable(reflectionProb, u.z, rcpChoiceProb); remainderMetadata = (transmitted ? ((spectral_type)(1.0) - reflectance) : reflectance) * rcpChoiceProb; - + ray_dir_info_type L; L.direction = (transmitted ? 
(vector3_type)(0.0) : N * 2.0f * NdotV) - V; return sample_type::create(L, nbl::hlsl::dot(V, L.direction), T, B, N); @@ -342,7 +352,7 @@ struct SBeckmannDielectricBxDF scalar_type orientedEta, dummy; const bool backside = math::getOrientedEtas(orientedEta, dummy, params.VdotH, eta); const scalar_type orientedEta2 = orientedEta * orientedEta; - + const scalar_type VdotHLdotH = params.VdotH * params.LdotH; const bool transmitted = VdotHLdotH < 0.0; @@ -363,10 +373,10 @@ struct SBeckmannDielectricBxDF { const scalar_type localVdotH = nbl::hlsl::dot(localV,H); const scalar_type reflectance = fresnelDielectric_common(orientedEta2,nbl::hlsl::abs(localVdotH)); - + scalar_type rcpChoiceProb; bool transmitted = math::partitionRandVariable(reflectance, u.z, rcpChoiceProb); - + cache = anisocache_type::create(localV, H); const scalar_type VdotH = cache.VdotH; @@ -409,7 +419,7 @@ struct SBeckmannDielectricBxDF const bool transmitted = VdotHLdotH < 0.0; const scalar_type reflectance = fresnelDielectric_common(orientedEta2, nbl::hlsl::abs(params.VdotH)); - + scalar_type ndf, lambda; if (params.is_aniso) { @@ -433,7 +443,7 @@ struct SBeckmannDielectricBxDF smith::Beckmann beckmann_smith; lambda = beckmann_smith.Lambda(params.NdotV2, a2); } - + return smith::VNDF_pdf_wo_clamps >(ndf,lambda,params.NdotV,transmitted,params.VdotH,params.LdotH,VdotHLdotH,orientedEta,reflectance,onePlusLambda_V); } @@ -523,7 +533,7 @@ struct SGGXDielectricBxDF scalar_type orientedEta, dummy; const bool backside = math::getOrientedEtas(orientedEta, dummy, params.VdotH, eta); const scalar_type orientedEta2 = orientedEta * orientedEta; - + const scalar_type VdotHLdotH = params.VdotH * params.LdotH; const bool transmitted = VdotHLdotH < 0.0; @@ -550,10 +560,10 @@ struct SGGXDielectricBxDF { const scalar_type localVdotH = nbl::hlsl::dot(localV,H); const scalar_type reflectance = fresnelDielectric_common(orientedEta2,nbl::hlsl::abs(localVdotH)); - + scalar_type rcpChoiceProb; bool transmitted = 
math::partitionRandVariable(reflectance, u.z, rcpChoiceProb); - + cache = anisocache_type::create(localV, H); const scalar_type VdotH = cache.VdotH; @@ -630,7 +640,7 @@ struct SGGXDielectricBxDF { const scalar_type ax2 = A.x*A.x; const scalar_type ay2 = A.y*A.y; - + scalar_type _pdf = pdf(params); smith::GGX ggx_smith; diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 3542e2dfef..42a923f650 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -31,8 +31,9 @@ struct Bilinear return retval; } - vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u) + vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) _u) { + vector2_type u = _u; const vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); Linear lineary = Linear::create(twiceAreasUnderXCurve); u.y = lineary.generate(u.y); diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl index 1a5c96b6df..bd9dcc163d 100644 --- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl +++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl @@ -23,8 +23,8 @@ vector concentricMapping(vector _u) vector u = 2.0f * _u - hlsl::promote >(1.0); vector p; - if (hlsl::all >(glsl::equal(u, hlsl::promote >(0.0)))) - p = hlsl::promote >(0.0); + if (nbl::hlsl::all >(u == (vector)(0.0))) + p = (vector)(0.0); else { T r; diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index cfc96dc9cb..f2f29ed12b 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -36,22 +36,23 @@ struct 
ProjectedSphericalTriangle vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF) { const scalar_type minimumProjSolidAngle = 0.0; - + matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), (vector3_type)minimumProjSolidAngle); return bxdfPdfAtVertex.yyxz; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) _u) { + vector2_type u; // pre-warp according to proj solid angle approximation vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); Bilinear bilinear = Bilinear::create(patch); u = bilinear.generate(rcpPdf, u); // now warp the points onto a spherical triangle - const vector3_type L = tri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); + const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); rcpPdf *= solidAngle; return L; @@ -68,7 +69,7 @@ struct ProjectedSphericalTriangle scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) { 
scalar_type pdf; - const vector2_type u = tri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); + const vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); Bilinear bilinear = Bilinear::create(patch); @@ -78,7 +79,7 @@ struct ProjectedSphericalTriangle scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) { scalar_type pdf; - const vector2_type u = tri.generateInverse(pdf, L); + const vector2_type u = sphtri.generateInverse(pdf, L); vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); Bilinear bilinear = Bilinear::create(patch); @@ -86,6 +87,7 @@ struct ProjectedSphericalTriangle } shapes::SphericalTriangle tri; + sampling::SphericalTriangle sphtri; }; } diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index c42bf8e464..cca3f21dd9 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -35,7 +35,7 @@ struct SphericalRectangle vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S) { const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); - const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt(vector4_type(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); + const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); const vector4_type cosGamma = vector4_type( -n_z[0] * n_z[1], -n_z[1] * n_z[2], diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl index 
47d3927f31..f1a1e37575 100644 --- a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl @@ -43,7 +43,7 @@ struct SphericalRectangle { const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); - const vector4_type cosGamma = vec4( + const vector4_type cosGamma = vector4_type( -n_z[0] * n_z[1], -n_z[1] * n_z[2], -n_z[2] * n_z[3], diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index d904ed7246..67fdfa0476 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -81,7 +81,7 @@ struct SphericalTriangle cos_vertices = nbl::hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); matrix awayFromEdgePlane = matrix(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2); - const vector3_type externalProducts = nbl::hlsl::abs(/* transposed already */awayFromEdgePlane * receiverNormal); + const vector3_type externalProducts = nbl::hlsl::abs(nbl::hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal)); const vector3_type pyramidAngles = acos(cos_sides); return nbl::hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); From ed713f8162977ff95d4a8daa0e6301c236a14348 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Feb 2025 10:38:37 +0700 Subject: [PATCH 027/140] fix wrong template usage Signed-off-by: Corey --- include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl index bd9dcc163d..5f29b80f8c 100644 --- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl 
+++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl @@ -23,7 +23,7 @@ vector concentricMapping(vector _u) vector u = 2.0f * _u - hlsl::promote >(1.0); vector p; - if (nbl::hlsl::all >(u == (vector)(0.0))) + if (nbl::hlsl::all >(u == (vector)(0.0))) p = (vector)(0.0); else { From 3de1462ec75e25428a436174bbfd881753f69ecb Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 4 Mar 2025 17:03:09 +0700 Subject: [PATCH 028/140] fix typo Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/common.hlsl | 1 - 1 file changed, 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl index e59a2c7de4..5ba64f87df 100644 --- a/include/nbl/builtin/hlsl/bxdf/common.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl @@ -828,7 +828,6 @@ struct SAnisotropicMicrofacetCache NBL_CONST_REF_ARG(fresnel::OrientedEtas) orientedEtas ) { - isocache_type iso = (isocache_type)retval; vector3_type H; const bool valid = isocache_type::compute(iso,interaction,_sample,eta,H); retval = (this_t)iso; From 3bd988be18802287363e1c4df350ae434c3b49e5 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 14 Mar 2025 17:02:10 +0700 Subject: [PATCH 029/140] fixed some func usage to nbl ver Signed-off-by: Corey --- .../hlsl/sampling/spherical_triangle.hlsl | 2 +- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 35 +++++++++++-------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 1d4fda454d..7828fc14ea 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -107,7 +107,7 @@ struct SphericalTriangle const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? 
subTriSolidAngleRatio : 0.0; const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_); - const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < asfloat(0x3f7fffff) ? cosBC_s : cos_c)); + const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast(0x3f7fffff) ? cosBC_s : cos_c)); return vector2_type(u,v); } diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index 67fdfa0476..d3f5a90215 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -5,6 +5,7 @@ #ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ +#include #include #include #include @@ -33,9 +34,13 @@ struct SphericalTriangle bool pyramidAngles(NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides) { - cos_sides = vector3_type(nbl::hlsl::dot(vertex1, vertex2), nbl::hlsl::dot(vertex2, vertex0), nbl::hlsl::dot(vertex0, vertex1)); - csc_sides = 1.0 / nbl::hlsl::sqrt((vector3_type)(1.f) - cos_sides * cos_sides); - return nbl::hlsl::any(csc_sides >= (vector3_type)(numeric_limits::max)); + cos_sides = vector3_type(hlsl::dot(vertex1, vertex2), hlsl::dot(vertex2, vertex0), hlsl::dot(vertex0, vertex1)); + csc_sides = (vector3_type)(1.f) - cos_sides * cos_sides; + csc_sides.x = hlsl::rsqrt(csc_sides.x); + csc_sides.y = hlsl::rsqrt(csc_sides.y); + csc_sides.z = hlsl::rsqrt(csc_sides.z); + + return hlsl::any >(csc_sides >= (vector3_type)(numeric_limits::max)); } scalar_type solidAngleOfTriangle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices, NBL_REF_ARG(scalar_type) cos_a, NBL_REF_ARG(scalar_type) cos_c, NBL_REF_ARG(scalar_type) csc_b, NBL_REF_ARG(scalar_type) csc_c) @@ -51,8 +56,8 @@ struct SphericalTriangle csc_c = csc_sides[2]; // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. 
The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI - cos_vertices = clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) - sin_vertices = sqrt((vector3_type)1.f - cos_vertices * cos_vertices); + cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) + sin_vertices = hlsl::sqrt((vector3_type)1.f - cos_vertices * cos_vertices); return math::getArccosSumofABC_minus_PI(cos_vertices[0], cos_vertices[1], cos_vertices[2], sin_vertices[0], sin_vertices[1], sin_vertices[2]); } @@ -69,22 +74,22 @@ struct SphericalTriangle if (pyramidAngles(cos_sides, csc_sides)) return 0.f; - vector3_type awayFromEdgePlane0 = nbl::hlsl::cross(vertex1, vertex2) * csc_sides[0]; - vector3_type awayFromEdgePlane1 = nbl::hlsl::cross(vertex2, vertex0) * csc_sides[1]; - vector3_type awayFromEdgePlane2 = nbl::hlsl::cross(vertex0, vertex1) * csc_sides[2]; + vector3_type awayFromEdgePlane0 = hlsl::cross(vertex1, vertex2) * csc_sides[0]; + vector3_type awayFromEdgePlane1 = hlsl::cross(vertex2, vertex0) * csc_sides[1]; + vector3_type awayFromEdgePlane2 = hlsl::cross(vertex0, vertex1) * csc_sides[2]; // useless here but could be useful somewhere else - cos_vertices[0] = nbl::hlsl::dot(awayFromEdgePlane1, awayFromEdgePlane2); - cos_vertices[1] = nbl::hlsl::dot(awayFromEdgePlane2, awayFromEdgePlane0); - cos_vertices[2] = 
nbl::hlsl::dot(awayFromEdgePlane0, awayFromEdgePlane1); + cos_vertices[0] = hlsl::dot(awayFromEdgePlane1, awayFromEdgePlane2); + cos_vertices[1] = hlsl::dot(awayFromEdgePlane2, awayFromEdgePlane0); + cos_vertices[2] = hlsl::dot(awayFromEdgePlane0, awayFromEdgePlane1); // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works - cos_vertices = nbl::hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); + cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); matrix awayFromEdgePlane = matrix(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2); - const vector3_type externalProducts = nbl::hlsl::abs(nbl::hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal)); + const vector3_type externalProducts = hlsl::abs(hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal)); - const vector3_type pyramidAngles = acos(cos_sides); - return nbl::hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); + const vector3_type pyramidAngles = acos(cos_sides); + return hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); } vector3_type vertex0; From 320bf0e01573ded81b927d8b7560bcc5232e8a08 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 17 Mar 2025 13:57:25 +0700 Subject: [PATCH 030/140] specify template args Signed-off-by: Corey --- .../hlsl/sampling/spherical_rectangle.hlsl | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index cca3f21dd9..663cd5e3d1 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -35,7 +35,7 @@ struct SphericalRectangle vector2_type 
generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S) { const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); - const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); + const vector4_type n_z = denorm_n_z / hlsl::sqrt((vector4_type)(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); const vector4_type cosGamma = vector4_type( -n_z[0] * n_z[1], -n_z[1] * n_z[2], @@ -54,24 +54,24 @@ struct SphericalRectangle const scalar_type CLAMP_EPS = 1e-5f; // flip z axsis if rect.r0.z > 0 - const uint32_t zFlipMask = (asuint(rect.r0.z) ^ 0x80000000u) & 0x80000000u; - rect.r0.z = asfloat(asuint(rect.r0.z) ^ zFlipMask); + const uint32_t zFlipMask = (bit_cast(rect.r0.z) ^ 0x80000000u) & 0x80000000u; + rect.r0.z = bit_cast(bit_cast(rect.r0.z) ^ zFlipMask); vector3_type r1 = rect.r0 + vector3_type(rectangleExtents.x, rectangleExtents.y, 0); const scalar_type au = uv.x * S + k; - const scalar_type fu = (nbl::hlsl::cos(au) * b0 - b1) / nbl::hlsl::sin(au); - const scalar_type cu_2 = nbl::hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] - const scalar_type cu = asfloat(asuint(1.0 / nbl::hlsl::sqrt(cu_2)) ^ (asuint(fu) & 0x80000000u)); + const scalar_type fu = (hlsl::cos(au) * b0 - b1) / hlsl::sin(au); + const scalar_type cu_2 = hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] + const scalar_type cu = bit_cast(bit_cast(1.0 / hlsl::sqrt(cu_2)) ^ (bit_cast(fu) & 0x80000000u)); - scalar_type xu = -(cu * rect.r0.z) * 1.0 / nbl::hlsl::sqrt(1 - cu * cu); - xu = nbl::hlsl::clamp(xu, rect.r0.x, r1.x); // avoid Infs + scalar_type xu = -(cu * rect.r0.z) * 1.0 / hlsl::sqrt(1 - cu * cu); + xu = hlsl::clamp(xu, rect.r0.x, r1.x); // avoid Infs const scalar_type d_2 = xu * xu + rect.r0.z * rect.r0.z; - const scalar_type d = nbl::hlsl::sqrt(d_2); + const 
scalar_type d = hlsl::sqrt(d_2); - const scalar_type h0 = rect.r0.y / nbl::hlsl::sqrt(d_2 + rect.r0.y * rect.r0.y); - const scalar_type h1 = r1.y / nbl::hlsl::sqrt(d_2 + r1.y * r1.y); + const scalar_type h0 = rect.r0.y / hlsl::sqrt(d_2 + rect.r0.y * rect.r0.y); + const scalar_type h1 = r1.y / hlsl::sqrt(d_2 + r1.y * r1.y); const scalar_type hv = h0 + uv.y * (h1 - h0), hv2 = hv * hv; - const scalar_type yv = (hv2 < 1 - CLAMP_EPS) ? (hv * d) / nbl::hlsl::sqrt(1 - hv2) : r1.y; + const scalar_type yv = (hv2 < 1 - CLAMP_EPS) ? (hv * d) / hlsl::sqrt(1 - hv2) : r1.y; return vector2_type((xu - rect.r0.x) / rectangleExtents.x, (yv - rect.r0.y) / rectangleExtents.y); } From 52ad8a9841997db97852af473b6f4a7a81a32398 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 21 Mar 2025 16:49:40 +0700 Subject: [PATCH 031/140] fix use of static const in func Signed-off-by: Corey --- include/nbl/builtin/hlsl/math/functions.hlsl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index d3f5b167f6..046c72c527 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -122,11 +122,7 @@ void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb) { -#ifdef __HLSL_VERSION - NBL_CONSTEXPR_FUNC_SCOPE_VAR float NEXT_ULP_AFTER_UNITY = asfloat(0x3f800001u); -#else - NBL_CONSTEXPR_FUNC_SCOPE_VAR float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); -#endif + const float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; // This is all 100% correct taking into account the above NEXT_ULP_AFTER_UNITY From 8aa9903edf59a98de4b53d3f3b28023c18057f96 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 24 Mar 2025 14:43:43 +0700 Subject: [PATCH 032/140] added more 
morton order stuff Signed-off-by: Corey --- include/nbl/builtin/glsl/utils/morton.glsl | 17 ++++++ include/nbl/builtin/hlsl/math/morton.hlsl | 68 ++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 include/nbl/builtin/hlsl/math/morton.hlsl diff --git a/include/nbl/builtin/glsl/utils/morton.glsl b/include/nbl/builtin/glsl/utils/morton.glsl index de3be8b9c7..fd07a9cad8 100644 --- a/include/nbl/builtin/glsl/utils/morton.glsl +++ b/include/nbl/builtin/glsl/utils/morton.glsl @@ -22,6 +22,18 @@ uint nbl_glsl_morton_decode2d8bComponent(in uint x) return x; } +uint nbl_glsl_morton_decode2d32bComponent(in uint x) +{ + x &= 0x55555555u; + x = (x ^ (x >> 1u)) & 0x33333333u; + x = (x ^ (x >> 2u)) & 0x0f0f0f0fu; + x = (x ^ (x >> 4u)) & 0x00ff00ffu; + x = (x ^ (x >> 8u)) & 0x0000ffffu; + x = (x ^ (x >> 16u)); + return x; +} + + uvec2 nbl_glsl_morton_decode2d4b(in uint x) { return uvec2(nbl_glsl_morton_decode2d4bComponent(x), nbl_glsl_morton_decode2d4bComponent(x >> 1u)); @@ -32,4 +44,9 @@ uvec2 nbl_glsl_morton_decode2d8b(in uint x) return uvec2(nbl_glsl_morton_decode2d8bComponent(x), nbl_glsl_morton_decode2d8bComponent(x >> 1u)); } +uvec2 nbl_glsl_morton_decode2d32b(in uint x) +{ + return uvec2(nbl_glsl_morton_decode2d32bComponent(x), nbl_glsl_morton_decode2d32bComponent(x >> 1u)); +} + #endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/math/morton.hlsl b/include/nbl/builtin/hlsl/math/morton.hlsl new file mode 100644 index 0000000000..4a6cb5dfd3 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/morton.hlsl @@ -0,0 +1,68 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_MORTON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace math +{ + +namespace impl +{ + +template +struct MortonComponent; + +template +struct MortonComponent +{ + static T decode2d(T x) + { + x &= 0x55555555u; + x = (x ^ (x >> 1u)) & 0x33333333u; + x = (x ^ (x >> 2u)) & 0x0f0f0f0fu; + x = (x ^ (x >> 4u)) & 0x00ff00ffu; + return x; + } +}; + +template +struct MortonComponent +{ + static T decode2d(T x) + { + x &= 0x55555555u; + x = (x ^ (x >> 1u)) & 0x33333333u; + x = (x ^ (x >> 2u)) & 0x0f0f0f0fu; + x = (x ^ (x >> 4u)) & 0x00ff00ffu; + x = (x ^ (x >> 8u)) & 0x0000ffffu; + x = (x ^ (x >> 16u)); + return x; + } +}; + +} + +template +struct Morton +{ + using vector2_type = vector; + using component_type = impl::MortonComponent; + + static vector2_type decode2d(T x) + { + return vector2_type(component_type::decode2d(x), component_type::decode2d(x >> 1u)); + } +}; + +} +} +} + +#endif From f7b0eef21a7490d794c4c90a3b91a6656d091c77 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 13 Nov 2025 10:14:28 +0700 Subject: [PATCH 033/140] use new angle adder Signed-off-by: Corey --- .../nbl/builtin/hlsl/sampling/spherical_triangle.hlsl | 5 ++++- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 9 +++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 7828fc14ea..fd3a616e8d 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -103,7 +103,10 @@ struct SphericalTriangle const scalar_type cosC_ = sin_vertices[0] * sinB_* cos_c - cos_vertices[0] * cosB_; const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); - const scalar_type 
subTriSolidAngleRatio = math::getArccosSumofABC_minus_PI(cos_vertices[0], cosB_, cosC_, sin_vertices[0], sinB_, sinC_) * pdf; + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); + angle_adder.addAngle(cosB_, sinB_); + angle_adder.addAngle(cosC_, sinC_); + const scalar_type subTriSolidAngleRatio = (angle_adder.getSumofArccos() - numbers::pi) * pdf; const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? subTriSolidAngleRatio : 0.0; const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_); diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index d3f5a90215..de82849389 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -9,6 +9,8 @@ #include #include #include +#include +#include namespace nbl { @@ -59,7 +61,10 @@ struct SphericalTriangle cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
sin_vertices = hlsl::sqrt((vector3_type)1.f - cos_vertices * cos_vertices); - return math::getArccosSumofABC_minus_PI(cos_vertices[0], cos_vertices[1], cos_vertices[2], sin_vertices[0], sin_vertices[1], sin_vertices[2]); + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); + angle_adder.addAngle(cos_vertices[1], sin_vertices[1]); + angle_adder.addAngle(cos_vertices[2], sin_vertices[2]); + return angle_adder.getSumofArccos() - numbers::pi; } scalar_type solidAngleOfTriangle() @@ -89,7 +94,7 @@ struct SphericalTriangle const vector3_type externalProducts = hlsl::abs(hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal)); const vector3_type pyramidAngles = acos(cos_sides); - return hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); + return hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); } vector3_type vertex0; From c29fa62a4c58353fa89a7aa99a3081a416ec5d4b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 13 Nov 2025 15:22:16 +0700 Subject: [PATCH 034/140] initialize invalid ndf return to inf, not 0 Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl | 4 ++-- include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl b/include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl index 1406bc8d4f..c719bbfd4e 100644 --- a/include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/ndf/beckmann.hlsl @@ -339,8 +339,8 @@ struct Beckmann if (isInfinity) { quant_type dmq; - dmq.microfacetMeasure = scalar_type(0.0); - dmq.projectedLightMeasure = scalar_type(0.0); + dmq.microfacetMeasure = bit_cast(numeric_limits::infinity); + dmq.projectedLightMeasure = bit_cast(numeric_limits::infinity); return dmq; } scalar_type dg1 = D / (scalar_type(1.0) + query.getLambdaV()); diff --git a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl 
b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl index 58f697e19c..c64f6e3b84 100644 --- a/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/ndf/ggx.hlsl @@ -279,8 +279,8 @@ struct GGX quant_type dmq; if (isInfinity) { - dmq.microfacetMeasure = scalar_type(0.0); - dmq.projectedLightMeasure = scalar_type(0.0); + dmq.microfacetMeasure = bit_cast(numeric_limits::infinity); + dmq.projectedLightMeasure = bit_cast(numeric_limits::infinity); return dmq; } @@ -337,8 +337,8 @@ struct GGX if (isInfinity) { quant_type dmq; - dmq.microfacetMeasure = scalar_type(0.0); - dmq.projectedLightMeasure = scalar_type(0.0); + dmq.microfacetMeasure = bit_cast(numeric_limits::infinity); + dmq.projectedLightMeasure = bit_cast(numeric_limits::infinity); return dmq; } dg *= correlated_wo_numerator(query, _sample, interaction, cache); From 49db9d6094cce0d44c72f4fe80db0041f69fc90e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 13 Nov 2025 15:22:59 +0700 Subject: [PATCH 035/140] derive trait from cook torrance base Signed-off-by: Corey --- .../hlsl/bxdf/base/cook_torrance_base.hlsl | 24 ++++++++++++++----- .../hlsl/bxdf/reflection/beckmann.hlsl | 18 +------------- .../nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl | 18 +------------- .../hlsl/bxdf/reflection/iridescent.hlsl | 9 +------ .../hlsl/bxdf/transmission/beckmann.hlsl | 18 +------------- .../builtin/hlsl/bxdf/transmission/ggx.hlsl | 18 +------------- .../hlsl/bxdf/transmission/iridescent.hlsl | 9 +------ 7 files changed, 24 insertions(+), 90 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index a185dc8d98..306198f827 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -133,7 +133,7 @@ struct SCookTorrance static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) 
interaction, scalar_type clampedVdotH) { spectral_type throughputWeights = interaction.getLuminosityContributionHint(); - return hlsl::dot(impl::__implicit_promote::__call(orientedFresnel(clampedVdotH)), throughputWeights); + return hlsl::dot(orientedFresnel(clampedVdotH), throughputWeights); } template, typename C=bool_constant NBL_FUNC_REQUIRES(C::value && fresnel_type::ReturnsMonochrome) @@ -142,7 +142,7 @@ struct SCookTorrance return orientedFresnel(clampedVdotH)[0]; } - bool __dotIsUnity(const vector3_type a, const vector3_type b, const scalar_type value) + bool __dotIsValue(const vector3_type a, const vector3_type b, const scalar_type value) { const scalar_type ab = hlsl::dot(a, b); return hlsl::max(ab, value / ab) <= scalar_type(value + 1e-3); @@ -209,11 +209,11 @@ struct SCookTorrance ray_dir_info_type V = interaction.getV(); const matrix3x3_type fromTangent = interaction.getFromTangentSpace(); // tangent frame orthonormality - assert(__dotIsUnity(fromTangent[0],fromTangent[1],0.0)); - assert(__dotIsUnity(fromTangent[1],fromTangent[2],0.0)); - assert(__dotIsUnity(fromTangent[2],fromTangent[0],0.0)); + assert(__dotIsValue(fromTangent[0],fromTangent[1],0.0)); + assert(__dotIsValue(fromTangent[1],fromTangent[2],0.0)); + assert(__dotIsValue(fromTangent[2],fromTangent[0],0.0)); // NDF sampling produced a unit length direction - assert(__dotIsUnity(localH,localH,1.0)); + assert(__dotIsValue(localH,localH,1.0)); const vector3_type H = hlsl::mul(interaction.getFromTangentSpace(), localH); Refract r = Refract::create(V.getDirection(), H); @@ -409,6 +409,18 @@ struct SCookTorrance fresnel_type fresnel; // always front-facing }; + +template +struct traits > +{ + using __type = SCookTorrance; + + NBL_CONSTEXPR_STATIC_INLINE BxDFType type = conditional_value<__type::IsBSDF, BxDFType, BxDFType::BT_BSDF, BxDFType::BT_BRDF>::value; + NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; + NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = !__type::IsBSDF; + 
NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = !__type::IsBSDF; +}; + } } } diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl index f37d0d9fd8..cb7743e02d 100644 --- a/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection/beckmann.hlsl @@ -27,23 +27,7 @@ using SBeckmannAnisotropic = SCookTorrance -struct traits > -{ - NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; - -template -struct traits > -{ - NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; +// inherit trait from cook torrance base } } diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl index 049480afab..0f49d0be43 100644 --- a/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection/ggx.hlsl @@ -27,23 +27,7 @@ using SGGXAnisotropic = SCookTorrance -struct traits > -{ - NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; - -template -struct traits > -{ - NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; +// inherit trait from cook torrance base } } diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl index 07762d1298..e30c3efdab 100644 --- 
a/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl @@ -20,14 +20,7 @@ using SIridescent = SCookTorrance -struct traits > -{ - NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BRDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; +// inherit trait from cook torrance base } } diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl index fa315b40ea..8c61692c5c 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/beckmann.hlsl @@ -27,23 +27,7 @@ using SBeckmannDielectricAnisotropic = SCookTorrance -struct traits > -{ - NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; - -template -struct traits > -{ - NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; +// inherit trait from cook torrance base } } diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl index 51f096532b..cdd4483c7f 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/ggx.hlsl @@ -27,23 +27,7 @@ using SGGXDielectricAnisotropic = SCookTorrance -struct traits > -{ - NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; - -template -struct traits > -{ - 
NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; +// inherit trait from cook torrance base } } diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl index 2e7aa0e56e..8e06fc34e7 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl @@ -20,14 +20,7 @@ using SIridescent = SCookTorrance -struct traits > -{ - NBL_CONSTEXPR_STATIC_INLINE BxDFType type = BT_BSDF; - NBL_CONSTEXPR_STATIC_INLINE bool IsMicrofacet = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotV = true; - NBL_CONSTEXPR_STATIC_INLINE bool clampNdotL = true; -}; +// inherit trait from cook torrance base } } From 9eb3a183eb6075a9a6e0ecf720bc4e68b2cdb6c0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 13 Nov 2025 15:23:29 +0700 Subject: [PATCH 036/140] fix angle adder Signed-off-by: Corey --- include/nbl/builtin/hlsl/math/angle_adding.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/math/angle_adding.hlsl b/include/nbl/builtin/hlsl/math/angle_adding.hlsl index 27d4f2a465..5ab661facb 100644 --- a/include/nbl/builtin/hlsl/math/angle_adding.hlsl +++ b/include/nbl/builtin/hlsl/math/angle_adding.hlsl @@ -44,7 +44,7 @@ struct sincos_accumulator const T cosB = runningSum.real(); const T sinB = runningSum.imag(); // TODO: prove if we infer overflow from sign of `d` instead - const bool overflow = abs(min(a, cosB)) > max(a, cosB); + const bool overflow = abs(min(cosA, cosB)) > max(cosA, cosB); const T c = cosA * cosB - sinA * sinB; const T d = sinA * cosB + cosA * sinB; From f1e34548b115acece03b4fa553e2363ec3110641 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 13 Nov 2025 16:06:16 +0700 Subject: [PATCH 037/140] optimizations to 
iridescent fresnel Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 157 +++++++++++------- .../hlsl/bxdf/reflection/iridescent.hlsl | 2 +- .../hlsl/bxdf/transmission/iridescent.hlsl | 2 +- 3 files changed, 99 insertions(+), 62 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 56ea88080c..e6bb3f98c2 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -497,7 +497,7 @@ struct Dielectric }; // adapted from https://belcour.github.io/blog/research/publication/2017/05/01/brdf-thin-film.html -template +template struct Iridescent; namespace impl @@ -543,47 +543,50 @@ struct iridescent_helper return xyz / scalar_type(1.0685e-7); } - template - static T __call(NBL_CONST_REF_ARG(Params) params, const scalar_type clampedCosTheta) + template + static T __call(const vector_type _D, const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) { - const vector_type wavelengths = vector_type(colorspace::scRGB::wavelength_R, colorspace::scRGB::wavelength_G, colorspace::scRGB::wavelength_B); + const vector_type wavelengths = vector_type(Colorspace::wavelength_R, Colorspace::wavelength_G, Colorspace::wavelength_B); - const vector_type eta12 = params.getEta12(); - const vector_type eta23 = params.getEta23(); - const vector_type etak23 = params.getEtak23(); const scalar_type cosTheta_1 = clampedCosTheta; - vector_type cosTheta_2; - vector_type R12p, R23p, R12s, R23s; - const vector_type scale = scalar_type(1.0)/eta12; - const vector_type cosTheta2_2 = hlsl::promote(1.0) - hlsl::promote(1.0-cosTheta_1*cosTheta_1) * scale * scale; - - cosTheta_2 = hlsl::sqrt(hlsl::max(cosTheta2_2, hlsl::promote(0.0))); - Dielectric::__polarized(eta12, hlsl::promote(cosTheta_1), R12p, R12s); + vector_type cosTheta_2; + vector::Dimension> notTIR; + { + const vector_type scale = scalar_type(1.0)/eta12; + const 
vector_type cosTheta2_2 = hlsl::promote(1.0) - hlsl::promote(scalar_type(1.0)-cosTheta_1*cosTheta_1) * scale * scale; + notTIR = cosTheta2_2 > hlsl::promote(0.0); + cosTheta_2 = hlsl::sqrt(hlsl::max(cosTheta2_2, hlsl::promote(0.0))); + } - // Reflected part by the base - // if kappa==0, base material is dielectric - NBL_IF_CONSTEXPR(SupportsTransmission) - Dielectric::__polarized(eta23 * eta23, cosTheta_2, R23p, R23s); - else + if (hlsl::any(notTIR)) { - vector_type etaLen2 = eta23 * eta23 + etak23 * etak23; - Conductor::__polarized(eta23, etaLen2, cosTheta_2, R23p, R23s); + Dielectric::__polarized(eta12, hlsl::promote(cosTheta_1), R12p, R12s); + + // Reflected part by the base + // if kappa==0, base material is dielectric + NBL_IF_CONSTEXPR(SupportsTransmission) + Dielectric::__polarized(eta23 * eta23, cosTheta_2, R23p, R23s); + else + { + vector_type etaLen2 = eta23 * eta23 + etak23 * etak23; + Conductor::__polarized(eta23, etaLen2, cosTheta_2, R23p, R23s); + } } // Check for total internal reflection - R12s = hlsl::mix(R12s, hlsl::promote(1.0), cosTheta2_2 <= hlsl::promote(0.0)); - R12p = hlsl::mix(R12p, hlsl::promote(1.0), cosTheta2_2 <= hlsl::promote(0.0)); - - R23s = hlsl::mix(R23s, hlsl::promote(0.0), cosTheta2_2 <= hlsl::promote(0.0)); - R23p = hlsl::mix(R23p, hlsl::promote(0.0), cosTheta2_2 <= hlsl::promote(0.0)); + const vector_type notTIRFactor = vector_type(notTIR); // 0 when TIR, 1 otherwise + R12s = R12s * notTIRFactor; + R12p = R12p * notTIRFactor; + R23s = R23s * notTIRFactor; + R23p = R23p * notTIRFactor; // Compute the transmission coefficients vector_type T121p = hlsl::promote(1.0) - R12p; vector_type T121s = hlsl::promote(1.0) - R12s; // Optical Path Difference - const vector_type D = hlsl::promote(2.0 * params.getDinc()) * params.getThinFilmIor() * cosTheta_2; + const vector_type D = _D * cosTheta_2; const vector_type Dphi = hlsl::promote(2.0 * numbers::pi) * D / wavelengths; vector_type phi21p, phi21s, phi23p, phi23s, r123s, r123p, Rs; @@ 
-634,82 +637,115 @@ struct iridescent_helper } }; -template) +template) struct iridescent_base { using scalar_type = typename vector_traits::scalar_type; using vector_type = T; - scalar_type getDinc() NBL_CONST_MEMBER_FUNC { return Dinc; } - vector_type getThinFilmIor() NBL_CONST_MEMBER_FUNC { return thinFilmIor; } + vector_type getD() NBL_CONST_MEMBER_FUNC { return D; } vector_type getEta12() NBL_CONST_MEMBER_FUNC { return eta12; } vector_type getEta23() NBL_CONST_MEMBER_FUNC { return eta23; } - vector_type getEtak23() NBL_CONST_MEMBER_FUNC - { - NBL_IF_CONSTEXPR(SupportsTransmission) - return hlsl::promote(0.0); - else - return etak23; - } - scalar_type Dinc; // thickness of thin film in nanometers, rec. 100-25000nm - vector_type thinFilmIor; + vector_type D; vector_type eta12; // outside (usually air 1.0) -> thin-film IOR vector_type eta23; // thin-film -> base material IOR - vector_type etak23; // thin-film -> complex component, k==0 makes dielectric }; } -template +template NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial) -struct Iridescent) > +struct Iridescent) > : impl::iridescent_base { - using this_t = Iridescent; + using this_t = Iridescent; using scalar_type = typename vector_traits::scalar_type; using vector_type = T; // assert dim==3? using eta_type = vector_type; - using base_type = impl::iridescent_base; + using base_type = impl::iridescent_base; NBL_CONSTEXPR_STATIC_INLINE bool ReturnsMonochrome = vector_traits::Dimension == 1; + struct SCreationParams + { + scalar_type Dinc; // thickness of thin film in nanometers, rec. 
100-25000nm + vector_type ior1; // outside (usually air 1.0) + vector_type ior2; // thin-film ior + vector_type ior3; // base mat ior + vector_type iork3; + }; + using creation_params_type = SCreationParams; + + static this_t create(NBL_CONST_REF_ARG(creation_params_type) params) + { + this_t retval; + retval.D = hlsl::promote(2.0 * params.Dinc) * params.ior2; + retval.eta12 = params.ior2/params.ior1; + retval.eta23 = params.ior3/params.ior2; + retval.etak23 = params.iork3/params.ior2; + return retval; + } + T operator()(const scalar_type clampedCosTheta) NBL_CONST_MEMBER_FUNC { - return impl::iridescent_helper::template __call(__base, clampedCosTheta); + return impl::iridescent_helper::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta); } OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / __base.eta23; + rcpEta.value = hlsl::promote(1.0) / base_type::eta23; rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } - base_type __base; + vector_type getEtak23() NBL_CONST_MEMBER_FUNC + { + return etak23; + } + + vector_type etak23; // thin-film -> complex component }; -template +template NBL_PARTIAL_REQ_TOP(concepts::FloatingPointLikeVectorial) -struct Iridescent) > +struct Iridescent) > : impl::iridescent_base { - using this_t = Iridescent; + using this_t = Iridescent; using scalar_type = typename vector_traits::scalar_type; using vector_type = T; // assert dim==3? using eta_type = vector; - using base_type = impl::iridescent_base; + using base_type = impl::iridescent_base; NBL_CONSTEXPR_STATIC_INLINE bool ReturnsMonochrome = vector_traits::Dimension == 1; + struct SCreationParams + { + scalar_type Dinc; // thickness of thin film in nanometers, rec. 
100-25000nm + vector_type ior1; // outside (usually air 1.0) + vector_type ior2; // thin-film ior + vector_type ior3; // base mat ior + }; + using creation_params_type = SCreationParams; + + static this_t create(NBL_CONST_REF_ARG(creation_params_type) params) + { + this_t retval; + retval.D = hlsl::promote(2.0 * params.Dinc) * params.ior2; + retval.eta12 = params.ior2/params.ior1; + retval.eta23 = params.ior3/params.ior2; + return retval; + } + T operator()(const scalar_type clampedCosTheta) NBL_CONST_MEMBER_FUNC { - return impl::iridescent_helper::template __call(__base, clampedCosTheta); + return impl::iridescent_helper::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta); } - scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return __base.eta23[0]; } + scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta23[0]; } OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / __base.eta23[0]; + rcpEta.value = hlsl::promote(1.0) / base_type::eta23[0]; rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } @@ -718,15 +754,16 @@ struct Iridescent(1.0)/__base.eta12, flip); - orientedFresnel.__base.eta23 = hlsl::mix(__base.eta23, hlsl::promote(1.0)/__base.eta23, flip); - orientedFresnel.__base.etak23 = hlsl::promote(0.0); + orientedFresnel.D = base_type::D; + orientedFresnel.eta12 = hlsl::mix(base_type::eta12, hlsl::promote(1.0)/base_type::eta12, flip); + orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta23, flip); return orientedFresnel; } - base_type __base; + vector_type getEtak23() NBL_CONST_MEMBER_FUNC + { + return hlsl::promote(0.0); + } }; diff --git a/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl b/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl index e30c3efdab..a6120233bb 100644 --- 
a/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/reflection/iridescent.hlsl @@ -16,7 +16,7 @@ namespace reflection { template -using SIridescent = SCookTorrance, fresnel::Iridescent >; +using SIridescent = SCookTorrance, fresnel::Iridescent >; } diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl index 8e06fc34e7..05b1753aca 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/iridescent.hlsl @@ -16,7 +16,7 @@ namespace transmission { template -using SIridescent = SCookTorrance, fresnel::Iridescent >; +using SIridescent = SCookTorrance, fresnel::Iridescent >; } From f4755ddd8ce50717ddae32b000b3d7f48b45a554 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 13 Nov 2025 16:25:26 +0700 Subject: [PATCH 038/140] avoid repeat fresnel calc Signed-off-by: Corey --- .../hlsl/bxdf/base/cook_torrance_base.hlsl | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index 306198f827..3789dded2e 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -130,14 +130,15 @@ struct SCookTorrance template, typename C=bool_constant NBL_FUNC_REQUIRES(C::value && !fresnel_type::ReturnsMonochrome) - static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH) + static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, NBL_REF_ARG(spectral_type) outFresnelVal) { spectral_type throughputWeights = interaction.getLuminosityContributionHint(); - return 
hlsl::dot(orientedFresnel(clampedVdotH), throughputWeights); + outFresnelVal = orientedFresnel(clampedVdotH); + return hlsl::dot(outFresnelVal, throughputWeights); } template, typename C=bool_constant NBL_FUNC_REQUIRES(C::value && fresnel_type::ReturnsMonochrome) - static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH) + static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, NBL_REF_ARG(spectral_type) outFresnelVal) { return orientedFresnel(clampedVdotH)[0]; } @@ -294,7 +295,8 @@ struct SCookTorrance assert(NdotV*VdotH >= scalar_type(0.0)); } - const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(VdotH)); + spectral_type dummy; + const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(VdotH), dummy); scalar_type rcpChoiceProb; scalar_type z = u.z; @@ -337,7 +339,8 @@ struct SCookTorrance NBL_IF_CONSTEXPR(IsBSDF) { - const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH())); + spectral_type dummy; + const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), dummy); return hlsl::mix(reflectance, scalar_type(1.0) - reflectance, cache.isTransmission()) * DG1.projectedLightMeasure; } else @@ -389,8 +392,8 @@ struct SCookTorrance quo = hlsl::promote(G2_over_G1); else { - const scalar_type scaled_reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH())); - spectral_type reflectance = impl::__implicit_promote::__call(_f(hlsl::abs(cache.getVdotH()))); + spectral_type reflectance; + const scalar_type scaled_reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), reflectance); quo = hlsl::mix(reflectance / scaled_reflectance, (hlsl::promote(1.0) - reflectance) / (scalar_type(1.0) 
- scaled_reflectance), cache.isTransmission()) * G2_over_G1; } From 558177c4468554344d802577ba9c12b75ea30003 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 13 Nov 2025 16:37:47 +0700 Subject: [PATCH 039/140] mix reflectance w/ transmission in getScaledReflectance Signed-off-by: Corey --- .../hlsl/bxdf/base/cook_torrance_base.hlsl | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index 3789dded2e..e88d5fccb7 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -130,17 +130,19 @@ struct SCookTorrance template, typename C=bool_constant NBL_FUNC_REQUIRES(C::value && !fresnel_type::ReturnsMonochrome) - static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, NBL_REF_ARG(spectral_type) outFresnelVal) + static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, bool transmitted, NBL_REF_ARG(spectral_type) outFresnelVal) { spectral_type throughputWeights = interaction.getLuminosityContributionHint(); - outFresnelVal = orientedFresnel(clampedVdotH); + spectral_type reflectance = orientedFresnel(clampedVdotH); + outFresnelVal = hlsl::mix(reflectance, hlsl::promote(1.0)-reflectance, transmitted); return hlsl::dot(outFresnelVal, throughputWeights); } template, typename C=bool_constant NBL_FUNC_REQUIRES(C::value && fresnel_type::ReturnsMonochrome) - static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, NBL_REF_ARG(spectral_type) outFresnelVal) + static scalar_type __getScaledReflectance(NBL_CONST_REF_ARG(fresnel_type) 
orientedFresnel, NBL_CONST_REF_ARG(Interaction) interaction, scalar_type clampedVdotH, bool transmitted, NBL_REF_ARG(spectral_type) outFresnelVal) { - return orientedFresnel(clampedVdotH)[0]; + scalar_type reflectance = orientedFresnel(clampedVdotH)[0]; + return hlsl::mix(reflectance, scalar_type(1.0)-reflectance, transmitted); } bool __dotIsValue(const vector3_type a, const vector3_type b, const scalar_type value) @@ -296,7 +298,7 @@ struct SCookTorrance } spectral_type dummy; - const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(VdotH), dummy); + const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(VdotH), false, dummy); scalar_type rcpChoiceProb; scalar_type z = u.z; @@ -340,8 +342,8 @@ struct SCookTorrance NBL_IF_CONSTEXPR(IsBSDF) { spectral_type dummy; - const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), dummy); - return hlsl::mix(reflectance, scalar_type(1.0) - reflectance, cache.isTransmission()) * DG1.projectedLightMeasure; + const scalar_type reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), cache.isTransmission(), dummy); + return reflectance * DG1.projectedLightMeasure; } else { @@ -393,9 +395,8 @@ struct SCookTorrance else { spectral_type reflectance; - const scalar_type scaled_reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), reflectance); - quo = hlsl::mix(reflectance / scaled_reflectance, - (hlsl::promote(1.0) - reflectance) / (scalar_type(1.0) - scaled_reflectance), cache.isTransmission()) * G2_over_G1; + const scalar_type scaled_reflectance = __getScaledReflectance(_f, interaction, hlsl::abs(cache.getVdotH()), cache.isTransmission(), reflectance); + quo = reflectance / scaled_reflectance * G2_over_G1; } } else From 3f92c276dde896e3191d471e8a5891e94f9735a5 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 14 Nov 2025 14:49:25 +0700 Subject: [PATCH 040/140] some minor fixes 
to fresnel orientedEta usage Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/common.hlsl | 6 +++--- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/common.hlsl b/include/nbl/builtin/hlsl/bxdf/common.hlsl index 5ba64f87df..6af3b4c01b 100644 --- a/include/nbl/builtin/hlsl/bxdf/common.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/common.hlsl @@ -603,7 +603,7 @@ struct SIsotropicMicrofacetCache // not coming from the medium (reflected) OR // exiting at the macro scale AND ( (not L outside the cone of possible directions given IoR with constraint VdotH*LdotH<0.0) OR (microfacet not facing toward the macrosurface, i.e. non heightfield profile of microsurface) ) - const bool valid = ComputeMicrofacetNormal::isValidMicrofacet(transmitted, VdotL, retval.absNdotH, computeMicrofacetNormal.orientedEta); + const bool valid = ComputeMicrofacetNormal::isValidMicrofacet(transmitted, VdotL, retval.absNdotH, fresnel::OrientedEtas::create(1.0, computeMicrofacetNormal.orientedEta)); if (valid) { retval.VdotH = hlsl::dot(computeMicrofacetNormal.V,H); @@ -626,7 +626,7 @@ struct SIsotropicMicrofacetCache const bool transmitted = ComputeMicrofacetNormal::isTransmissionPath(NdotV,NdotL); ComputeMicrofacetNormal computeMicrofacetNormal = ComputeMicrofacetNormal::create(V,L,N,1.0); - computeMicrofacetNormal.orientedEta = orientedEtas; + computeMicrofacetNormal.orientedEta = orientedEtas.value[0]; return create(transmitted, computeMicrofacetNormal, VdotL, N, H); } @@ -652,7 +652,7 @@ struct SIsotropicMicrofacetCache const bool transmitted = ComputeMicrofacetNormal::isTransmissionPath(interaction.getNdotV(),_sample.getNdotL()); ComputeMicrofacetNormal computeMicrofacetNormal = ComputeMicrofacetNormal::create(V,L,N,1.0); - computeMicrofacetNormal.orientedEta = orientedEtas; + computeMicrofacetNormal.orientedEta = orientedEtas.value[0]; return create(transmitted, computeMicrofacetNormal, hlsl::dot(V, 
L), N, H); } diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index e6bb3f98c2..f7655e9978 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -141,7 +141,7 @@ struct ComputeMicrofacetNormal vector_type unnormalized(const bool _refract) { assert(hlsl::dot(V, L) <= -hlsl::min(orientedEta, scalar_type(1.0) / orientedEta)); - const scalar_type etaFactor = hlsl::mix(scalar_type(1.0), orientedEta.value, _refract); + const scalar_type etaFactor = hlsl::mix(scalar_type(1.0), orientedEta, _refract); vector_type tmpH = V + L * etaFactor; tmpH = ieee754::flipSign(tmpH, _refract && orientedEta > scalar_type(1.0)); return tmpH; From 0580e9910b7c458a68b585342d50f7f7f3fb79c4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 14 Nov 2025 14:49:59 +0700 Subject: [PATCH 041/140] refactor usage of angle adding Signed-off-by: Corey --- .../nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl | 8 ++++++-- include/nbl/builtin/hlsl/shapes/rectangle.hlsl | 7 ++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index 663cd5e3d1..127a7194b2 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -43,8 +43,12 @@ struct SphericalRectangle -n_z[3] * n_z[0] ); - scalar_type p = math::getSumofArccosAB(cosGamma[0], cosGamma[1]); - scalar_type q = math::getSumofArccosAB(cosGamma[2], cosGamma[3]); + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); + angle_adder.addCosine(cosGamma[1]); + scalar_type p = angle_adder.getSumofArccos(); + angle_adder = math::sincos_accumulator::create(cosGamma[2]); + angle_adder.addCosine(cosGamma[3]); + scalar_type q = angle_adder.getSumofArccos(); const scalar_type k = 2 * numbers::pi - q; const scalar_type 
b0 = n_z[0]; diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl index f1a1e37575..434918cc09 100644 --- a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl @@ -8,6 +8,7 @@ #include #include #include +#include namespace nbl { @@ -49,7 +50,11 @@ struct SphericalRectangle -n_z[2] * n_z[3], -n_z[3] * n_z[0] ); - return math::getSumofArccosABCD(cosGamma[0], cosGamma[1], cosGamma[2], cosGamma[3]) - 2 * numbers::pi; + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); + angle_adder.addCosine(cosGamma[1]); + angle_adder.addCosine(cosGamma[2]); + angle_adder.addCosine(cosGamma[3]); + return angle_adder.getSumofArccos() - scalar_type(2.0) * numbers::pi; } vector3_type r0; From cb542f24be54502a1690eba952b554f3c7f38d40 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 14 Nov 2025 14:50:42 +0700 Subject: [PATCH 042/140] temp? fix for mix_helper on floats Signed-off-by: Corey --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index a5e48debbf..ae78ea92c3 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -268,6 +268,20 @@ struct mix_helper) > } }; +template +NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable && concepts::Boolean) +struct mix_helper && concepts::Boolean) > +{ + using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; + // for a component of a that is false, the corresponding component of x is returned + // for a component of a that is true, the corresponding component of y is returned + // so we make sure this is correct when calling the operation + static inline return_t __call(const T x, const T y, const U a) + { + 
return spirv::select(a, y, x); + } +}; + template NBL_PARTIAL_REQ_TOP(matrix_traits::Square) struct determinant_helper::Square) > { From f826120430a6e39abd9674513234bbc1bb8406cd Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Sat, 18 Oct 2025 19:12:31 +0200 Subject: [PATCH 043/140] Created RWMC files Signed-off-by: Corey --- .../builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 100 +++++++++++ include/nbl/builtin/hlsl/rwmc/rwmc.hlsl | 160 ++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 7 +- 3 files changed, 263 insertions(+), 4 deletions(-) create mode 100644 include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl create mode 100644 include/nbl/builtin/hlsl/rwmc/rwmc.hlsl diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl new file mode 100644 index 0000000000..6678a66942 --- /dev/null +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -0,0 +1,100 @@ +#ifndef _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_ +#define _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + +struct CascadeSettings +{ + uint32_t size; + uint32_t start; + uint32_t base; +}; + +template +struct CascadeEntry +{ + CascadeLayerType data[CascadeSize]; +}; + +template +struct CascadeAccumulator +{ + using output_storage_type = CascadeEntry; + using initialization_data = CascadeSettings; + output_storage_type accumulation; + uint32_t cascadeSampleCounter[CascadeSize]; + CascadeSettings cascadeSettings; + + void initialize(in CascadeSettings settings) + { + for (int i = 0; i < CascadeSize; ++i) + { + accumulation.data[i] = (CascadeLayerType)0.0f; + cascadeSampleCounter[i] = 0u; + } + + cascadeSettings.size = settings.size; + cascadeSettings.start = settings.start; + cascadeSettings.base = settings.base; + } + + typename vector_traits::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) + { + return 
hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); + } + + // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp + void addSample(uint32_t sampleIndex, float32_t3 sample) + { + float lowerScale = cascadeSettings.start; + float upperScale = lowerScale * cascadeSettings.base; + + const float luma = getLuma(sample); + + uint32_t lowerCascadeIndex = 0u; + while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) + { + lowerScale = upperScale; + upperScale *= cascadeSettings.base; + ++lowerCascadeIndex; + } + + float lowerCascadeLevelWeight; + float higherCascadeLevelWeight; + + if (luma <= lowerScale) + lowerCascadeLevelWeight = 1.0f; + else if (luma < upperScale) + lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale)); + else // Inf, NaN ... + lowerCascadeLevelWeight = 0.0f; + + if (luma < upperScale) + higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight); + else + higherCascadeLevelWeight = upperScale / luma; + + uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; + + const uint32_t sampleCount = sampleIndex + 1u; + const float reciprocalSampleCount = 1.0f / float(sampleCount); + accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount; + accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[lowerCascadeIndex] = sampleCount; + cascadeSampleCounter[higherCascadeIndex] = sampleCount; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl new file mode 100644 index 0000000000..d7b151af86 --- /dev/null +++ 
b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl @@ -0,0 +1,160 @@ +#ifndef _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ +namespace impl +{ + +struct CascadeSample +{ + float32_t3 centerValue; + float normalizedCenterLuma; + float normalizedNeighbourhoodAverageLuma; +}; + +// TODO: figure out what values should pixels outside have, 0.0f is incorrect +float32_t3 sampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, in RWTexture2DArray cascade, uint32_t cascadeIndex) +{ + const int32_t2 texelCoord = currentCoord + offset; + if (any(texelCoord < int32_t2(0, 0))) + return float32_t3(0.0f, 0.0f, 0.0f); + + float32_t4 output = cascade.Load(int32_t3(texelCoord, int32_t(cascadeIndex))); + return float32_t3(output.r, output.g, output.b); +} + +float32_t calcLuma(in float32_t3 col) +{ + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); +} + +CascadeSample SampleCascade(in int32_t2 coord, in RWTexture2DArray cascade, in uint cascadeIndex, in float reciprocalBaseI) +{ + float32_t3 neighbourhood[9]; + neighbourhood[0] = sampleCascadeTexel(coord, int32_t2(-1, -1), cascade, cascadeIndex); + neighbourhood[1] = sampleCascadeTexel(coord, int32_t2(0, -1), cascade, cascadeIndex); + neighbourhood[2] = sampleCascadeTexel(coord, int32_t2(1, -1), cascade, cascadeIndex); + neighbourhood[3] = sampleCascadeTexel(coord, int32_t2(-1, 0), cascade, cascadeIndex); + neighbourhood[4] = sampleCascadeTexel(coord, int32_t2(0, 0), cascade, cascadeIndex); + neighbourhood[5] = sampleCascadeTexel(coord, int32_t2(1, 0), cascade, cascadeIndex); + neighbourhood[6] = sampleCascadeTexel(coord, int32_t2(-1, 1), cascade, cascadeIndex); + neighbourhood[7] = sampleCascadeTexel(coord, int32_t2(0, 1), cascade, cascadeIndex); + neighbourhood[8] = sampleCascadeTexel(coord, int32_t2(1, 1), cascade, cascadeIndex); + + // numerical robustness 
+ float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + + ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); + + CascadeSample retval; + retval.centerValue = neighbourhood[4]; + retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI; + retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; + return retval; +} + +} // namespace impl + +struct ReweightingParameters +{ + uint32_t lastCascadeIndex; + float initialEmin; // a minimum image brightness that we always consider reliable + float reciprocalBase; + float reciprocalN; + float reciprocalKappa; + float colorReliabilityFactor; + float NOverKappa; +}; + +ReweightingParameters computeReweightingParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) +{ + ReweightingParameters retval; + retval.lastCascadeIndex = cascadeSize - 1u; + retval.initialEmin = minReliableLuma; + retval.reciprocalBase = 1.f / base; + const float N = float(sampleCount); + retval.reciprocalN = 1.f / N; + retval.reciprocalKappa = 1.f / kappa; + // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have + // allow up to ~ more energy in one sample to lessen bias in some cases + retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; + retval.NOverKappa = N * retval.reciprocalKappa; + + return retval; +} + +float32_t3 reweight(in ReweightingParameters params, in RWTexture2DArray cascade, in int32_t2 coord) +{ + float reciprocalBaseI = 1.f; + impl::CascadeSample curr = impl::SampleCascade(coord, cascade, 0u, reciprocalBaseI); + + float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); + float Emin = params.initialEmin; + + float prevNormalizedCenterLuma, 
prevNormalizedNeighbourhoodAverageLuma; + for (uint i = 0u; i <= params.lastCascadeIndex; i++) + { + const bool notFirstCascade = i != 0u; + const bool notLastCascade = i != params.lastCascadeIndex; + + impl::CascadeSample next; + if (notLastCascade) + { + reciprocalBaseI *= params.reciprocalBase; + next = impl::SampleCascade(coord, cascade, i + 1u, reciprocalBaseI); + } + + float reliability = 1.f; + // sample counting-based reliability estimation + if (params.reciprocalKappa <= 1.f) + { + float localReliability = curr.normalizedCenterLuma; + // reliability in 3x3 pixel block (see robustness) + float globalReliability = curr.normalizedNeighbourhoodAverageLuma; + if (notFirstCascade) + { + localReliability += prevNormalizedCenterLuma; + globalReliability += prevNormalizedNeighbourhoodAverageLuma; + } + if (notLastCascade) + { + localReliability += next.normalizedCenterLuma; + globalReliability += next.normalizedNeighbourhoodAverageLuma; + } + // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) + reliability = globalReliability < params.reciprocalN ? 
globalReliability : localReliability; + { + const float accumLuma = impl::calcLuma(accumulation); + if (accumLuma > Emin) + Emin = accumLuma; + + const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; + + reliability += colorReliability; + reliability *= params.NOverKappa; + reliability -= params.reciprocalKappa; + reliability = clamp(reliability * 0.5f, 0.f, 1.f); + } + } + accumulation += curr.centerValue * reliability; + + prevNormalizedCenterLuma = curr.normalizedCenterLuma; + prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; + curr = next; + } + + return accumulation; +} + +} +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 58a75022cf..9dc7847acf 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -360,9 +360,8 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") #blur LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") -#morton codes -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") -#testing -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/testing/relative_approx_compare.hlsl") +#rwmc +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/rwmc.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/CascadeAccumulator.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From 0a864b1400c134660fd1ffcf3756b9f71f05d8fe Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Thu, 13 Nov 2025 21:58:26 +0100 Subject: [PATCH 044/140] Refactored resolve.hlsl Signed-off-by: Corey --- .../concepts/accessors/loadable_image.hlsl | 15 +- 
.../builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 126 ++++++------ .../builtin/hlsl/rwmc/ResolveParameters.hlsl | 45 +++++ .../hlsl/rwmc/SplattingParameters.hlsl | 23 +++ include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 189 ++++++++++++++++++ include/nbl/builtin/hlsl/rwmc/rwmc.hlsl | 160 --------------- src/nbl/builtin/CMakeLists.txt | 4 +- 7 files changed, 333 insertions(+), 229 deletions(-) create mode 100644 include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl create mode 100644 include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl create mode 100644 include/nbl/builtin/hlsl/rwmc/resolve.hlsl delete mode 100644 include/nbl/builtin/hlsl/rwmc/rwmc.hlsl diff --git a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl index c272eeb1ab..8c7251214d 100644 --- a/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl +++ b/include/nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl @@ -16,8 +16,15 @@ namespace concepts { namespace accessors { + +// concept `LoadableImage` translates to smth like this: +//template +//concept LoadableImage = requires(U a, vector uv, uint16_t layer) { +// ::nbl::hlsl::is_same_v().template get(uv,layer)), vector>; +//}; + // declare concept -#define NBL_CONCEPT_NAME StorableImage +#define NBL_CONCEPT_NAME LoadableImage #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) #define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) // not the greatest syntax but works @@ -26,12 +33,12 @@ namespace accessors #define NBL_CONCEPT_PARAM_2 (layer,uint16_t) // start concept NBL_CONCEPT_BEGIN(3) -// need to be defined AFTER the cocnept begins +// need to be defined AFTER the concept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define uv NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 #define layer NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer)) , ::nbl::hlsl::is_same_v, vector)) + 
((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((a.template get(uv,layer)), ::nbl::hlsl::is_same_v, vector)) ); #undef layer #undef uv @@ -39,7 +46,7 @@ NBL_CONCEPT_END( #include // declare concept -#define NBL_CONCEPT_NAME MipmappedStorableImage +#define NBL_CONCEPT_NAME MipmappedLoadableImage #define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) #define NBL_CONCEPT_TPLT_PRM_NAMES (U)(T)(Dims) // not the greatest syntax but works diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 6678a66942..77cfb3c283 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -1,8 +1,10 @@ #ifndef _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_ #define _NBL_HLSL_RWMC_CASCADE_ACCUMULATOR_INCLUDED_ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +#include #include #include +#include namespace nbl { @@ -11,86 +13,82 @@ namespace hlsl namespace rwmc { -struct CascadeSettings +template) +struct CascadeAccumulator { - uint32_t size; - uint32_t start; - uint32_t base; -}; + struct CascadeEntry + { + uint32_t cascadeSampleCounter[CascadeCount]; + CascadeLayerType data[CascadeCount]; -template -struct CascadeEntry -{ - CascadeLayerType data[CascadeSize]; -}; + void addSampleIntoCascadeEntry(CascadeLayerType _sample, uint32_t lowerCascadeIndex, float lowerCascadeLevelWeight, float higherCascadeLevelWeight, uint32_t sampleCount) + { + const float reciprocalSampleCount = 1.0f / float(sampleCount); + + uint32_t lowerCascadeSampleCount = cascadeSampleCounter[lowerCascadeIndex]; + data[lowerCascadeIndex] += (_sample * lowerCascadeLevelWeight - (sampleCount - lowerCascadeSampleCount) * data[lowerCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[lowerCascadeIndex] = sampleCount; + + uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; + if (higherCascadeIndex < CascadeCount) + { + uint32_t higherCascadeSampleCount = 
cascadeSampleCounter[higherCascadeIndex]; + data[higherCascadeIndex] += (_sample * higherCascadeLevelWeight - (sampleCount - higherCascadeSampleCount) * data[higherCascadeIndex]) * reciprocalSampleCount; + cascadeSampleCounter[higherCascadeIndex] = sampleCount; + } + } + }; -template -struct CascadeAccumulator -{ - using output_storage_type = CascadeEntry; - using initialization_data = CascadeSettings; + using cascade_layer_scalar_type = typename vector_traits::scalar_type; + using this_t = CascadeAccumulator; + using output_storage_type = CascadeEntry; + using initialization_data = SplattingParameters; output_storage_type accumulation; - uint32_t cascadeSampleCounter[CascadeSize]; - CascadeSettings cascadeSettings; + + SplattingParameters splattingParameters; - void initialize(in CascadeSettings settings) + static this_t create(NBL_CONST_REF_ARG(SplattingParameters) settings) { - for (int i = 0; i < CascadeSize; ++i) + this_t retval; + for (int i = 0; i < CascadeCount; ++i) { - accumulation.data[i] = (CascadeLayerType)0.0f; - cascadeSampleCounter[i] = 0u; + retval.accumulation.data[i] = promote(0.0f); + retval.accumulation.cascadeSampleCounter[i] = 0u; } + retval.splattingParameters = settings; - cascadeSettings.size = settings.size; - cascadeSettings.start = settings.start; - cascadeSettings.base = settings.base; + return retval; } - - typename vector_traits::scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) + + cascade_layer_scalar_type getLuma(NBL_CONST_REF_ARG(CascadeLayerType) col) { return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); } // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp - void addSample(uint32_t sampleIndex, float32_t3 sample) + void addSample(uint32_t sampleCount, CascadeLayerType _sample) { - float lowerScale = cascadeSettings.start; - float upperScale = lowerScale * cascadeSettings.base; - - const float luma = getLuma(sample); - - uint32_t lowerCascadeIndex = 0u; - 
while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) - { - lowerScale = upperScale; - upperScale *= cascadeSettings.base; - ++lowerCascadeIndex; - } - - float lowerCascadeLevelWeight; - float higherCascadeLevelWeight; - - if (luma <= lowerScale) - lowerCascadeLevelWeight = 1.0f; - else if (luma < upperScale) - lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - lowerScale / upperScale)); - else // Inf, NaN ... - lowerCascadeLevelWeight = 0.0f; - - if (luma < upperScale) - higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight); - else - higherCascadeLevelWeight = upperScale / luma; - - uint32_t higherCascadeIndex = lowerCascadeIndex + 1u; - - const uint32_t sampleCount = sampleIndex + 1u; - const float reciprocalSampleCount = 1.0f / float(sampleCount); - accumulation.data[lowerCascadeIndex] += (sample * lowerCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[lowerCascadeIndex])) * accumulation.data[lowerCascadeIndex]) * reciprocalSampleCount; - accumulation.data[higherCascadeIndex] += (sample * higherCascadeLevelWeight - (sampleCount - (cascadeSampleCounter[higherCascadeIndex])) * accumulation.data[higherCascadeIndex]) * reciprocalSampleCount; - cascadeSampleCounter[lowerCascadeIndex] = sampleCount; - cascadeSampleCounter[higherCascadeIndex] = sampleCount; + const cascade_layer_scalar_type log2Start = splattingParameters.log2Start; + const cascade_layer_scalar_type log2Base = splattingParameters.log2Base; + const cascade_layer_scalar_type luma = getLuma(_sample); + const cascade_layer_scalar_type log2Luma = log2(luma); + const cascade_layer_scalar_type cascade = log2Luma * 1.f / log2Base - log2Start / log2Base; + const cascade_layer_scalar_type clampedCascade = clamp(cascade, 0, CascadeCount - 1); + // c<=0 -> 0, c>=Count-1 -> Count-1 + uint32_t lowerCascadeIndex = floor(cascade); + // 0 whenever clamped or `cascade` is integer (when `clampedCascade` is integer) + 
cascade_layer_scalar_type higherCascadeWeight = clampedCascade - floor(clampedCascade); + // never 0 thanks to magic of `1-fract(x)` + cascade_layer_scalar_type lowerCascadeWeight = cascade_layer_scalar_type(1) - higherCascadeWeight; + + // handle super bright sample case + if (cascade > CascadeCount - 1) + lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); + + accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } + + }; } diff --git a/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl new file mode 100644 index 0000000000..7509eac493 --- /dev/null +++ b/include/nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl @@ -0,0 +1,45 @@ +#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_PARAMETERS_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + +struct ResolveParameters +{ + uint32_t lastCascadeIndex; + float initialEmin; // a minimum image brightness that we always consider reliable + float reciprocalBase; + float reciprocalN; + float reciprocalKappa; + float colorReliabilityFactor; + float NOverKappa; +}; + +ResolveParameters computeResolveParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) +{ + ResolveParameters retval; + retval.lastCascadeIndex = cascadeSize - 1u; + retval.initialEmin = minReliableLuma; + retval.reciprocalBase = 1.f / base; + const float N = float(sampleCount); + retval.reciprocalN = 1.f / N; + retval.reciprocalKappa = 1.f / kappa; + // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have + // allow up to ~ more energy in one sample to lessen bias in some cases + retval.colorReliabilityFactor = base + (1.f - base) * 
retval.reciprocalKappa; + retval.NOverKappa = N * retval.reciprocalKappa; + + return retval; +} + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl new file mode 100644 index 0000000000..e74dd0e5bd --- /dev/null +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -0,0 +1,23 @@ +#ifndef _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + +struct SplattingParameters +{ + float log2Start; + float log2Base; +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl new file mode 100644 index 0000000000..cb8d3b27d1 --- /dev/null +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -0,0 +1,189 @@ +#ifndef _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_RWMC_RESOLVE_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace rwmc +{ + // declare concept +#define NBL_CONCEPT_NAME ResolveAccessorBase +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename)(typename)(int32_t) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T)(VectorScalarType)(Dims) +// not the greatest syntax but works +#define NBL_CONCEPT_PARAM_0 (a,T) +#define NBL_CONCEPT_PARAM_1 (scalar,VectorScalarType) +#define NBL_CONCEPT_PARAM_2 (vec,vector) +// start concept + NBL_CONCEPT_BEGIN(2) +// need to be defined AFTER the concept begins +#define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +#define scalar NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 +#define vec NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_EXPR)((a.calcLuma(vec)))) +); +#undef a +#undef vec +#include + +/* ResolveAccessor is 
required to: +* - satisfy `LoadableImage` concept requirements +* - implement function called `calcLuma` which calculates luma from a pixel value +*/ + +template +NBL_BOOL_CONCEPT ResolveAccessor = ResolveAccessorBase && concepts::accessors::LoadableImage; + +template +struct ResolveAccessorAdaptor +{ + using output_scalar_type = OutputScalar; + using output_type = vector; + NBL_CONSTEXPR int32_t image_dimension = 2; + + RWTexture2DArray cascade; + + float32_t calcLuma(in float32_t3 col) + { + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); + } + + template + output_type get(vector uv, uint16_t layer) + { + uint32_t imgWidth, imgHeight, layers; + cascade.GetDimensions(imgWidth, imgHeight, layers); + int16_t2 cascadeImageDimension = int16_t2(imgWidth, imgHeight); + + if (any(uv < int16_t2(0, 0)) || any(uv > cascadeImageDimension)) + return vector(0, 0, 0, 0); + + return cascade.Load(int32_t3(uv, int32_t(layer))); + } +}; + +template //NBL_PRIMARY_REQUIRES(ResolveAccessor) +struct Resolver +{ + using output_type = OutputColorType; + + struct CascadeSample + { + float32_t3 centerValue; + float normalizedCenterLuma; + float normalizedNeighbourhoodAverageLuma; + }; + + static Resolver create(NBL_REF_ARG(ResolveParameters) resolveParameters) + { + Resolver retval; + retval.params = resolveParameters; + + return retval; + } + + output_type operator()(NBL_REF_ARG(CascadeAccessor) acc, const int16_t2 coord) + { + float reciprocalBaseI = 1.f; + CascadeSample curr = __sampleCascade(acc, coord, 0u, reciprocalBaseI); + + float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); + float Emin = params.initialEmin; + + float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; + for (int16_t i = 0u; i <= params.lastCascadeIndex; i++) + { + const bool notFirstCascade = i != 0; + const bool notLastCascade = i != params.lastCascadeIndex; + + CascadeSample next; + if (notLastCascade) + { + reciprocalBaseI *= params.reciprocalBase; + next = 
__sampleCascade(acc, coord, int16_t(i + 1), reciprocalBaseI); + } + + float reliability = 1.f; + // sample counting-based reliability estimation + if (params.reciprocalKappa <= 1.f) + { + float localReliability = curr.normalizedCenterLuma; + // reliability in 3x3 pixel block (see robustness) + float globalReliability = curr.normalizedNeighbourhoodAverageLuma; + if (notFirstCascade) + { + localReliability += prevNormalizedCenterLuma; + globalReliability += prevNormalizedNeighbourhoodAverageLuma; + } + if (notLastCascade) + { + localReliability += next.normalizedCenterLuma; + globalReliability += next.normalizedNeighbourhoodAverageLuma; + } + // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) + reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; + { + const float accumLuma = acc.calcLuma(accumulation); + if (accumLuma > Emin) + Emin = accumLuma; + + const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; + + reliability += colorReliability; + reliability *= params.NOverKappa; + reliability -= params.reciprocalKappa; + reliability = clamp(reliability * 0.5f, 0.f, 1.f); + } + } + accumulation += curr.centerValue * reliability; + + prevNormalizedCenterLuma = curr.normalizedCenterLuma; + prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; + curr = next; + } + + return accumulation; + } + + ResolveParameters params; + + // pseudo private stuff: + + CascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, float reciprocalBaseI) + { + CascadeAccessor::output_type tmp; + output_type neighbourhood[9]; + neighbourhood[0] = acc.template get(coord + int16_t2(-1, -1), cascadeIndex); + neighbourhood[1] = acc.template get(coord + int16_t2(0, -1), cascadeIndex); + neighbourhood[2] = acc.template get(coord + int16_t2(1, -1), 
cascadeIndex); + neighbourhood[3] = acc.template get(coord + int16_t2(-1, 0), cascadeIndex); + neighbourhood[4] = acc.template get(coord + int16_t2(0, 0), cascadeIndex); + neighbourhood[5] = acc.template get(coord + int16_t2(1, 0), cascadeIndex); + neighbourhood[6] = acc.template get(coord + int16_t2(-1, 1), cascadeIndex); + neighbourhood[7] = acc.template get(coord + int16_t2(0, 1), cascadeIndex); + neighbourhood[8] = acc.template get(coord + int16_t2(1, 1), cascadeIndex); + + // numerical robustness + float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + + ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); + + CascadeSample retval; + retval.centerValue = neighbourhood[4]; + retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = acc.calcLuma(neighbourhood[4]) * reciprocalBaseI; + retval.normalizedNeighbourhoodAverageLuma = (acc.calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; + return retval; + } +}; + +} +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl b/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl deleted file mode 100644 index d7b151af86..0000000000 --- a/include/nbl/builtin/hlsl/rwmc/rwmc.hlsl +++ /dev/null @@ -1,160 +0,0 @@ -#ifndef _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_RWMC_RWMC_HLSL_INCLUDED_ - -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include - -namespace nbl -{ -namespace hlsl -{ -namespace rwmc -{ -namespace impl -{ - -struct CascadeSample -{ - float32_t3 centerValue; - float normalizedCenterLuma; - float normalizedNeighbourhoodAverageLuma; -}; - -// TODO: figure out what values should pixels outside have, 0.0f is incorrect -float32_t3 sampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, in RWTexture2DArray cascade, uint32_t cascadeIndex) -{ - const int32_t2 texelCoord = currentCoord + offset; - if (any(texelCoord < 
int32_t2(0, 0))) - return float32_t3(0.0f, 0.0f, 0.0f); - - float32_t4 output = cascade.Load(int32_t3(texelCoord, int32_t(cascadeIndex))); - return float32_t3(output.r, output.g, output.b); -} - -float32_t calcLuma(in float32_t3 col) -{ - return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); -} - -CascadeSample SampleCascade(in int32_t2 coord, in RWTexture2DArray cascade, in uint cascadeIndex, in float reciprocalBaseI) -{ - float32_t3 neighbourhood[9]; - neighbourhood[0] = sampleCascadeTexel(coord, int32_t2(-1, -1), cascade, cascadeIndex); - neighbourhood[1] = sampleCascadeTexel(coord, int32_t2(0, -1), cascade, cascadeIndex); - neighbourhood[2] = sampleCascadeTexel(coord, int32_t2(1, -1), cascade, cascadeIndex); - neighbourhood[3] = sampleCascadeTexel(coord, int32_t2(-1, 0), cascade, cascadeIndex); - neighbourhood[4] = sampleCascadeTexel(coord, int32_t2(0, 0), cascade, cascadeIndex); - neighbourhood[5] = sampleCascadeTexel(coord, int32_t2(1, 0), cascade, cascadeIndex); - neighbourhood[6] = sampleCascadeTexel(coord, int32_t2(-1, 1), cascade, cascadeIndex); - neighbourhood[7] = sampleCascadeTexel(coord, int32_t2(0, 1), cascade, cascadeIndex); - neighbourhood[8] = sampleCascadeTexel(coord, int32_t2(1, 1), cascade, cascadeIndex); - - // numerical robustness - float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + - ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); - - CascadeSample retval; - retval.centerValue = neighbourhood[4]; - retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI; - retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; - return retval; -} - -} // namespace impl - -struct ReweightingParameters -{ - uint32_t lastCascadeIndex; - float initialEmin; // a minimum image brightness that we always consider 
reliable - float reciprocalBase; - float reciprocalN; - float reciprocalKappa; - float colorReliabilityFactor; - float NOverKappa; -}; - -ReweightingParameters computeReweightingParameters(float base, uint32_t sampleCount, float minReliableLuma, float kappa, uint32_t cascadeSize) -{ - ReweightingParameters retval; - retval.lastCascadeIndex = cascadeSize - 1u; - retval.initialEmin = minReliableLuma; - retval.reciprocalBase = 1.f / base; - const float N = float(sampleCount); - retval.reciprocalN = 1.f / N; - retval.reciprocalKappa = 1.f / kappa; - // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have - // allow up to ~ more energy in one sample to lessen bias in some cases - retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; - retval.NOverKappa = N * retval.reciprocalKappa; - - return retval; -} - -float32_t3 reweight(in ReweightingParameters params, in RWTexture2DArray cascade, in int32_t2 coord) -{ - float reciprocalBaseI = 1.f; - impl::CascadeSample curr = impl::SampleCascade(coord, cascade, 0u, reciprocalBaseI); - - float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); - float Emin = params.initialEmin; - - float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; - for (uint i = 0u; i <= params.lastCascadeIndex; i++) - { - const bool notFirstCascade = i != 0u; - const bool notLastCascade = i != params.lastCascadeIndex; - - impl::CascadeSample next; - if (notLastCascade) - { - reciprocalBaseI *= params.reciprocalBase; - next = impl::SampleCascade(coord, cascade, i + 1u, reciprocalBaseI); - } - - float reliability = 1.f; - // sample counting-based reliability estimation - if (params.reciprocalKappa <= 1.f) - { - float localReliability = curr.normalizedCenterLuma; - // reliability in 3x3 pixel block (see robustness) - float globalReliability = curr.normalizedNeighbourhoodAverageLuma; - if (notFirstCascade) - { - 
localReliability += prevNormalizedCenterLuma; - globalReliability += prevNormalizedNeighbourhoodAverageLuma; - } - if (notLastCascade) - { - localReliability += next.normalizedCenterLuma; - globalReliability += next.normalizedNeighbourhoodAverageLuma; - } - // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) - reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; - { - const float accumLuma = impl::calcLuma(accumulation); - if (accumLuma > Emin) - Emin = accumLuma; - - const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; - - reliability += colorReliability; - reliability *= params.NOverKappa; - reliability -= params.reciprocalKappa; - reliability = clamp(reliability * 0.5f, 0.f, 1.f); - } - } - accumulation += curr.centerValue * reliability; - - prevNormalizedCenterLuma = curr.normalizedCenterLuma; - prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; - curr = next; - } - - return accumulation; -} - -} -} -} - -#endif \ No newline at end of file diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 9dc7847acf..fd8dccbd01 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -361,7 +361,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") #rwmc -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/rwmc.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/Resolve.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/CascadeAccumulator.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/rwmc/SplattingParameters.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/rwmc/ResolveParameters.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL") From a86a1b328af2e9e177599a5220e350956501616e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 14 Nov 2025 17:06:17 +0700 Subject: [PATCH 045/140] added missing typename qualifier Signed-off-by: Corey --- include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index cb8d3b27d1..6484ef38b7 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -158,7 +158,7 @@ struct Resolver CascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, float reciprocalBaseI) { - CascadeAccessor::output_type tmp; + typename CascadeAccessor::output_type tmp; output_type neighbourhood[9]; neighbourhood[0] = acc.template get(coord + int16_t2(-1, -1), cascadeIndex); neighbourhood[1] = acc.template get(coord + int16_t2(0, -1), cascadeIndex); From df9faf952fb1b4e07dcfca6699b49a140e41b1f8 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 17 Nov 2025 16:20:24 +0700 Subject: [PATCH 046/140] minor fixes to spherical rect, latest example Signed-off-by: Corey --- .../hlsl/sampling/spherical_rectangle.hlsl | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index 127a7194b2..c5503e2663 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -35,7 +35,7 @@ struct SphericalRectangle vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, 
NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S) { const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); - const vector4_type n_z = denorm_n_z / hlsl::sqrt((vector4_type)(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); + const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); const vector4_type cosGamma = vector4_type( -n_z[0] * n_z[1], -n_z[1] * n_z[2], @@ -50,32 +50,32 @@ struct SphericalRectangle angle_adder.addCosine(cosGamma[3]); scalar_type q = angle_adder.getSumofArccos(); - const scalar_type k = 2 * numbers::pi - q; + const scalar_type k = scalar_type(2.0) * numbers::pi - q; const scalar_type b0 = n_z[0]; const scalar_type b1 = n_z[2]; - S = p + q - 2 * numbers::pi; + S = p + q - scalar_type(2.0) * numbers::pi; - const scalar_type CLAMP_EPS = 1e-5f; + const scalar_type CLAMP_EPS = 1e-5; - // flip z axsis if rect.r0.z > 0 - const uint32_t zFlipMask = (bit_cast(rect.r0.z) ^ 0x80000000u) & 0x80000000u; - rect.r0.z = bit_cast(bit_cast(rect.r0.z) ^ zFlipMask); + // flip z axis if rect.r0.z > 0 + rect.r0.z = ieee754::flipSignIfRHSNegative(rect.r0.z, -rect.r0.z); vector3_type r1 = rect.r0 + vector3_type(rectangleExtents.x, rectangleExtents.y, 0); const scalar_type au = uv.x * S + k; const scalar_type fu = (hlsl::cos(au) * b0 - b1) / hlsl::sin(au); const scalar_type cu_2 = hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] - const scalar_type cu = bit_cast(bit_cast(1.0 / hlsl::sqrt(cu_2)) ^ (bit_cast(fu) & 0x80000000u)); + const scalar_type cu = ieee754::flipSignIfRHSNegative(scalar_type(1.0) / hlsl::sqrt(cu_2), fu); - scalar_type xu = -(cu * rect.r0.z) * 1.0 / hlsl::sqrt(1 - cu * cu); + scalar_type xu = -(cu * rect.r0.z) / hlsl::sqrt(scalar_type(1.0) - cu * cu); xu = hlsl::clamp(xu, rect.r0.x, r1.x); // avoid Infs const scalar_type d_2 = xu * xu + rect.r0.z * rect.r0.z; const scalar_type d = 
hlsl::sqrt(d_2); const scalar_type h0 = rect.r0.y / hlsl::sqrt(d_2 + rect.r0.y * rect.r0.y); const scalar_type h1 = r1.y / hlsl::sqrt(d_2 + r1.y * r1.y); - const scalar_type hv = h0 + uv.y * (h1 - h0), hv2 = hv * hv; - const scalar_type yv = (hv2 < 1 - CLAMP_EPS) ? (hv * d) / hlsl::sqrt(1 - hv2) : r1.y; + const scalar_type hv = h0 + uv.y * (h1 - h0); + const scalar_type hv2 = hv * hv; + const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - CLAMP_EPS); return vector2_type((xu - rect.r0.x) / rectangleExtents.x, (yv - rect.r0.y) / rectangleExtents.y); } From e2031095fe85a5168936d51fd2069df2b5a6c709 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 18 Nov 2025 12:00:41 +0700 Subject: [PATCH 047/140] quaternion struct, renamed spherical tri/rect shapes Signed-off-by: Corey --- .../nbl/builtin/hlsl/math/quaternions.hlsl | 104 ++++++++++++++++++ .../hlsl/sampling/spherical_rectangle.hlsl | 2 +- .../hlsl/sampling/spherical_triangle.hlsl | 21 +--- ...ectangle.hlsl => spherical_rectangle.hlsl} | 4 +- ...{triangle.hlsl => spherical_triangle.hlsl} | 4 +- .../asset/utils/CSmoothNormalGenerator.cpp | 2 +- src/nbl/builtin/CMakeLists.txt | 6 +- 7 files changed, 118 insertions(+), 25 deletions(-) create mode 100644 include/nbl/builtin/hlsl/math/quaternions.hlsl rename include/nbl/builtin/hlsl/shapes/{rectangle.hlsl => spherical_rectangle.hlsl} (94%) rename include/nbl/builtin/hlsl/shapes/{triangle.hlsl => spherical_triangle.hlsl} (97%) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl new file mode 100644 index 0000000000..aca8d1ff3c --- /dev/null +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -0,0 +1,104 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace math +{ + +template +struct quaternion_t +{ + using this_t = quaternion_t; + using scalar_type = T; + using data_type = vector; + using vector3_type = vector; + using matrix_type = matrix; + + static this_t createFromTruncated(const vector3_type first3Components) + { + this_t retval; + retval.data.xyz = first3Components; + retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(first3Components, first3Components)); + return retval; + } + + static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle) + { + using AsUint = typename unsigned_integer_of_size::type; + const AsUint negationMask = hlsl::bit_cast(totalPseudoAngle) & AsUint(0x80000000u); + const data_type adjEnd = hlsl::bit_cast(hlsl::bit_cast(end.data) ^ negationMask); + + this_t retval; + retval.data = hlsl::mix(start.data, adjEnd, fraction); + return retval; + } + + static this_t lerp(const this_t start, const this_t end, const scalar_type fraction) + { + return lerp(start, end, fraction, hlsl::dot(start.data, end.data)); + } + + static scalar_type __adj_interpolant(const scalar_type angle, const scalar_type fraction, const scalar_type interpolantPrecalcTerm2, const scalar_type interpolantPrecalcTerm3) + { + const scalar_type A = scalar_type(1.0904) + angle * (scalar_type(-3.2452) + angle * (scalar_type(3.55645) - angle * scalar_type(1.43519))); + const scalar_type B = scalar_type(0.848013) + angle * (scalar_type(-1.06021) + angle * scalar_type(0.215638)); + const scalar_type k = A * interpolantPrecalcTerm2 + B; + return fraction + interpolantPrecalcTerm3 * k; + } + + static this_t flerp(const this_t start, const this_t end, const 
scalar_type fraction) + { + const scalar_type pseudoAngle = hlsl::dot(start.data,end.data); + const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5); + const scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0)); + const scalar_type adjFrac = __adj_interpolant(hlsl::abs(pseudoAngle),fraction,interpolantPrecalcTerm*interpolantPrecalcTerm,interpolantPrecalcTerm3); + + this_t retval = lerp(start,end,adjFrac,pseudoAngle); + retval.data = hlsl::normalize(retval.data); + return retval; + } + + matrix_type constructMatrix() + { + matrix_type mat; + mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0); + mat[1] = data.yzx * data.xzw + data.zxy * data.wxz * vector3_type(-1.0, 1.0, 1.0); + mat[2] = data.yzx * data.wyx + data.zxy * data.xwy * vector3_type( 1.0,-1.0, 1.0); + mat[0][0] = scalar_type(0.5) - mat[0][0]; + mat[1][1] = scalar_type(0.5) - mat[1][1]; + mat[2][2] = scalar_type(0.5) - mat[2][2]; + mat *= scalar_type(2.0); + return hlsl::transpose(mat); // TODO: double check transpose? 
+ } + + static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart) + { + vector3_type planeNormal = hlsl::cross(start,preScaledWaypoint); + + cosAngleFromStart *= scalar_type(0.5); + const scalar_type sinAngle = hlsl::sqrt(scalar_type(0.5) - cosAngleFromStart); + const scalar_type cosAngle = hlsl::sqrt(scalar_type(0.5) + cosAngleFromStart); + + planeNormal *= sinAngle; + const vector3_type precompPart = hlsl::cross(planeNormal, start) * scalar_type(2.0); + + return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart); + } + + data_type data; +}; + +} +} +} + +#endif diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index c5503e2663..f5c19fb864 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace nbl { diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index fd3a616e8d..0c86b69793 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -8,7 +8,8 @@ #include #include #include -#include +#include +#include namespace nbl { @@ -31,20 +32,6 @@ struct SphericalTriangle return retval; } - vector3_type slerp_delta(NBL_CONST_REF_ARG(vector3_type) start, NBL_CONST_REF_ARG(vector3_type) preScaledWaypoint, scalar_type cosAngleFromStart) - { - vector3_type planeNormal = nbl::hlsl::cross(start,preScaledWaypoint); - - cosAngleFromStart *= 0.5; - const scalar_type sinAngle = nbl::hlsl::sqrt(0.5 - cosAngleFromStart); - const scalar_type cosAngle = nbl::hlsl::sqrt(0.5 + cosAngleFromStart); - - planeNormal *= sinAngle; - const vector3_type precompPart = nbl::hlsl::cross(planeNormal, start) * 2.0; - - return 
precompPart * cosAngle + nbl::hlsl::cross(planeNormal, precompPart); - } - // WARNING: can and will return NAN if one or three of the triangle edges are near zero length vector3_type generate(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector2_type) u) { @@ -64,7 +51,7 @@ struct SphericalTriangle { const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) - C_s += slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); + C_s += math::quaternion_t::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); } vector3_type retval = tri.vertex1; @@ -74,7 +61,7 @@ struct SphericalTriangle { const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) - retval += slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); + retval += math::quaternion_t::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); } return retval; } diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl similarity index 94% rename from include/nbl/builtin/hlsl/shapes/rectangle.hlsl rename to include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index 434918cc09..daeb3175c3 100644 --- a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -2,8 +2,8 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_BUILTIN_HLSL_SHAPES_RECTANGLE_INCLUDED_ -#define _NBL_BUILTIN_HLSL_SHAPES_RECTANGLE_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_SHAPES_SPHERICAL_RECTANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_SPHERICAL_RECTANGLE_INCLUDED_ #include #include diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl similarity index 97% rename from include/nbl/builtin/hlsl/shapes/triangle.hlsl rename to include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index de82849389..ff2b3aec35 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -2,8 +2,8 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ -#define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_SHAPES_SPHERICAL_TRIANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_SPHERICAL_TRIANGLE_INCLUDED_ #include #include diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 8c03ad99b9..43413152a8 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -5,7 +5,7 @@ #include "CSmoothNormalGenerator.h" #include "nbl/core/declarations.h" -#include "nbl/builtin/hlsl/shapes/triangle.hlsl" +#include "nbl/builtin/hlsl/shapes/spherical_triangle.hlsl" #include diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index fd8dccbd01..277d60e5bd 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -229,6 +229,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED 
"hlsl/math/polar.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/angle_adding.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") @@ -252,8 +253,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/circle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/ellipse.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/line.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") -LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/rectangle.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_triangle.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_rectangle.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") #sampling LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/bilinear.hlsl") From 07bf4d9e118c53f1b75344f622ee1b95d93bb5fb Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 18 Nov 2025 15:49:11 +0700 Subject: [PATCH 048/140] make PartitionRandVar in struct and templated Signed-off-by: Corey --- .../hlsl/bxdf/base/cook_torrance_base.hlsl | 4 +- .../bxdf/transmission/smooth_dielectric.hlsl | 7 ++- include/nbl/builtin/hlsl/math/functions.hlsl | 15 ------- include/nbl/builtin/hlsl/sampling/basic.hlsl | 44 +++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 5 files changed, 53 insertions(+), 18 deletions(-) create mode 100644 include/nbl/builtin/hlsl/sampling/basic.hlsl diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl 
b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index e88d5fccb7..5e5e543791 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -8,6 +8,7 @@ #include "nbl/builtin/hlsl/bxdf/config.hlsl" #include "nbl/builtin/hlsl/bxdf/ndf.hlsl" #include "nbl/builtin/hlsl/bxdf/fresnel.hlsl" +#include "nbl/builtin/hlsl/sampling/basic.hlsl" #include "nbl/builtin/hlsl/bxdf/ndf/microfacet_to_light_transform.hlsl" namespace nbl @@ -302,7 +303,8 @@ struct SCookTorrance scalar_type rcpChoiceProb; scalar_type z = u.z; - bool transmitted = math::partitionRandVariable(reflectance, z, rcpChoiceProb); + sampling::PartitionRandVariable partitionRandVariable; + bool transmitted = partitionRandVariable(reflectance, z, rcpChoiceProb); const scalar_type LdotH = hlsl::mix(VdotH, ieee754::copySign(hlsl::sqrt(rcpEta.value2[0]*VdotH*VdotH + scalar_type(1.0) - rcpEta.value2[0]), -VdotH), transmitted); bool valid; diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl index 17400adfe2..712b614755 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl @@ -6,6 +6,7 @@ #include "nbl/builtin/hlsl/bxdf/common.hlsl" #include "nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl" +#include "nbl/builtin/hlsl/sampling/basic.hlsl" #include "nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl" namespace nbl @@ -39,7 +40,8 @@ struct SSmoothDielectric const scalar_type reflectance = fresnel::Dielectric::__call(orientedEta.value*orientedEta.value, interaction.getNdotV(_clamp))[0]; scalar_type rcpChoiceProb; - bool transmitted = math::partitionRandVariable(reflectance, u.z, rcpChoiceProb); + sampling::PartitionRandVariable partitionRandVariable; + bool transmitted = partitionRandVariable(reflectance, u.z, rcpChoiceProb); ray_dir_info_type V = 
interaction.getV(); Refract r = Refract::create(V.getDirection(), interaction.getN()); @@ -125,7 +127,8 @@ struct SThinSmoothDielectric scalar_type rcpChoiceProb; scalar_type z = u.z; - const bool transmitted = math::partitionRandVariable(reflectionProb, z, rcpChoiceProb); + sampling::PartitionRandVariable partitionRandVariable; + const bool transmitted = partitionRandVariable(reflectionProb, z, rcpChoiceProb); remainderMetadata = hlsl::mix(reflectance, hlsl::promote(1.0) - reflectance, transmitted) * rcpChoiceProb; ray_dir_info_type V = interaction.getV(); diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index 046c72c527..a52eb21c23 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -120,21 +120,6 @@ void frisvad(NBL_CONST_REF_ARG(T) normal, NBL_REF_ARG(T) tangent, NBL_REF_ARG(T) } } -bool partitionRandVariable(float leftProb, NBL_REF_ARG(float) xi, NBL_REF_ARG(float) rcpChoiceProb) -{ - const float32_t NEXT_ULP_AFTER_UNITY = bit_cast(0x3f800001u); - const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; - - // This is all 100% correct taking into account the above NEXT_ULP_AFTER_UNITY - xi -= pickRight ? leftProb : 0.0f; - - rcpChoiceProb = 1.0f / (pickRight ? (1.0f - leftProb) : leftProb); - xi *= rcpChoiceProb; - - return pickRight; -} - - namespace impl { template diff --git a/include/nbl/builtin/hlsl/sampling/basic.hlsl b/include/nbl/builtin/hlsl/sampling/basic.hlsl new file mode 100644 index 0000000000..d0738dd930 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/basic.hlsl @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_BASIC_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_BASIC_INCLUDED_ + +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template) +struct PartitionRandVariable +{ + using floating_point_type = T; + using uint_type = typename unsigned_integer_of_size::type; + + bool operator()(floating_point_type leftProb, NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) + { + const floating_point_type NEXT_ULP_AFTER_UNITY = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); + const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; + + // This is all 100% correct taking into account the above NEXT_ULP_AFTER_UNITY + xi -= pickRight ? leftProb : floating_point_type(0.0); + + rcpChoiceProb = floating_point_type(1.0) / (pickRight ? (floating_point_type(1.0) - leftProb) : leftProb); + xi *= rcpChoiceProb; + + return pickRight; + } +}; + + +} +} +} + +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 277d60e5bd..7b08519b5c 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -257,6 +257,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_triangle.hls LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_rectangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") #sampling +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/basic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/bilinear.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl") From ff88a7d294699c512577f00aaab2ea7961c19aef Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 18 Nov 2025 17:11:12 +0700 Subject: [PATCH 049/140] precompute values in linear, 
bilinear sampling; make box muller a struct Signed-off-by: Corey --- .../nbl/builtin/hlsl/sampling/bilinear.hlsl | 7 +++-- .../hlsl/sampling/box_muller_transform.hlsl | 27 +++++++++++++------ include/nbl/builtin/hlsl/sampling/linear.hlsl | 17 +++++++----- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 42a923f650..746713e4c4 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -28,13 +28,13 @@ struct Bilinear { Bilinear retval; retval.bilinearCoeffs = bilinearCoeffs; + retval.twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); return retval; } vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) _u) { vector2_type u = _u; - const vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); Linear lineary = Linear::create(twiceAreasUnderXCurve); u.y = lineary.generate(u.y); @@ -52,7 +52,10 @@ struct Bilinear return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]); } - vector4_type bilinearCoeffs; + // unit square: x0y0 x1y0 + // x0y1 x1y1 + vector4_type bilinearCoeffs; // (x0y0, x0y1, x1y0, x1y1) + vector2_type twiceAreasUnderXCurve; }; } diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index dcac2279be..93cea06ee0 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -2,8 +2,8 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_BUILTIN_HLSL_BOX_MULLER_TRANSFORM_INCLUDED_ -#define _NBL_BUILTIN_HLSL_BOX_MULLER_TRANSFORM_INCLUDED_ +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_BOX_MULLER_TRANSFORM_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_BOX_MULLER_TRANSFORM_INCLUDED_ #include "nbl/builtin/hlsl/math/functions.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" @@ -12,15 +12,26 @@ namespace nbl { namespace hlsl { +namespace sampling +{ -template -vector boxMullerTransform(vector xi, T stddev) +template) +struct BoxMullerTransform { - T sinPhi, cosPhi; - math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); - return vector(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev; -} + using scalar_type = T; + using vector2_type = vector; + + vector2_type operator()(vector2_type xi) + { + scalar_type sinPhi, cosPhi; + math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); + return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev; + } + T stddev; +}; + +} } } diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index 12d445eefe..ddd7bcf8df 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -21,21 +21,26 @@ struct Linear using scalar_type = T; using vector2_type = vector; - static Linear create(NBL_CONST_REF_ARG(vector2_type) linearCoeffs) + static Linear create(NBL_CONST_REF_ARG(vector2_type) linearCoeffs) // start and end importance values (start, end) { Linear retval; - retval.linearCoeffs = linearCoeffs; + retval.linearCoeffStart = linearCoeffs[0]; + retval.rcpDiff = 1.0 / (linearCoeffs[0] - linearCoeffs[1]); + vector2_type squaredCoeffs = linearCoeffs * linearCoeffs; + retval.squaredCoeffStart = squaredCoeffs[0]; + retval.squaredCoeffDiff = squaredCoeffs[1] - squaredCoeffs[0]; return retval; } scalar_type 
generate(scalar_type u) { - const scalar_type rcpDiff = 1.0 / (linearCoeffs[0] - linearCoeffs[1]); - const vector2_type squaredCoeffs = linearCoeffs * linearCoeffs; - return nbl::hlsl::abs(rcpDiff) < numeric_limits::max ? (linearCoeffs[0] - nbl::hlsl::sqrt(nbl::hlsl::mix(squaredCoeffs[0], squaredCoeffs[1], u))) * rcpDiff : u; + return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); } - vector2_type linearCoeffs; + scalar_type linearCoeffStart; + scalar_type rcpDiff; + scalar_type squaredCoeffStart; + scalar_type squaredCoeffDiff; }; } From 6cb53244457a3c533893f6fee5270373e5f9e330 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 19 Nov 2025 14:36:35 +0700 Subject: [PATCH 050/140] precompute cos_sides, csc_sides in shape spherical_tri Signed-off-by: Corey --- .../hlsl/shapes/spherical_triangle.hlsl | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index ff2b3aec35..616ad17b53 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -31,24 +31,22 @@ struct SphericalTriangle retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin); retval.vertex1 = nbl::hlsl::normalize(vertex1 - origin); retval.vertex2 = nbl::hlsl::normalize(vertex2 - origin); + retval.cos_sides = vector3_type(hlsl::dot(vertex1, vertex2), hlsl::dot(vertex2, vertex0), hlsl::dot(vertex0, vertex1)); + const vector3_type csc_sides2 = hlsl::promote(1.0) - retval.cos_sides * retval.cos_sides; + retval.csc_sides.x = hlsl::rsqrt(csc_sides2.x); + retval.csc_sides.y = hlsl::rsqrt(csc_sides2.y); + retval.csc_sides.z = hlsl::rsqrt(csc_sides2.z); return retval; } - bool pyramidAngles(NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides) + bool pyramidAngles() { - cos_sides = 
vector3_type(hlsl::dot(vertex1, vertex2), hlsl::dot(vertex2, vertex0), hlsl::dot(vertex0, vertex1)); - csc_sides = (vector3_type)(1.f) - cos_sides * cos_sides; - csc_sides.x = hlsl::rsqrt(csc_sides.x); - csc_sides.y = hlsl::rsqrt(csc_sides.y); - csc_sides.z = hlsl::rsqrt(csc_sides.z); - return hlsl::any >(csc_sides >= (vector3_type)(numeric_limits::max)); } scalar_type solidAngleOfTriangle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices, NBL_REF_ARG(scalar_type) cos_a, NBL_REF_ARG(scalar_type) cos_c, NBL_REF_ARG(scalar_type) csc_b, NBL_REF_ARG(scalar_type) csc_c) { - vector3_type cos_sides,csc_sides; - if (pyramidAngles(cos_sides, csc_sides)) + if (pyramidAngles()) return 0.f; // these variables might eventually get optimized out @@ -58,8 +56,8 @@ struct SphericalTriangle csc_c = csc_sides[2]; // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI - cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) - sin_vertices = hlsl::sqrt((vector3_type)1.f - cos_vertices * cos_vertices); + cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
+ sin_vertices = hlsl::sqrt(hlsl::promote(1.0) - cos_vertices * cos_vertices); math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); angle_adder.addAngle(cos_vertices[1], sin_vertices[1]); @@ -76,7 +74,7 @@ struct SphericalTriangle scalar_type projectedSolidAngleOfTriangle(NBL_CONST_REF_ARG(vector3_type) receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) { - if (pyramidAngles(cos_sides, csc_sides)) + if (pyramidAngles()) return 0.f; vector3_type awayFromEdgePlane0 = hlsl::cross(vertex1, vertex2) * csc_sides[0]; @@ -88,7 +86,7 @@ struct SphericalTriangle cos_vertices[1] = hlsl::dot(awayFromEdgePlane2, awayFromEdgePlane0); cos_vertices[2] = hlsl::dot(awayFromEdgePlane0, awayFromEdgePlane1); // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works - cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, (vector3_type)(-1.f), (vector3_type)1.f); + cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); matrix awayFromEdgePlane = matrix(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2); const vector3_type externalProducts = hlsl::abs(hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal)); @@ -100,6 +98,8 @@ struct SphericalTriangle vector3_type vertex0; vector3_type vertex1; vector3_type vertex2; + vector3_type cos_sides; + vector3_type csc_sides; }; } From 7a4f0d62443ece8375702146d04b51c4e7affe0a Mon Sep 17 00:00:00 2001 From: Przemog1 Date: Fri, 14 Nov 2025 21:07:07 +0100 Subject: [PATCH 051/140] Fixed `ResolveAccessor` concept Signed-off-by: Corey --- include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 60 +++++++++++----------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index 6484ef38b7..d8f777d277 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -5,6 +5,8 @@ #include #include #include +#include +#include namespace nbl { @@ -19,23 +21,21 @@ namespace rwmc // not the greatest syntax but works #define NBL_CONCEPT_PARAM_0 (a,T) #define NBL_CONCEPT_PARAM_1 (scalar,VectorScalarType) -#define NBL_CONCEPT_PARAM_2 (vec,vector) // start concept NBL_CONCEPT_BEGIN(2) // need to be defined AFTER the concept begins #define a NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 #define scalar NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 -#define vec NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_2 NBL_CONCEPT_END( - ((NBL_CONCEPT_REQ_EXPR)((a.calcLuma(vec)))) + ((NBL_CONCEPT_REQ_EXPR)((a.calcLuma(vector(scalar, scalar, scalar))))) ); #undef a -#undef vec +#undef scalar #include /* ResolveAccessor is required to: * - satisfy `LoadableImage` concept requirements -* - implement function called `calcLuma` which calculates luma from a pixel value +* - implement function called `calcLuma` which calculates luma from a 3 component pixel value */ template @@ -50,9 +50,9 @@ struct ResolveAccessorAdaptor RWTexture2DArray cascade; - float32_t calcLuma(in float32_t3 col) + float32_t calcLuma(NBL_REF_ARG(float32_t3) col) { - return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); + return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); } template @@ -69,10 +69,11 @@ struct ResolveAccessorAdaptor } }; -template //NBL_PRIMARY_REQUIRES(ResolveAccessor) +template && ResolveAccessor) struct Resolver { - using output_type = OutputColorType; + using output_type = OutputColorTypeVec; + using scalar_t = typename vector_traits::scalar_type; struct CascadeSample { @@ -91,13 +92,15 @@ struct Resolver output_type operator()(NBL_REF_ARG(CascadeAccessor) acc, const int16_t2 coord) { - float reciprocalBaseI = 1.f; + using scalar_t = typename 
vector_traits::scalar_type; + + scalar_t reciprocalBaseI = 1.f; CascadeSample curr = __sampleCascade(acc, coord, 0u, reciprocalBaseI); - float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); - float Emin = params.initialEmin; + output_type accumulation = output_type(0.0f, 0.0f, 0.0f); + scalar_t Emin = params.initialEmin; - float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; + scalar_t prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; for (int16_t i = 0u; i <= params.lastCascadeIndex; i++) { const bool notFirstCascade = i != 0; @@ -110,13 +113,13 @@ struct Resolver next = __sampleCascade(acc, coord, int16_t(i + 1), reciprocalBaseI); } - float reliability = 1.f; + scalar_t reliability = 1.f; // sample counting-based reliability estimation if (params.reciprocalKappa <= 1.f) { - float localReliability = curr.normalizedCenterLuma; + scalar_t localReliability = curr.normalizedCenterLuma; // reliability in 3x3 pixel block (see robustness) - float globalReliability = curr.normalizedNeighbourhoodAverageLuma; + scalar_t globalReliability = curr.normalizedNeighbourhoodAverageLuma; if (notFirstCascade) { localReliability += prevNormalizedCenterLuma; @@ -130,11 +133,11 @@ struct Resolver // check if above minimum sampling threshold (avg 9 sample occurences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: tertiary op is in reverse) reliability = globalReliability < params.reciprocalN ? 
globalReliability : localReliability; { - const float accumLuma = acc.calcLuma(accumulation); + const scalar_t accumLuma = acc.calcLuma(accumulation); if (accumLuma > Emin) Emin = accumLuma; - const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; + const scalar_t colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; reliability += colorReliability; reliability *= params.NOverKappa; @@ -156,19 +159,18 @@ struct Resolver // pseudo private stuff: - CascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, float reciprocalBaseI) + CascadeSample __sampleCascade(NBL_REF_ARG(CascadeAccessor) acc, int16_t2 coord, uint16_t cascadeIndex, scalar_t reciprocalBaseI) { - typename CascadeAccessor::output_type tmp; output_type neighbourhood[9]; - neighbourhood[0] = acc.template get(coord + int16_t2(-1, -1), cascadeIndex); - neighbourhood[1] = acc.template get(coord + int16_t2(0, -1), cascadeIndex); - neighbourhood[2] = acc.template get(coord + int16_t2(1, -1), cascadeIndex); - neighbourhood[3] = acc.template get(coord + int16_t2(-1, 0), cascadeIndex); - neighbourhood[4] = acc.template get(coord + int16_t2(0, 0), cascadeIndex); - neighbourhood[5] = acc.template get(coord + int16_t2(1, 0), cascadeIndex); - neighbourhood[6] = acc.template get(coord + int16_t2(-1, 1), cascadeIndex); - neighbourhood[7] = acc.template get(coord + int16_t2(0, 1), cascadeIndex); - neighbourhood[8] = acc.template get(coord + int16_t2(1, 1), cascadeIndex); + neighbourhood[0] = acc.template get(coord + int16_t2(-1, -1), cascadeIndex).xyz; + neighbourhood[1] = acc.template get(coord + int16_t2(0, -1), cascadeIndex).xyz; + neighbourhood[2] = acc.template get(coord + int16_t2(1, -1), cascadeIndex).xyz; + neighbourhood[3] = acc.template get(coord + int16_t2(-1, 0), cascadeIndex).xyz; + neighbourhood[4] = acc.template get(coord + int16_t2(0, 0), cascadeIndex).xyz; + neighbourhood[5] = acc.template get(coord + 
int16_t2(1, 0), cascadeIndex).xyz; + neighbourhood[6] = acc.template get(coord + int16_t2(-1, 1), cascadeIndex).xyz; + neighbourhood[7] = acc.template get(coord + int16_t2(0, 1), cascadeIndex).xyz; + neighbourhood[8] = acc.template get(coord + int16_t2(1, 1), cascadeIndex).xyz; // numerical robustness float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + From d97664c63e2fc3847d429ccedb02348d7bed26a2 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 20 Nov 2025 17:29:53 +0700 Subject: [PATCH 052/140] fix spherical tri precompute Signed-off-by: Corey --- include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index 616ad17b53..246ca5a084 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -31,7 +31,7 @@ struct SphericalTriangle retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin); retval.vertex1 = nbl::hlsl::normalize(vertex1 - origin); retval.vertex2 = nbl::hlsl::normalize(vertex2 - origin); - retval.cos_sides = vector3_type(hlsl::dot(vertex1, vertex2), hlsl::dot(vertex2, vertex0), hlsl::dot(vertex0, vertex1)); + retval.cos_sides = vector3_type(hlsl::dot(retval.vertex1, retval.vertex2), hlsl::dot(retval.vertex2, retval.vertex0), hlsl::dot(retval.vertex0, retval.vertex1)); const vector3_type csc_sides2 = hlsl::promote(1.0) - retval.cos_sides * retval.cos_sides; retval.csc_sides.x = hlsl::rsqrt(csc_sides2.x); retval.csc_sides.y = hlsl::rsqrt(csc_sides2.y); From 0b089b970cb50fbcb32c9f6121b953fd69dbd157 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 26 Nov 2025 14:55:31 +0700 Subject: [PATCH 053/140] make rwmc accumulator match concept Signed-off-by: Corey --- include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 3 ++- 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 77cfb3c283..279b3c509a 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -41,6 +41,7 @@ struct CascadeAccumulator using cascade_layer_scalar_type = typename vector_traits::scalar_type; using this_t = CascadeAccumulator; + using input_sample_type = CascadeLayerType; using output_storage_type = CascadeEntry; using initialization_data = SplattingParameters; output_storage_type accumulation; @@ -66,7 +67,7 @@ struct CascadeAccumulator } // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp - void addSample(uint32_t sampleCount, CascadeLayerType _sample) + void addSample(uint32_t sampleCount, input_sample_type _sample) { const cascade_layer_scalar_type log2Start = splattingParameters.log2Start; const cascade_layer_scalar_type log2Base = splattingParameters.log2Base; From 3ba44cb4ec3f52ca32cce03675a4f50965dc30c7 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Nov 2025 10:58:12 +0700 Subject: [PATCH 054/140] pack rwmc params to half 2x16 Signed-off-by: Corey --- include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 5 +++-- include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 279b3c509a..593e267a26 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -69,8 +69,9 @@ struct CascadeAccumulator // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp void addSample(uint32_t sampleCount, input_sample_type _sample) { - const cascade_layer_scalar_type log2Start = 
splattingParameters.log2Start; - const cascade_layer_scalar_type log2Base = splattingParameters.log2Base; + const float32_t2 unpackedParams = hlsl::unpackHalf2x16(splattingParameters.packedLog2); + const cascade_layer_scalar_type log2Start = unpackedParams[0]; + const cascade_layer_scalar_type log2Base = unpackedParams[1]; const cascade_layer_scalar_type luma = getLuma(_sample); const cascade_layer_scalar_type log2Luma = log2(luma); const cascade_layer_scalar_type cascade = log2Luma * 1.f / log2Base - log2Start / log2Base; diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index e74dd0e5bd..c549d83be6 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -12,8 +12,10 @@ namespace rwmc struct SplattingParameters { - float log2Start; - float log2Base; + // float16_t log2Start; 0 + // float16_t log2Base; 1 + // pack as Half2x16 + int32_t packedLog2; }; } From 5a80b45d664e147a58443165c14eaa3a8cf55d83 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 26 Nov 2025 17:44:36 +0300 Subject: [PATCH 055/140] took `transformation_matrix_utils.hlsl` from Arek's `cameraz` branch Signed-off-by: Corey --- .../transformation_matrix_utils.hlsl | 235 ++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl new file mode 100644 index 0000000000..1ad16dc28d --- /dev/null +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -0,0 +1,235 @@ +#ifndef _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +//TODO: stolen from cameraz branch, don't have
epsilonEqual here, maybe uncomment when merging from imguizmo-lights branch +//// TODO: -> move somewhere else and nbl:: to implement it +//template +//bool isOrthoBase(const T& x, const T& y, const T& z, const E epsilon = 1e-6) +//{ +// auto isNormalized = [](const auto& v, const auto& epsilon) -> bool +// { +// return glm::epsilonEqual(glm::length(v), 1.0, epsilon); +// }; +// +// auto isOrthogonal = [](const auto& a, const auto& b, const auto& epsilon) -> bool +// { +// return glm::epsilonEqual(glm::dot(a, b), 0.0, epsilon); +// }; +// +// return isNormalized(x, epsilon) && isNormalized(y, epsilon) && isNormalized(z, epsilon) && +// isOrthogonal(x, y, epsilon) && isOrthogonal(x, z, epsilon) && isOrthogonal(y, z, epsilon); +//} +//// <- + +template +matrix getMatrix3x4As4x4(const matrix& mat) +{ + matrix output; + for (int i = 0; i < 3; ++i) + output[i] = mat[i]; + output[3] = float32_t4(0.0f, 0.0f, 0.0f, 1.0f); + + return output; +} + +template +matrix getMatrix3x3As4x4(const matrix& mat) +{ + matrix output; + for (int i = 0; i < 3; ++i) + output[i] = float32_t4(mat[i], 1.0f); + output[3] = float32_t4(0.0f, 0.0f, 0.0f, 1.0f); + + return output; +} + +template +inline vector getCastedVector(const vector& in) +{ + vector out; + + for (int i = 0; i < N; ++i) + out[i] = (Tout)(in[i]); + + return out; +} + +template +inline matrix getCastedMatrix(const matrix& in) +{ + matrix out; + + for (int i = 0; i < N; ++i) + out[i] = getCastedVector(in[i]); + + return out; +} + +// TODO: use portable_float when merged +//! 
multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) +template +inline matrix concatenateBFollowedByA(const matrix& a, const matrix& b) +{ + const auto a4x4 = getMatrix3x4As4x4(a); + const auto b4x4 = getMatrix3x4As4x4(b); + return matrix(mul(a4x4, b4x4)); +} + +// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) + +template +inline matrix buildCameraLookAtMatrixLH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = glm::normalize(target - position); + const vector xaxis = glm::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +template +inline matrix buildCameraLookAtMatrixRH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = glm::normalize(position - target); + const vector xaxis = glm::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +// TODO: test, check if there is better implementation +// TODO: move quaternion to nbl::hlsl +// TODO: why NBL_REF_ARG(MatType) doesn't work????? + +//! 
Replaces current rotation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th column unchanged +template +inline void setRotation(matrix& outMat, NBL_CONST_REF_ARG(core::quaternion) quat) +{ + static_assert(N == 3 || N == 4); + + outMat[0] = vector( + 1 - 2 * (quat.y * quat.y + quat.z * quat.z), + 2 * (quat.x * quat.y - quat.z * quat.w), + 2 * (quat.x * quat.z + quat.y * quat.w), + outMat[0][3] + ); + + outMat[1] = vector( + 2 * (quat.x * quat.y + quat.z * quat.w), + 1 - 2 * (quat.x * quat.x + quat.z * quat.z), + 2 * (quat.y * quat.z - quat.x * quat.w), + outMat[1][3] + ); + + outMat[2] = vector( + 2 * (quat.x * quat.z - quat.y * quat.w), + 2 * (quat.y * quat.z + quat.x * quat.w), + 1 - 2 * (quat.x * quat.x + quat.y * quat.y), + outMat[2][3] + ); +} + +template +inline void setTranslation(matrix& outMat, NBL_CONST_REF_ARG(vector) translation) +{ + static_assert(N == 3 || N == 4); + + outMat[0].w = translation.x; + outMat[1].w = translation.y; + outMat[2].w = translation.z; +} + + +template +inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, -1.f, 0.f); + + return m; +} +template +inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w,
0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 1.f, 0.f); + + return m; +} + +template +inline matrix buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + +template +inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + +} +} + +#endif \ No newline at end of file From e14e8ecaf6a75fd68fb3fcc435089bbad89fdc75 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 2 Dec 2025 17:04:02 +0700 Subject: [PATCH 056/140] change quaternion struct name to match what it will be Signed-off-by: Corey --- include/nbl/builtin/hlsl/math/quaternions.hlsl | 4 ++-- include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl index aca8d1ff3c..8d50202f4e 100644 --- 
a/include/nbl/builtin/hlsl/math/quaternions.hlsl +++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl @@ -15,9 +15,9 @@ namespace math { template -struct quaternion_t +struct quaternion { - using this_t = quaternion_t; + using this_t = quaternion; using scalar_type = T; using data_type = vector; using vector3_type = vector; diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 0c86b69793..c31e194788 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -51,7 +51,7 @@ struct SphericalTriangle { const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) - C_s += math::quaternion_t::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); + C_s += math::quaternion::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); } vector3_type retval = tri.vertex1; @@ -61,7 +61,7 @@ struct SphericalTriangle { const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) - retval += math::quaternion_t::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); + retval += math::quaternion::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); } return retval; } From a361548bb9d0dbb37a97030c924946b9502df128 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 4 Dec 2025 11:17:02 +0700 Subject: [PATCH 057/140] removed temp fix for mix_helper require Signed-off-by: Corey --- include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index ae78ea92c3..5e3418efe0 100644 --- 
a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -269,8 +269,8 @@ struct mix_helper) > }; template -NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable && concepts::Boolean) -struct mix_helper && concepts::Boolean) > +NBL_PARTIAL_REQ_TOP(spirv::SelectIsCallable) +struct mix_helper) > { using return_t = conditional_t, vector::scalar_type, vector_traits::Dimension>, T>; // for a component of a that is false, the corresponding component of x is returned From 77c29e5a889c504166c9906574ed076e160fe54b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 5 Dec 2025 14:04:25 +0700 Subject: [PATCH 058/140] fixes more nan problems + a few bugs in iridescent fresnel Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 71 +++++++++++++--------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index f7655e9978..0f2b3486ab 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -508,25 +508,26 @@ struct iridescent_helper using scalar_type = typename vector_traits::scalar_type; using vector_type = T; - // returns reflectance R = (rp, rs), phi is the phase shift for each plane of polarization (p,s) - static void phase_shift(const vector_type orientedEta, const vector_type orientedEtak, const vector_type cosTheta, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) + // returns phi, the phase shift for each plane of polarization (p,s) + static void phase_shift(const vector_type ior1, const vector_type ior2, const vector_type iork2, const vector_type cosTheta, NBL_REF_ARG(vector_type) phiS, NBL_REF_ARG(vector_type) phiP) { - vector_type cosTheta_2 = cosTheta * cosTheta; - vector_type sinTheta2 = hlsl::promote(1.0) - cosTheta_2; - const vector_type eta2 = orientedEta*orientedEta; - const vector_type etak2 = 
orientedEtak*orientedEtak; + const vector_type cosTheta2 = cosTheta * cosTheta; + const vector_type sinTheta2 = hlsl::promote(1.0) - cosTheta2; + const vector_type ior1_2 = ior1*ior1; + const vector_type ior2_2 = ior2*ior2; + const vector_type iork2_2 = iork2*iork2; - vector_type z = eta2 - etak2 - sinTheta2; - vector_type w = hlsl::sqrt(z * z + scalar_type(4.0) * eta2 * eta2 * etak2); - vector_type a2 = (z + w) * hlsl::promote(0.5); - vector_type b2 = (w - z) * hlsl::promote(0.5); - vector_type b = hlsl::sqrt(b2); + const vector_type z = ior2_2 * (hlsl::promote(1.0) - iork2_2) - ior1_2 * sinTheta2; + const vector_type w = hlsl::sqrt(z*z + scalar_type(4.0) * ior2_2 * ior2_2 * iork2_2); + const vector_type a2 = hlsl::max(z + w, hlsl::promote(0.0)) * hlsl::promote(0.5); + const vector_type b2 = hlsl::max(w - z, hlsl::promote(0.0)) * hlsl::promote(0.5); + const vector_type a = hlsl::sqrt(a2); + const vector_type b = hlsl::sqrt(b2); - const vector_type t0 = eta2 + etak2; - const vector_type t1 = t0 * cosTheta_2; - - phiS = hlsl::atan2(hlsl::promote(2.0) * b * cosTheta, a2 + b2 - cosTheta_2); - phiP = hlsl::atan2(hlsl::promote(2.0) * eta2 * cosTheta * (hlsl::promote(2.0) * orientedEtak * hlsl::sqrt(a2) - etak2 * b), t1 - a2 + b2); + phiS = hlsl::atan2(scalar_type(2.0) * ior1 * b * cosTheta, a2 + b2 - ior1_2*cosTheta2); + const vector_type k2_plus_one = hlsl::promote(1.0) + iork2_2; + phiP = hlsl::atan2(scalar_type(2.0) * ior1 * ior2_2 * cosTheta * (scalar_type(2.0) * iork2 * a - (hlsl::promote(1.0) - iork2_2) * b), + ior2_2 * cosTheta2 * k2_plus_one * k2_plus_one - ior1_2*(a2+b2)); } // Evaluation XYZ sensitivity curves in Fourier space @@ -544,7 +545,8 @@ struct iridescent_helper } template - static T __call(const vector_type _D, const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) + static T __call(const vector_type _D, const vector_type ior1, const vector_type ior2, const vector_type ior3, const vector_type 
iork3, + const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) { const vector_type wavelengths = vector_type(Colorspace::wavelength_R, Colorspace::wavelength_G, Colorspace::wavelength_B); @@ -593,8 +595,8 @@ struct iridescent_helper vector_type I = hlsl::promote(0.0); // Evaluate the phase shift - phase_shift(eta12, hlsl::promote(0.0), hlsl::promote(cosTheta_1), phi21p, phi21s); - phase_shift(eta23, etak23, cosTheta_2, phi23p, phi23s); + phase_shift(ior1, ior2, hlsl::promote(0.0), hlsl::promote(cosTheta_1), phi21s, phi21p); + phase_shift(ior2, ior3, iork3, cosTheta_2, phi23s, phi23p); phi21p = hlsl::promote(numbers::pi) - phi21p; phi21s = hlsl::promote(numbers::pi) - phi21s; @@ -633,7 +635,7 @@ struct iridescent_helper I += Cm*Sm; } - return hlsl::max(colorspace::scRGB::FromXYZ(I), hlsl::promote(0.0)) * hlsl::promote(0.5); + return hlsl::max(colorspace::scRGB::FromXYZ(I) * hlsl::promote(0.5), hlsl::promote(0.0)); } }; @@ -643,11 +645,11 @@ struct iridescent_base using scalar_type = typename vector_traits::scalar_type; using vector_type = T; - vector_type getD() NBL_CONST_MEMBER_FUNC { return D; } - vector_type getEta12() NBL_CONST_MEMBER_FUNC { return eta12; } - vector_type getEta23() NBL_CONST_MEMBER_FUNC { return eta23; } - vector_type D; + vector_type ior1; + vector_type ior2; + vector_type ior3; + vector_type iork3; vector_type eta12; // outside (usually air 1.0) -> thin-film IOR vector_type eta23; // thin-film -> base material IOR }; @@ -679,6 +681,10 @@ struct Iridescent(2.0 * params.Dinc) * params.ior2; + retval.ior1 = params.ior1; + retval.ior2 = params.ior2; + retval.ior3 = params.ior3; + retval.iork3 = params.iork3; retval.eta12 = params.ior2/params.ior1; retval.eta23 = params.ior3/params.ior2; retval.etak23 = params.iork3/params.ior2; @@ -687,7 +693,8 @@ struct Iridescent::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta); + return 
impl::iridescent_helper::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, base_type::iork3, + base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); } OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC @@ -731,6 +738,10 @@ struct Iridescent(2.0 * params.Dinc) * params.ior2; + retval.ior1 = params.ior1; + retval.ior2 = params.ior2; + retval.ior3 = params.ior3; + retval.iork3 = params.iork3; retval.eta12 = params.ior2/params.ior1; retval.eta23 = params.ior3/params.ior2; return retval; @@ -738,7 +749,8 @@ struct Iridescent::template __call(base_type::getD(), base_type::getEta12(), base_type::getEta23(), getEtak23(), clampedCosTheta); + return impl::iridescent_helper::template __call(base_type::D, base_type::ior1, base_type::ior2, base_type::ior3, getEtak23(), + base_type::eta12, base_type::eta23, getEtak23(), clampedCosTheta); } scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta23[0]; } @@ -755,8 +767,11 @@ struct Iridescent(1.0)/base_type::eta12, flip); - orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta23, flip); + orientedFresnel.ior1 = base_type::ior3; + orientedFresnel.ior2 = base_type::ior2; + orientedFresnel.ior3 = base_type::ior1; + orientedFresnel.eta12 = hlsl::mix(base_type::eta12, hlsl::promote(1.0)/base_type::eta23, flip); + orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); return orientedFresnel; } From 13f80523bed25535eb22feac08f012e2e564f288 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 5 Dec 2025 16:13:32 +0700 Subject: [PATCH 059/140] fixes iridescent fresnel under transmission Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 0f2b3486ab..ad83da5cf7 100644 --- 
a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -563,7 +563,7 @@ struct iridescent_helper if (hlsl::any(notTIR)) { - Dielectric::__polarized(eta12, hlsl::promote(cosTheta_1), R12p, R12s); + Dielectric::__polarized(eta12 * eta12, hlsl::promote(cosTheta_1), R12p, R12s); // Reflected part by the base // if kappa==0, base material is dielectric @@ -741,7 +741,6 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / base_type::eta23[0]; + rcpEta.value = base_type::ior1[0] / base_type::ior3[0]; rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } @@ -767,9 +766,9 @@ struct Iridescent(1.0)/base_type::eta23, flip); orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); return orientedFresnel; From a9e7f39d1d53940a99ab775d2f8206a61680319c Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 5 Dec 2025 16:34:58 +0700 Subject: [PATCH 060/140] fix wrong get refraction eta in iridescent transmission Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index ad83da5cf7..0c498efb79 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -752,7 +752,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; From 6004b83f6b5d05f8e5254cbaa6b35c156eeebf2e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 8 Dec 2025 16:04:44 +0700 Subject: [PATCH 061/140] quantized sequence packing data Signed-off-by: Corey --- .../hlsl/sampling/quantized_sequence.hlsl | 166 ++++++++++++++++++ src/nbl/builtin/CMakeLists.txt | 1 + 2 files changed, 167 insertions(+) create mode 100644 include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl diff --git 
a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl new file mode 100644 index 0000000000..788a38d499 --- /dev/null +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -0,0 +1,166 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SAMPLING_QUANTIZED_SEQUENCE_INCLUDED_ + +#include "nbl/builtin/hlsl/concepts/vector.hlsl" +#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace sampling +{ + +template +struct QuantizedSequence; + +// byteslog2 = 1,2; dim = 1 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 0 && BytesLog2 < 3) +struct QuantizedSequence 0 && BytesLog2 < 3) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << BytesLog2; + using base_store_type = typename unsigned_integer_of_size::type; + + base_store_type getX() { return data; } + void setX(const base_store_type value) { data = value; } + + base_store_type data; +}; + +// byteslog2 = 3,4; dim = 1 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 2 && BytesLog2 < 5) +struct QuantizedSequence 2 && BytesLog2 < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = uint16_t(2u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; + using base_store_type = typename unsigned_integer_of_size::type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t num_components = uint16_t(1u) << (BytesLog2 - base_bytes_log2); + using store_type = vector; + + store_type getX() { return data; } + void setX(const store_type value) { data = value; } + + store_type data; +}; + +// byteslog2 = 2,3; dim = 2 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) +struct QuantizedSequence 2 && BytesLog2 < 5) > +{ + 
NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; + using base_store_type = typename unsigned_integer_of_size::type; + using store_type = vector; + + base_store_type getX() { return data[0]; } + base_store_type getY() { return data[1]; } + void setX(const base_store_type value) { data[0] = value; } + void setY(const base_store_type value) { data[1] = value; } + + store_type data; +}; + +// byteslog2 = 1; dim = 2,3,4 +template NBL_PARTIAL_REQ_TOP(Dim > 1 && Dim < 5) +struct QuantizedSequence<1, Dim NBL_PARTIAL_REQ_BOT(Dim > 1 && Dim < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; + NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); + using base_store_type = uint16_t; + + base_store_type getX() { return data & MASK; } + base_store_type getY() { return (data >> bits_per_component) & MASK; } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + base_store_type getZ() { return (data >> (bits_per_component * uint16_t(2u))) & MASK; } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + base_store_type getW() { return (data >> (bits_per_component * uint16_t(3u))) & MASK; } + + void setX(const base_store_type value) + { + data &= ~MASK; + data |= value & MASK; + } + void setY(const base_store_type value) + { + const uint16_t mask = MASK << bits_per_component; + data &= ~mask; + data |= (value & MASK) << bits_per_component; + } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + void setZ(const base_store_type value) + { + const uint16_t bits = (bits_per_component * uint16_t(2u)); + const uint16_t mask = MASK << bits; + data &= ~mask; + data |= (value & MASK) << bits; + } + template 
NBL_FUNC_REQUIRES(C::value && 3 < Dim) + void setW(const base_store_type value) + { + const uint16_t bits = (bits_per_component * uint16_t(3u)); + const uint16_t mask = MASK << bits; + data &= ~mask; + data |= (value & MASK) << bits; + } + + base_store_type data; +}; + +// byteslog2 = 2,3; dim = 3 +template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) +struct QuantizedSequence 2 && BytesLog2 < 5) > +{ + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; + NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / uint16_t(3u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); + using base_store_type = typename unsigned_integer_of_size::type; + using store_type = vector; + + base_store_type getX() { return data[0] & MASK; } + base_store_type getY() + { + base_store_type y = data[0] >> bits_per_component; + y |= (data[1] >> bits_per_component) << (store_bits-bits_per_component); + return y; + } + base_store_type getZ() { return data[1] & MASK; } + + void setX(base_store_type x) + { + data[0] &= ~MASK; + data[0] |= x & MASK; + } + void setY(base_store_type y) + { + const uint16_t ybits = store_bits-bits_per_component; + const uint16_t ymask = uint16_t(1u) << ybits; + data[0] &= MASK; + data[1] &= MASK; + data[0] |= (y & ymask) << bits_per_component; + data[1] |= (y >> (ybits) & ymask) << bits_per_component; + } + void setZ(base_store_type z) + { + data[1] &= ~MASK; + data[1] |= z & MASK; + } + + store_type data; +}; + +// not complete because we're changing the template params next commit + +} + +} +} + +#endif diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 7b08519b5c..e76b6b6f99 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ 
-260,6 +260,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/basic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/linear.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/bilinear.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/quantized_sequence.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/concentric_mapping.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/box_muller_transform.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/sampling/cos_weighted.hlsl") From e456e26a4c912d850c502d7110f407de7168648a Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 9 Dec 2025 15:08:09 +0700 Subject: [PATCH 062/140] templated quantized sequence Signed-off-by: Corey --- .../hlsl/sampling/quantized_sequence.hlsl | 295 ++++++++++++------ 1 file changed, 199 insertions(+), 96 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 788a38d499..5738dfec8c 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -15,31 +15,16 @@ namespace hlsl namespace sampling { -template +template struct QuantizedSequence; -// byteslog2 = 1,2; dim = 1 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 0 && BytesLog2 < 3) -struct QuantizedSequence 0 && BytesLog2 < 3) > -{ - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << BytesLog2; - using base_store_type = typename unsigned_integer_of_size::type; - - base_store_type getX() { return data; } - void setX(const base_store_type value) { data = value; } - - base_store_type data; -}; +#define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 -// byteslog2 = 3,4; dim = 1 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 2 && BytesLog2 < 
5) -struct QuantizedSequence 2 && BytesLog2 < 5) > +// all Dim=1 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) +struct QuantizedSequence { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = uint16_t(2u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; - using base_store_type = typename unsigned_integer_of_size::type; - NBL_CONSTEXPR_STATIC_INLINE uint16_t num_components = uint16_t(1u) << (BytesLog2 - base_bytes_log2); - using store_type = vector; + using store_type = T; store_type getX() { return data; } void setX(const store_type value) { data = value; } @@ -47,116 +32,234 @@ struct QuantizedSequence 2 && Bytes store_type data; }; -// byteslog2 = 2,3; dim = 2 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) -struct QuantizedSequence 2 && BytesLog2 < 5) > +// uint16_t, uint32_t; Dim=2,3,4 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 1 && Dim > 1 && Dim < 5) +struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; - using base_store_type = typename unsigned_integer_of_size::type; - using store_type = vector; + using store_type = T; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - base_store_type getX() { return data[0]; } - base_store_type getY() { return data[1]; } - void setX(const base_store_type value) { data[0] = value; } - void setY(const base_store_type value) { data[1] = value; } + store_type getX() { return data & Mask; } + store_type getY() { return (data >> (BitsPerComponent * uint16_t(1u))) & Mask; } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + store_type 
getZ() { return (data >> (BitsPerComponent * uint16_t(2u))) & Mask; } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + store_type getW() { return (data >> (BitsPerComponent * uint16_t(3u))) & Mask; } + + void setX(const store_type value) + { + data &= ~Mask; + data |= value & Mask; + } + void setY(const store_type value) + { + data &= ~(Mask << BitsPerComponent); + data |= (value & Mask) << BitsPerComponent; + } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + void setZ(const store_type value) + { + const uint16_t bits = (BitsPerComponent * uint16_t(2u)); + data &= ~(Mask << bits); + data |= (value & Mask) << bits; + } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + void setW(const store_type value) + { + const uint16_t bits = (BitsPerComponent * uint16_t(3u)); + data &= ~(Mask << bits); + data |= (value & Mask) << bits; + } store_type data; }; -// byteslog2 = 1; dim = 2,3,4 -template NBL_PARTIAL_REQ_TOP(Dim > 1 && Dim < 5) -struct QuantizedSequence<1, Dim NBL_PARTIAL_REQ_BOT(Dim > 1 && Dim < 5) > +// Dim 2,3,4 matches vector dim +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == Dim && Dim > 1 && Dim < 5) +struct QuantizedSequence::Dimension == Dim && Dim > 1 && Dim < 5) > { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; - NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); - using base_store_type = uint16_t; - - base_store_type getX() { return data & MASK; } - base_store_type getY() { return (data >> bits_per_component) & MASK; } + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + + scalar_type getX() { return data[0]; } + scalar_type getY() { return data[1]; } + template NBL_FUNC_REQUIRES(C::value && 2 < Dim) + scalar_type getZ() { 
return data[2]; } + template NBL_FUNC_REQUIRES(C::value && 3 < Dim) + scalar_type getW() { return data[3]; } + + void setX(const scalar_type value) { data[0] = value; } + void setY(const scalar_type value) { data[1] = value; } template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - base_store_type getZ() { return (data >> (bits_per_component * uint16_t(2u))) & MASK; } + void setZ(const scalar_type value) { data[2] = value; } template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - base_store_type getW() { return (data >> (bits_per_component * uint16_t(3u))) & MASK; } + void setW(const scalar_type value) { data[3] = value; } + + store_type data; +}; - void setX(const base_store_type value) +// uint16_t2, uint32_t2; Dim=3 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 3) +struct QuantizedSequence::Dimension == 2 && Dim == 3) > +{ + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + + scalar_type getX() { return data[0] & Mask; } + scalar_type getY() { - data &= ~MASK; - data |= value & MASK; + scalar_type y = data[0] >> BitsPerComponent; + y |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); + return y; } - void setY(const base_store_type value) + scalar_type getZ() { return data[1] & Mask; } + + void setX(const scalar_type value) { - const uint16_t mask = MASK << bits_per_component; - data &= ~mask; - data |= (value & MASK) << bits_per_component; + data[0] &= ~Mask; + data[0] |= value & Mask; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - void setZ(const base_store_type value) + void setY(const scalar_type value) { - const uint16_t bits = (bits_per_component * uint16_t(2u)); - const uint16_t mask = MASK << bits; - data &= ~mask; - data |= 
(value & MASK) << bits; + const uint16_t ybits = StoreBits-BitsPerComponent; + const uint16_t ymask = uint16_t(1u) << ybits; + data[0] &= Mask; + data[1] &= Mask; + data[0] |= (value & ymask) << BitsPerComponent; + data[1] |= (value >> (ybits) & ymask) << BitsPerComponent; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - void setW(const base_store_type value) + void setZ(const scalar_type value) { - const uint16_t bits = (bits_per_component * uint16_t(3u)); - const uint16_t mask = MASK << bits; - data &= ~mask; - data |= (value & MASK) << bits; + data[1] &= ~Mask; + data[1] |= value & Mask; } - base_store_type data; + store_type data; +}; + +// uint16_t2, uint32_t2; Dim=4 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 2 && Dim == 4) +struct QuantizedSequence::Dimension == 2 && Dim == 4) > +{ + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + + scalar_type getX() { return data[0] & Mask; } + scalar_type getY() { return data[0] >> BitsPerComponent; } + scalar_type getZ() { return data[1] & Mask; } + scalar_type getW() { return data[1] >> BitsPerComponent; } + + void setX(const scalar_type value) + { + data[0] &= ~Mask; + data[0] |= value & Mask; + } + void setY(const scalar_type value) + { + data[0] &= Mask; + data[0] |= (value & Mask) << BitsPerComponent; + } + void setZ(const scalar_type value) + { + data[1] &= ~Mask; + data[1] |= value & Mask; + } + void setW(const scalar_type value) + { + data[1] &= Mask; + data[1] |= (value & Mask) << BitsPerComponent; + } + + store_type data; +}; + +// uint16_t4, uint32_t4; Dim=2 +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 2) +struct 
QuantizedSequence::Dimension == 4 && Dim == 2) > +{ + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + using base_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + + base_type getX() { return data.xy; } + base_type getY() { return data.zw; } + + void setX(const base_type value) { data.xy = value; } + void setY(const base_type value) { data.zw = value; } + + store_type data; }; -// byteslog2 = 2,3; dim = 3 -template NBL_PARTIAL_REQ_TOP(BytesLog2 > 1 && BytesLog2 < 4) -struct QuantizedSequence 2 && BytesLog2 < 5) > +// uint16_t4, uint32_t4; Dim=3 +// uint16_t4 --> returns uint16_t2 - 21 bits per component: 16 in x, 5 in y +// uint16_t4 --> returns uint32_t2 - 42 bits per component: 32 in x, 10 in y +template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT && vector_traits::Dimension == 4 && Dim == 3) +struct QuantizedSequence::Dimension == 4 && Dim == 3) > { - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_bytes_log2 = BytesLog2 - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t base_store_bytes = uint16_t(1u) << base_bytes_log2; - NBL_CONSTEXPR_STATIC_INLINE uint16_t store_bits = uint16_t(8u) * base_store_bytes; - NBL_CONSTEXPR_STATIC_INLINE uint16_t bits_per_component = store_bits / uint16_t(3u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t MASK = (uint16_t(1u) << bits_per_component) - uint16_t(1u); - using base_store_type = typename unsigned_integer_of_size::type; - using store_type = vector; - - base_store_type getX() { return data[0] & MASK; } - base_store_type getY() - { - base_store_type y = data[0] >> bits_per_component; - y |= (data[1] >> bits_per_component) << (store_bits-bits_per_component); + using store_type = T; + using scalar_type = typename vector_traits::scalar_type; + using base_type = vector; + 
NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; + NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); + + base_type getX() + { + base_type x; + x[0] = data[0]; + x[1] = data[3] & Mask; + return x; + } + base_type getY() + { + base_type y; + y[0] = data[1]; + y[1] = (data[3] >> LeftoverBitsPerComponent) & Mask; return y; } - base_store_type getZ() { return data[1] & MASK; } + base_type getZ() + { + base_type z; + z[0] = data[1]; + z[1] = (data[3] >> (LeftoverBitsPerComponent * uint16_t(2u))) & Mask; + return z; + } - void setX(base_store_type x) + void setX(const base_type value) { - data[0] &= ~MASK; - data[0] |= x & MASK; + data[0] = value[0]; + data[3] &= ~Mask; + data[3] |= value[1] & Mask; } - void setY(base_store_type y) + void setY(const base_type value) { - const uint16_t ybits = store_bits-bits_per_component; - const uint16_t ymask = uint16_t(1u) << ybits; - data[0] &= MASK; - data[1] &= MASK; - data[0] |= (y & ymask) << bits_per_component; - data[1] |= (y >> (ybits) & ymask) << bits_per_component; + data[1] = value[0]; + data[3] &= ~Mask; + data[3] |= (value[1] & Mask) << LeftoverBitsPerComponent; } - void setZ(base_store_type z) + void setZ(const base_type value) { - data[1] &= ~MASK; - data[1] |= z & MASK; + data[2] = value[0]; + data[3] &= ~Mask; + data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * uint16_t(2u)); } store_type data; }; -// not complete because we're changing the template params next commit +#undef SEQUENCE_SPECIALIZATION_CONCEPT } From d1a5eb5c963d1359e4e8ae4cd5a2da560e368a69 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 9 Dec 2025 16:12:58 +0700 Subject: [PATCH 063/140] quantized sequence decode Signed-off-by: Corey --- .../hlsl/sampling/quantized_sequence.hlsl | 83 +++++++++++++++++++ 1 
file changed, 83 insertions(+) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 5738dfec8c..fcb2488514 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -18,6 +18,89 @@ namespace sampling template struct QuantizedSequence; + +namespace impl +{ +template +struct decode_helper; + +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const scalar_type scrambleKey) + { + scalar_type seqVal = val.getX(); + seqVal ^= scrambleKey; + return hlsl::promote(seqVal) * bit_cast(0x2f800004u); + } +}; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + { + uvec_type seqVal; + seqVal[0] = val.getX(); + seqVal[1] = val.getY(); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast(0x2f800004u); + } +}; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + { + uvec_type seqVal; + seqVal[0] = val.getX(); + seqVal[1] = val.getY(); + seqVal[2] = val.getZ(); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast(0x2f800004u); + } +}; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using 
return_type = vector; + + static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + { + uvec_type seqVal; + seqVal[0] = val.getX(); + seqVal[1] = val.getY(); + seqVal[2] = val.getZ(); + seqVal[3] = val.getW(); + seqVal ^= scrambleKey; + return return_type(seqVal) * bit_cast(0x2f800004u); + } +}; +} + +template +vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +{ + return impl::decode_helper::__call(val, scrambleKey); +} + + #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 // all Dim=1 From a512e3997eb3e0bdbc63565d6e300fc97afb0684 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 10 Dec 2025 11:24:14 +0700 Subject: [PATCH 064/140] quantized sequence get/set values by index, simplify decode func specializations Signed-off-by: Corey --- .../hlsl/sampling/quantized_sequence.hlsl | 272 ++++++------------ 1 file changed, 86 insertions(+), 186 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index fcb2488514..9392a7dab0 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -22,72 +22,19 @@ struct QuantizedSequence; namespace impl { template -struct decode_helper; - -template -struct decode_helper -{ - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using return_type = vector; - - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const scalar_type scrambleKey) - { - scalar_type seqVal = val.getX(); - seqVal ^= scrambleKey; - return hlsl::promote(seqVal) * bit_cast(0x2f800004u); - } -}; -template -struct decode_helper +struct decode_helper { using scalar_type = typename vector_traits::scalar_type; using fp_type = typename float_of_size::type; 
- using uvec_type = vector; - using return_type = vector; + using uvec_type = vector; + using sequence_type = QuantizedSequence; + using return_type = vector; - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { uvec_type seqVal; - seqVal[0] = val.getX(); - seqVal[1] = val.getY(); - seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast(0x2f800004u); - } -}; -template -struct decode_helper -{ - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; - using return_type = vector; - - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) - { - uvec_type seqVal; - seqVal[0] = val.getX(); - seqVal[1] = val.getY(); - seqVal[2] = val.getZ(); - seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast(0x2f800004u); - } -}; -template -struct decode_helper -{ - using scalar_type = typename vector_traits::scalar_type; - using fp_type = typename float_of_size::type; - using uvec_type = vector; - using return_type = vector; - - static return_type __call(NBL_CONST_REF_ARG(QuantizedSequence) val, const uvec_type scrambleKey) - { - uvec_type seqVal; - seqVal[0] = val.getX(); - seqVal[1] = val.getY(); - seqVal[2] = val.getZ(); - seqVal[3] = val.getW(); + NBL_UNROLL for(uint16_t i = 0; i < D; i++) + seqVal[i] = val.get(i); seqVal ^= scrambleKey; return return_type(seqVal) * bit_cast(0x2f800004u); } @@ -109,8 +56,8 @@ struct QuantizedSequence 0 && idx < 1); return data; } + void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } store_type data; }; @@ -124,34 +71,16 @@ struct QuantizedSequence> (BitsPerComponent * uint16_t(1u))) & Mask; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - store_type getZ() { return (data >> (BitsPerComponent * uint16_t(2u))) & Mask; } 
- template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - store_type getW() { return (data >> (BitsPerComponent * uint16_t(3u))) & Mask; } - - void setX(const store_type value) - { - data &= ~Mask; - data |= value & Mask; - } - void setY(const store_type value) - { - data &= ~(Mask << BitsPerComponent); - data |= (value & Mask) << BitsPerComponent; - } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - void setZ(const store_type value) + store_type get(const uint16_t idx) { - const uint16_t bits = (BitsPerComponent * uint16_t(2u)); - data &= ~(Mask << bits); - data |= (value & Mask) << bits; + assert(idx > 0 && idx < Dim); + return (data >> (BitsPerComponent * idx)) & Mask; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - void setW(const store_type value) + + void set(const uint16_t idx, const store_type value) { - const uint16_t bits = (BitsPerComponent * uint16_t(3u)); + assert(idx > 0 && idx < Dim); + const uint16_t bits = (BitsPerComponent * idx); data &= ~(Mask << bits); data |= (value & Mask) << bits; } @@ -166,19 +95,8 @@ struct QuantizedSequence::scalar_type; - scalar_type getX() { return data[0]; } - scalar_type getY() { return data[1]; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - scalar_type getZ() { return data[2]; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - scalar_type getW() { return data[3]; } - - void setX(const scalar_type value) { data[0] = value; } - void setY(const scalar_type value) { data[1] = value; } - template NBL_FUNC_REQUIRES(C::value && 2 < Dim) - void setZ(const scalar_type value) { data[2] = value; } - template NBL_FUNC_REQUIRES(C::value && 3 < Dim) - void setW(const scalar_type value) { data[3] = value; } + scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } + void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } store_type data; }; @@ -193,33 +111,38 @@ struct QuantizedSequence> BitsPerComponent; - y |= (data[1] >> 
BitsPerComponent) << (StoreBits-BitsPerComponent); - return y; - } - scalar_type getZ() { return data[1] & Mask; } - - void setX(const scalar_type value) - { - data[0] &= ~Mask; - data[0] |= value & Mask; - } - void setY(const scalar_type value) - { - const uint16_t ybits = StoreBits-BitsPerComponent; - const uint16_t ymask = uint16_t(1u) << ybits; - data[0] &= Mask; - data[1] &= Mask; - data[0] |= (value & ymask) << BitsPerComponent; - data[1] |= (value >> (ybits) & ymask) << BitsPerComponent; - } - void setZ(const scalar_type value) - { - data[1] &= ~Mask; - data[1] |= value & Mask; + scalar_type get(const uint16_t idx) + { + assert(idx > 0 && idx < 3); + if (idx < 2) + { + return data[idx] & Mask; + } + else + { + scalar_type z = data[0] >> BitsPerComponent; + z |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); + return z; + } + } + + void set(const uint16_t idx, const scalar_type value) + { + assert(idx > 0 && idx < 3); + if (idx < 2) + { + data[idx] &= ~Mask; + data[idx] |= value & Mask; + } + else + { + const uint16_t zbits = StoreBits-BitsPerComponent; + const uint16_t zmask = uint16_t(1u) << zbits; + data[0] &= Mask; + data[1] &= Mask; + data[0] |= (value & zmask) << BitsPerComponent; + data[1] |= (value >> (zbits) & zmask) << BitsPerComponent; + } } store_type data; @@ -235,30 +158,20 @@ struct QuantizedSequence> BitsPerComponent; } - scalar_type getZ() { return data[1] & Mask; } - scalar_type getW() { return data[1] >> BitsPerComponent; } - - void setX(const scalar_type value) - { - data[0] &= ~Mask; - data[0] |= value & Mask; - } - void setY(const scalar_type value) + scalar_type get(const uint16_t idx) { - data[0] &= Mask; - data[0] |= (value & Mask) << BitsPerComponent; + assert(idx > 0 && idx < 4); + const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); + return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; } - void setZ(const scalar_type value) - { - data[1] &= ~Mask; - data[1] |= value & Mask; - } - void 
setW(const scalar_type value) + + void set(const uint16_t idx, const scalar_type value) { - data[1] &= Mask; - data[1] |= (value & Mask) << BitsPerComponent; + assert(idx > 0 && idx < 4); + const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); + const uint16_t odd = idx & uint16_t(1u); + data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); + data[i] |= (value & Mask) << (BitsPerComponent * odd); } store_type data; @@ -275,11 +188,22 @@ struct QuantizedSequence 0 && idx < 2); + base_type a; + a[0] = data[uint16_t(2u) * idx]; + a[1] = data[uint16_t(2u) * idx + 1]; + return a; + } - void setX(const base_type value) { data.xy = value; } - void setY(const base_type value) { data.zw = value; } + void set(const uint16_t idx, const base_type value) + { + assert(idx > 0 && idx < 2); + base_type a; + data[uint16_t(2u) * idx] = value[0]; + data[uint16_t(2u) * idx + 1] = value[1]; + } store_type data; }; @@ -298,45 +222,21 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - base_type getX() - { - base_type x; - x[0] = data[0]; - x[1] = data[3] & Mask; - return x; - } - base_type getY() + base_type get(const uint16_t idx) { - base_type y; - y[0] = data[1]; - y[1] = (data[3] >> LeftoverBitsPerComponent) & Mask; - return y; - } - base_type getZ() - { - base_type z; - z[0] = data[1]; - z[1] = (data[3] >> (LeftoverBitsPerComponent * uint16_t(2u))) & Mask; - return z; + assert(idx > 0 && idx < 3); + base_type a; + a[0] = data[idx]; + a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; + return a; } - void setX(const base_type value) - { - data[0] = value[0]; - data[3] &= ~Mask; - data[3] |= value[1] & Mask; - } - void setY(const base_type value) - { - data[1] = value[0]; - data[3] &= ~Mask; - data[3] |= (value[1] & Mask) << LeftoverBitsPerComponent; - } - void setZ(const base_type value) + void set(const uint16_t idx, const base_type value) { - data[2] = value[0]; + assert(idx > 0 && idx < 3); + 
data[idx] = value[0]; data[3] &= ~Mask; - data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * uint16_t(2u)); + data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * idx); } store_type data; From f8f6ab0b05bce6e255eb3ee274a18ae1a894ec0e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 10 Dec 2025 16:51:29 +0700 Subject: [PATCH 065/140] quantized sequence encode should right shift input, changed scramble to initialize a pcg hash, added some helpful unorm constants Signed-off-by: Corey --- .../hlsl/sampling/quantized_sequence.hlsl | 64 +++++++++++++------ 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 9392a7dab0..b70bddf54e 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/concepts/vector.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" +#include "nbl/builtin/hlsl/random/pcg.hlsl" namespace nbl { @@ -21,6 +22,23 @@ struct QuantizedSequence; namespace impl { +template +struct unorm_constant; +template<> +struct unorm_constant<4> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d888889u; }; +template<> +struct unorm_constant<5> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3d042108u; }; +template<> +struct unorm_constant<8> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3b808081u; }; +template<> +struct unorm_constant<10> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x3a802008u; }; +template<> +struct unorm_constant<16> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x37800080u; }; +template<> +struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000004u; }; +template<> +struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; + template struct decode_helper { @@ -29,25 +47,25 @@ struct decode_helper using 
uvec_type = vector; using sequence_type = QuantizedSequence; using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) { + random::PCG32 pcg = random::PCG32::construct(scrambleSeed); uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i); - seqVal ^= scrambleKey; - return return_type(seqVal) * bit_cast(0x2f800004u); + seqVal[i] = val.get(i) ^ pcg(); + return return_type(seqVal) * bit_cast(UNormConstant); } }; } template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) +vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const uint32_t scrambleSeed) { - return impl::decode_helper::__call(val, scrambleKey); + return impl::decode_helper::__call(val, scrambleSeed); } - #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 // all Dim=1 @@ -55,6 +73,7 @@ template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) struct QuantizedSequence { using store_type = T; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -67,9 +86,10 @@ template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_C struct QuantizedSequence::Dimension == 1 && Dim > 1 && Dim < 5) > { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE 
uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; store_type get(const uint16_t idx) { @@ -82,7 +102,7 @@ struct QuantizedSequence 0 && idx < Dim); const uint16_t bits = (BitsPerComponent * idx); data &= ~(Mask << bits); - data |= (value & Mask) << bits; + data |= ((value >> DiscardBits) & Mask) << bits; } store_type data; @@ -107,9 +127,11 @@ struct QuantizedSequence::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { @@ -132,16 +154,17 @@ struct QuantizedSequence> DiscardBits) & Mask; } else { const uint16_t zbits = StoreBits-BitsPerComponent; const uint16_t zmask = uint16_t(1u) << zbits; + const scalar_type trunc_val = value >> DiscardBits; data[0] &= Mask; data[1] &= Mask; - data[0] |= (value & zmask) << BitsPerComponent; - data[1] |= (value >> (zbits) & zmask) << BitsPerComponent; + data[0] |= (trunc_val & zmask) << BitsPerComponent; + data[1] |= (trunc_val >> (zbits) & zmask) << BitsPerComponent; } } @@ -154,9 +177,10 @@ struct QuantizedSequence::scalar_type; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; scalar_type get(const uint16_t idx) { @@ -171,7 +195,7 
@@ struct QuantizedSequence> uint16_t(1u); const uint16_t odd = idx & uint16_t(1u); data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); - data[i] |= (value & Mask) << (BitsPerComponent * odd); + data[i] |= ((value >> DiscardBits) & Mask) << (BitsPerComponent * odd); } store_type data; @@ -184,9 +208,6 @@ struct QuantizedSequence::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; - NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); base_type get(const uint16_t idx) { @@ -217,10 +238,11 @@ struct QuantizedSequence::scalar_type; using base_type = vector; - NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - size_of_v; + NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; base_type get(const uint16_t idx) { @@ -236,7 +258,7 @@ struct QuantizedSequence 0 && idx < 3); data[idx] = value[0]; data[3] &= ~Mask; - data[3] |= (value[1] & Mask) << (LeftoverBitsPerComponent * idx); + data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); } store_type data; From 4092952f3e7e71c896ff6a080d9b5a21f3af408f Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 11 Dec 2025 11:26:37 +0700 Subject: [PATCH 066/140] added decode variant for scramble before decode Signed-off-by: Corey --- .../hlsl/sampling/quantized_sequence.hlsl | 50 +++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index b70bddf54e..08f23eb170 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -39,8 +39,11 @@ struct unorm_constant<21> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x35000 template<> struct unorm_constant<32> { NBL_CONSTEXPR_STATIC_INLINE uint32_t value = 0x2f800004u; }; +template +struct decode_helper; + template -struct decode_helper +struct decode_helper { using scalar_type = typename vector_traits::scalar_type; using fp_type = typename float_of_size::type; @@ -58,12 +61,46 @@ struct decode_helper return return_type(seqVal) * bit_cast(UNormConstant); } }; +template +struct decode_helper +{ + using scalar_type = typename vector_traits::scalar_type; + using fp_type = typename float_of_size::type; + using uvec_type = vector; + using sequence_type = QuantizedSequence; + using sequence_store_type = typename sequence_type::store_type; + using sequence_scalar_type = typename vector_traits::scalar_type; + using return_type = vector; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; + + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) + { + random::PCG32 pcg = random::PCG32::construct(scrambleSeed); + + sequence_store_type scrambleKey; + NBL_UNROLL for(uint16_t i = 0; i < vector_traits::Dimension; i++) + scrambleKey[i] = sequence_scalar_type(pcg()); + + sequence_type scramble; + scramble.data = scrambleKey ^ val.data; + + // sequence_type scramble; + // NBL_UNROLL for(uint16_t i = 0; i < D; i++) + // scramble.set(i, pcg()); + // scramble.data ^= val.data; + + uvec_type seqVal; + NBL_UNROLL for(uint16_t i = 0; i < D; i++) + seqVal[i] = scramble.get(i); + return return_type(seqVal) * bit_cast(UNormConstant); + } +}; } -template +template vector::scalar_type)>::type, D> 
decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const uint32_t scrambleSeed) { - return impl::decode_helper::__call(val, scrambleSeed); + return impl::decode_helper::__call(val, scrambleSeed); } #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 @@ -73,7 +110,7 @@ template NBL_PARTIAL_REQ_TOP(SEQUENCE_SPECIALIZATION_CONCEPT) struct QuantizedSequence { using store_type = T; - NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(store_type)>::value; store_type get(const uint16_t idx) { assert(idx > 0 && idx < 1); return data; } void set(const uint16_t idx, const store_type value) { assert(idx > 0 && idx < 1); data = value; } @@ -90,6 +127,7 @@ struct QuantizedSequence::value; store_type get(const uint16_t idx) { @@ -114,6 +152,7 @@ struct QuantizedSequence::scalar_type; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; scalar_type get(const uint16_t idx) { assert(idx > 0 && idx < Dim); return data[idx]; } void set(const uint16_t idx, const scalar_type value) { assert(idx > 0 && idx < Dim); data[idx] = value; } @@ -181,6 +220,7 @@ struct QuantizedSequence::value; scalar_type get(const uint16_t idx) { @@ -208,6 +248,7 @@ struct QuantizedSequence::scalar_type; using base_type = vector; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { @@ -243,6 +284,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { From 9794d36ef580a4b84f3b880221be1bd65fcd7e7e 
Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 12 Dec 2025 17:32:00 +0700 Subject: [PATCH 067/140] some minor fixes to quantized sequence set, decode Signed-off-by: Corey --- .../hlsl/sampling/quantized_sequence.hlsl | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 08f23eb170..27588dd9e0 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -71,23 +71,16 @@ struct decode_helper using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) { random::PCG32 pcg = random::PCG32::construct(scrambleSeed); - sequence_store_type scrambleKey; - NBL_UNROLL for(uint16_t i = 0; i < vector_traits::Dimension; i++) - scrambleKey[i] = sequence_scalar_type(pcg()); - sequence_type scramble; - scramble.data = scrambleKey ^ val.data; - - // sequence_type scramble; - // NBL_UNROLL for(uint16_t i = 0; i < D; i++) - // scramble.set(i, pcg()); - // scramble.data ^= val.data; + NBL_UNROLL for(uint16_t i = 0; i < D; i++) + scramble.set(i, pcg()); + scramble.data ^= val.data; uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) @@ -197,13 +190,13 @@ struct QuantizedSequence> DiscardBits; + const scalar_type zbits = StoreBits-BitsPerComponent; + const scalar_type zmask = (uint16_t(1u) << zbits) - uint16_t(1u); + const scalar_type trunc_val = value >> (DiscardBits-1u); data[0] &= Mask; data[1] &= Mask; data[0] |= (trunc_val & zmask) << BitsPerComponent; - data[1] 
|= (trunc_val >> (zbits) & zmask) << BitsPerComponent; + data[1] |= ((trunc_val >> zbits) & zmask) << BitsPerComponent; } } From cc2263ab97138e58be2e4b33e13a81c033370dad Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 12:20:05 +0700 Subject: [PATCH 068/140] fix quantized sequence mask being too small, assert conditions Signed-off-by: Corey --- .../hlsl/sampling/quantized_sequence.hlsl | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 27588dd9e0..8ea31cbe71 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -71,7 +71,8 @@ struct decode_helper using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + // NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<21>::value; static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) { @@ -118,7 +119,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE store_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; @@ -161,13 +162,13 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; 
NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { - assert(idx > 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { return data[idx] & Mask; @@ -182,15 +183,16 @@ struct QuantizedSequence 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { + const scalar_type trunc_val = value >> DiscardBits; data[idx] &= ~Mask; - data[idx] |= (value >> DiscardBits) & Mask; + data[idx] |= trunc_val &Mask; } else { - const scalar_type zbits = StoreBits-BitsPerComponent; + const uint16_t zbits = StoreBits-BitsPerComponent; const scalar_type zmask = (uint16_t(1u) << zbits) - uint16_t(1u); const scalar_type trunc_val = value >> (DiscardBits-1u); data[0] &= Mask; @@ -211,20 +213,20 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { - assert(idx > 0 && idx < 4); + assert(idx >= 0 && idx < 4); const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); return (data[i] >> (BitsPerComponent * (idx & uint16_t(1u)))) & Mask; } void set(const uint16_t idx, const scalar_type value) { - assert(idx > 0 && idx < 4); + assert(idx >= 0 && idx < 
4); const uint16_t i = (idx & uint16_t(2u)) >> uint16_t(1u); const uint16_t odd = idx & uint16_t(1u); data[i] &= hlsl::mix(~Mask, Mask, bool(odd)); @@ -245,7 +247,7 @@ struct QuantizedSequence 0 && idx < 2); + assert(idx >= 0 && idx < 2); base_type a; a[0] = data[uint16_t(2u) * idx]; a[1] = data[uint16_t(2u) * idx + 1]; @@ -254,7 +256,7 @@ struct QuantizedSequence 0 && idx < 2); + assert(idx >= 0 && idx < 2); base_type a; data[uint16_t(2u) * idx] = value[0]; data[uint16_t(2u) * idx + 1] = value[1]; @@ -275,13 +277,13 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE uint16_t LeftoverBitsPerComponent = BitsPerComponent - uint16_t(8u) * size_of_v; - NBL_CONSTEXPR_STATIC_INLINE uint16_t Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) { - assert(idx > 0 && idx < 3); + assert(idx >= 0 && idx < 3); base_type a; a[0] = data[idx]; a[1] = (data[3] >> (LeftoverBitsPerComponent * idx)) & Mask; @@ -290,7 +292,7 @@ struct QuantizedSequence 0 && idx < 3); + assert(idx >= 0 && idx < 3); data[idx] = value[0]; data[3] &= ~Mask; data[3] |= ((value[1] >> DiscardBits) & Mask) << (LeftoverBitsPerComponent * idx); From 29bc8b0042b15e9344826e8669c26daf3b90af73 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 14:02:09 +0700 Subject: [PATCH 069/140] fixed problems from merging master Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 4 ++-- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git 
a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 0c498efb79..b13abc6632 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -617,7 +617,7 @@ struct iridescent_helper NBL_UNROLL for (int m=1; m<=2; ++m) { Cm *= r123p; - Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(m)*D, hlsl::promote(m)*(phi23p+phi21p)); + Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(scalar_type(m))*D, hlsl::promote(scalar_type(m))*(phi23p+phi21p)); I += Cm*Sm; } @@ -631,7 +631,7 @@ struct iridescent_helper NBL_UNROLL for (int m=1; m<=2; ++m) { Cm *= r123s; - Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(m)*D, hlsl::promote(m) *(phi23s+phi21s)); + Sm = hlsl::promote(2.0) * evalSensitivity(hlsl::promote(scalar_type(m))*D, hlsl::promote(scalar_type(m)) *(phi23s+phi21s)); I += Cm*Sm; } diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 6a8476e644..1887f4b51f 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -22,7 +22,7 @@ struct Promote }; template NBL_PARTIAL_REQ_TOP(concepts::Vectorial && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) -struct Promote && is_scalar_v && is_same_v::scalar_type, From>) > +struct Promote && (concepts::IntegralLikeScalar || concepts::FloatingPointLikeScalar) && is_same_v::scalar_type, From>) > { NBL_CONSTEXPR_FUNC To operator()(const From v) { diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 02495e2f2e..9190a4ec73 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -347,11 +347,6 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, bool> any(BooleanVector vec); -// 
If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) -template && (! concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) -[[vk::ext_instruction(spv::OpSelect)]] -ResultType select(Condition condition, ResultType object1, ResultType object2); - template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); From 6371e6071e7f2ecde4f63750efb8c1835b3f818b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 15:54:29 +0700 Subject: [PATCH 070/140] fix decode scramble key, shifting discard bits in quantization Signed-off-by: Corey --- .../hlsl/sampling/quantized_sequence.hlsl | 38 +++++++++---------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 8ea31cbe71..24ca8eb66d 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -52,12 +52,11 @@ struct decode_helper using return_type = vector; NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<8u*sizeof(scalar_type)>::value; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { - random::PCG32 pcg = random::PCG32::construct(scrambleSeed); uvec_type seqVal; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - seqVal[i] = val.get(i) ^ pcg(); + seqVal[i] = val.get(i) ^ scrambleKey[i]; return return_type(seqVal) * bit_cast(UNormConstant); } }; @@ -71,16 +70,13 @@ struct decode_helper using sequence_store_type = typename sequence_type::store_type; using sequence_scalar_type = typename vector_traits::scalar_type; using return_type = vector; - // NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = 
unorm_constant<8u*sizeof(scalar_type)>::value; - NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = unorm_constant<21>::value; + NBL_CONSTEXPR_STATIC_INLINE scalar_type UNormConstant = sequence_type::UNormConstant; - static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uint32_t scrambleSeed) + static return_type __call(NBL_CONST_REF_ARG(sequence_type) val, const uvec_type scrambleKey) { - random::PCG32 pcg = random::PCG32::construct(scrambleSeed); - sequence_type scramble; NBL_UNROLL for(uint16_t i = 0; i < D; i++) - scramble.set(i, pcg()); + scramble.set(i, scrambleKey[i]); scramble.data ^= val.data; uvec_type seqVal; @@ -92,9 +88,9 @@ struct decode_helper } template -vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const uint32_t scrambleSeed) +vector::scalar_type)>::type, D> decode(NBL_CONST_REF_ARG(QuantizedSequence) val, const vector::scalar_type, D> scrambleKey) { - return impl::decode_helper::__call(val, scrambleSeed); + return impl::decode_helper::__call(val, scrambleKey); } #define SEQUENCE_SPECIALIZATION_CONCEPT concepts::UnsignedIntegral::scalar_type> && size_of_v::scalar_type> <= 4 @@ -162,13 +158,13 @@ struct QuantizedSequence::scalar_type; NBL_CONSTEXPR_STATIC_INLINE uint16_t StoreBits = uint16_t(8u) * size_of_v; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; - NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (scalar_type(1u) << BitsPerComponent) - scalar_type(1u); + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) { - assert(idx >= 0 && idx < 3); + // assert(idx >= 0 && idx < 3); if (idx < 2) { return data[idx] & Mask; @@ -176,24 
+172,24 @@ struct QuantizedSequence> BitsPerComponent; - z |= (data[1] >> BitsPerComponent) << (StoreBits-BitsPerComponent); + z |= (data[1] >> BitsPerComponent) << DiscardBits; return z; } } void set(const uint16_t idx, const scalar_type value) { - assert(idx >= 0 && idx < 3); + // assert(idx >= 0 && idx < 3); if (idx < 2) { const scalar_type trunc_val = value >> DiscardBits; data[idx] &= ~Mask; - data[idx] |= trunc_val &Mask; + data[idx] |= trunc_val & Mask; } else { - const uint16_t zbits = StoreBits-BitsPerComponent; - const scalar_type zmask = (uint16_t(1u) << zbits) - uint16_t(1u); + const scalar_type zbits = scalar_type(DiscardBits); + const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); const scalar_type trunc_val = value >> (DiscardBits-1u); data[0] &= Mask; data[1] &= Mask; @@ -214,7 +210,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE uint16_t BitsPerComponent = StoreBits / Dim; NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << BitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant::value; scalar_type get(const uint16_t idx) @@ -278,7 +274,7 @@ struct QuantizedSequence; NBL_CONSTEXPR_STATIC_INLINE scalar_type Mask = (uint16_t(1u) << LeftoverBitsPerComponent) - uint16_t(1u); - NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = StoreBits - BitsPerComponent; + NBL_CONSTEXPR_STATIC_INLINE uint16_t DiscardBits = (uint16_t(8u) * size_of_v) - BitsPerComponent; NBL_CONSTEXPR_STATIC_INLINE uint32_t UNormConstant = impl::unorm_constant<8u*sizeof(scalar_type)>::value; base_type get(const uint16_t idx) From 59d4f79813821e45dedb9050684efe3a289a2e79 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 15 Dec 2025 17:03:08 +0700 Subject: [PATCH 071/140] fix z component storing too many bits in 
quantized sequence in vec2 data type for dim 3 Signed-off-by: Corey --- .../builtin/hlsl/sampling/quantized_sequence.hlsl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl index 24ca8eb66d..8929609c34 100644 --- a/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl +++ b/include/nbl/builtin/hlsl/sampling/quantized_sequence.hlsl @@ -164,22 +164,24 @@ struct QuantizedSequence= 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { return data[idx] & Mask; } else { - scalar_type z = data[0] >> BitsPerComponent; - z |= (data[1] >> BitsPerComponent) << DiscardBits; + const scalar_type zbits = scalar_type(DiscardBits); + const scalar_type zmask = (scalar_type(1u) << zbits) - scalar_type(1u); + scalar_type z = (data[0] >> BitsPerComponent) & zmask; + z |= ((data[1] >> BitsPerComponent) & zmask) << DiscardBits; return z; } } void set(const uint16_t idx, const scalar_type value) { - // assert(idx >= 0 && idx < 3); + assert(idx >= 0 && idx < 3); if (idx < 2) { const scalar_type trunc_val = value >> DiscardBits; @@ -190,7 +192,7 @@ struct QuantizedSequence> (DiscardBits-1u); + const scalar_type trunc_val = value >> DiscardBits; data[0] &= Mask; data[1] &= Mask; data[0] |= (trunc_val & zmask) << BitsPerComponent; From 7a8b9a521671f2161b4f2bbff4aca5a65589e232 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 10:54:43 +0700 Subject: [PATCH 072/140] mix_helper requirements include bool vectors Signed-off-by: Corey --- include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 5e3418efe0..0af2618348 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ 
b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -982,8 +982,8 @@ struct mix_helper NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) -struct mix_helper || concepts::Scalar) && concepts::BooleanScalar && !impl::MixCallingBuiltins) > +template NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) +struct mix_helper || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) From 0063658a27e5531e506cc9d86f226e9ea62f695d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 10:55:28 +0700 Subject: [PATCH 073/140] fix iridescent oriented eta bug Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index b13abc6632..954022e216 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -756,7 +756,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = base_type::ior1[0] / base_type::ior3[0]; + rcpEta.value = hlsl::promote(base_type::ior1[0] / base_type::ior3[0]); rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } From eb369cf538859a0dceb7e997627c7c82467dac83 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 11:27:07 +0700 Subject: [PATCH 074/140] partitionRandVar stores leftProb, fix minor bugs Signed-off-by: Corey --- .../builtin/hlsl/bxdf/base/cook_torrance_base.hlsl | 3 ++- .../hlsl/bxdf/transmission/smooth_dielectric.hlsl | 6 ++++-- include/nbl/builtin/hlsl/sampling/basic.hlsl | 12 +++++++----- .../hlsl/sampling/projected_spherical_triangle.hlsl | 2 +- 4 files changed, 14 insertions(+), 9 
deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index 5e5e543791..c3de375678 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -304,7 +304,8 @@ struct SCookTorrance scalar_type rcpChoiceProb; scalar_type z = u.z; sampling::PartitionRandVariable partitionRandVariable; - bool transmitted = partitionRandVariable(reflectance, z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectance; + bool transmitted = partitionRandVariable(z, rcpChoiceProb); const scalar_type LdotH = hlsl::mix(VdotH, ieee754::copySign(hlsl::sqrt(rcpEta.value2[0]*VdotH*VdotH + scalar_type(1.0) - rcpEta.value2[0]), -VdotH), transmitted); bool valid; diff --git a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl index 712b614755..6d5744fb49 100644 --- a/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/transmission/smooth_dielectric.hlsl @@ -41,7 +41,8 @@ struct SSmoothDielectric scalar_type rcpChoiceProb; sampling::PartitionRandVariable partitionRandVariable; - bool transmitted = partitionRandVariable(reflectance, u.z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectance; + bool transmitted = partitionRandVariable(u.z, rcpChoiceProb); ray_dir_info_type V = interaction.getV(); Refract r = Refract::create(V.getDirection(), interaction.getN()); @@ -128,7 +129,8 @@ struct SThinSmoothDielectric scalar_type rcpChoiceProb; scalar_type z = u.z; sampling::PartitionRandVariable partitionRandVariable; - const bool transmitted = partitionRandVariable(reflectionProb, z, rcpChoiceProb); + partitionRandVariable.leftProb = reflectionProb; + const bool transmitted = partitionRandVariable(z, rcpChoiceProb); remainderMetadata = hlsl::mix(reflectance, hlsl::promote(1.0) - reflectance, 
transmitted) * rcpChoiceProb; ray_dir_info_type V = interaction.getV(); diff --git a/include/nbl/builtin/hlsl/sampling/basic.hlsl b/include/nbl/builtin/hlsl/sampling/basic.hlsl index d0738dd930..9c575a22ce 100644 --- a/include/nbl/builtin/hlsl/sampling/basic.hlsl +++ b/include/nbl/builtin/hlsl/sampling/basic.hlsl @@ -19,14 +19,14 @@ template) struct PartitionRandVariable { using floating_point_type = T; - using uint_type = typename unsigned_integer_of_size::type; + using uint_type = unsigned_integer_of_size_t; - bool operator()(floating_point_type leftProb, NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) + bool operator()(NBL_REF_ARG(floating_point_type) xi, NBL_REF_ARG(floating_point_type) rcpChoiceProb) { - const floating_point_type NEXT_ULP_AFTER_UNITY = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); - const bool pickRight = xi >= leftProb * NEXT_ULP_AFTER_UNITY; + const floating_point_type NextULPAfterUnity = bit_cast(bit_cast(floating_point_type(1.0)) + uint_type(1u)); + const bool pickRight = xi >= leftProb * NextULPAfterUnity; - // This is all 100% correct taking into account the above NEXT_ULP_AFTER_UNITY + // This is all 100% correct taking into account the above NextULPAfterUnity xi -= pickRight ? leftProb : floating_point_type(0.0); rcpChoiceProb = floating_point_type(1.0) / (pickRight ? 
(floating_point_type(1.0) - leftProb) : leftProb); @@ -34,6 +34,8 @@ struct PartitionRandVariable return pickRight; } + + floating_point_type leftProb; }; diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index f2f29ed12b..0578af5b19 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -49,7 +49,7 @@ struct ProjectedSphericalTriangle // pre-warp according to proj solid angle approximation vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); Bilinear bilinear = Bilinear::create(patch); - u = bilinear.generate(rcpPdf, u); + u = bilinear.generate(rcpPdf, _u); // now warp the points onto a spherical triangle const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); From 92493dad08a0eceb3488c293bdb086e4ccf371bc Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 11:46:51 +0700 Subject: [PATCH 075/140] plain const for vector types Signed-off-by: Corey --- .../nbl/builtin/hlsl/sampling/bilinear.hlsl | 12 ++++----- .../hlsl/sampling/box_muller_transform.hlsl | 2 +- .../hlsl/sampling/concentric_mapping.hlsl | 2 +- .../hlsl/sampling/cos_weighted_spheres.hlsl | 10 +++---- include/nbl/builtin/hlsl/sampling/linear.hlsl | 4 +-- .../projected_spherical_triangle.hlsl | 12 ++++----- .../hlsl/sampling/spherical_rectangle.hlsl | 2 +- .../hlsl/sampling/spherical_triangle.hlsl | 8 +++--- .../hlsl/sampling/uniform_spheres.hlsl | 4 +-- .../hlsl/shapes/spherical_rectangle.hlsl | 6 ++--- .../hlsl/shapes/spherical_triangle.hlsl | 27 +++++++++++++++++-- 11 files changed, 56 insertions(+), 33 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 746713e4c4..a74869990f 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ 
b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -24,7 +24,7 @@ struct Bilinear using vector3_type = vector; using vector4_type = vector; - static Bilinear create(NBL_CONST_REF_ARG(vector4_type) bilinearCoeffs) + static Bilinear create(const vector4_type bilinearCoeffs) { Bilinear retval; retval.bilinearCoeffs = bilinearCoeffs; @@ -32,22 +32,22 @@ struct Bilinear return retval; } - vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) _u) + vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type _u) { - vector2_type u = _u; + vector2_type u; Linear lineary = Linear::create(twiceAreasUnderXCurve); - u.y = lineary.generate(u.y); + u.y = lineary.generate(_u.y); const vector2_type ySliceEndPoints = vector2_type(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[2], u.y), nbl::hlsl::mix(bilinearCoeffs[1], bilinearCoeffs[3], u.y)); Linear linearx = Linear::create(ySliceEndPoints); - u.x = linearx.generate(u.x); + u.x = linearx.generate(_u.x); rcpPdf = (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]) / (4.0 * nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], u.x)); return u; } - scalar_type pdf(NBL_CONST_REF_ARG(vector2_type) u) + scalar_type pdf(const vector2_type u) { return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]); } diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index 93cea06ee0..9474642f4c 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -21,7 +21,7 @@ struct BoxMullerTransform using scalar_type = T; using vector2_type = vector; - vector2_type operator()(vector2_type xi) + vector2_type operator()(const vector2_type xi) { scalar_type sinPhi, 
cosPhi; math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl index 5f29b80f8c..c44b55449d 100644 --- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl +++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl @@ -17,7 +17,7 @@ namespace sampling { template -vector concentricMapping(vector _u) +vector concentricMapping(const vector _u) { //map [0;1]^2 to [-1;1]^2 vector u = 2.0f * _u - hlsl::promote >(1.0); diff --git a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl index 9f95bf2ee5..ddbb961300 100644 --- a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl @@ -22,26 +22,26 @@ struct ProjectedHemisphere using vector_t2 = vector; using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { vector_t2 p = concentricMapping(_sample * T(0.99999) + T(0.000005)); T z = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - p.x * p.x - p.y * p.y)); return vector_t3(p.x, p.y, z); } - static T pdf(T L_z) + static T pdf(const T L_z) { return L_z * numbers::inv_pi; } template > - static sampling::quotient_and_pdf quotient_and_pdf(T L) + static sampling::quotient_and_pdf quotient_and_pdf(const T L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L)); } template > - static sampling::quotient_and_pdf quotient_and_pdf(vector_t3 L) + static sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) { return sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); } @@ -77,7 +77,7 @@ struct ProjectedSphere } template > - static sampling::quotient_and_pdf quotient_and_pdf(vector_t3 L) + static sampling::quotient_and_pdf quotient_and_pdf(const vector_t3 L) { return 
sampling::quotient_and_pdf::create(hlsl::promote(1.0), pdf(L.z)); } diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index ddd7bcf8df..6c3cf1fad9 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -21,7 +21,7 @@ struct Linear using scalar_type = T; using vector2_type = vector; - static Linear create(NBL_CONST_REF_ARG(vector2_type) linearCoeffs) // start and end importance values (start, end) + static Linear create(const vector2_type linearCoeffs) // start and end importance values (start, end) { Linear retval; retval.linearCoeffStart = linearCoeffs[0]; @@ -32,7 +32,7 @@ struct Linear return retval; } - scalar_type generate(scalar_type u) + scalar_type generate(const scalar_type u) { return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); } diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index 0578af5b19..e60fe28423 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -33,17 +33,17 @@ struct ProjectedSphericalTriangle return retval; } - vector4_type computeBilinearPatch(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF) + vector4_type computeBilinearPatch(const vector3_type receiverNormal, bool isBSDF) { const scalar_type minimumProjSolidAngle = 0.0; matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); - const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), (vector3_type)minimumProjSolidAngle); + const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), hlsl::promote(minimumProjSolidAngle)); return bxdfPdfAtVertex.yyxz; } - vector3_type 
generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) _u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool isBSDF, const vector2_type _u) { vector2_type u; // pre-warp according to proj solid angle approximation @@ -58,7 +58,7 @@ struct ProjectedSphericalTriangle return L; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool isBSDF, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; @@ -66,7 +66,7 @@ struct ProjectedSphericalTriangle return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); } - scalar_type pdf(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + scalar_type pdf(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) { scalar_type pdf; const vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, 
cos_c, csc_b, csc_c, L); @@ -76,7 +76,7 @@ struct ProjectedSphericalTriangle return pdf * bilinear.pdf(u); } - scalar_type pdf(NBL_CONST_REF_ARG(vector3_type) receiverNormal, bool receiverWasBSDF, NBL_CONST_REF_ARG(vector3_type) L) + scalar_type pdf(const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) { scalar_type pdf; const vector2_type u = sphtri.generateInverse(pdf, L); diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index f5c19fb864..f9e3d2f7ae 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -32,7 +32,7 @@ struct SphericalRectangle return retval; } - vector2_type generate(NBL_CONST_REF_ARG(vector2_type) rectangleExtents, NBL_CONST_REF_ARG(vector2_type) uv, NBL_REF_ARG(scalar_type) S) + vector2_type generate(const vector2_type rectangleExtents, const vector2_type uv, NBL_REF_ARG(scalar_type) S) { const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index c31e194788..5770403cd2 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -33,7 +33,7 @@ struct SphericalTriangle } // WARNING: can and will return NAN if one or three of the triangle edges are near zero length - vector3_type generate(scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(scalar_type solidAngle, 
const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector2_type u) { scalar_type negSinSubSolidAngle,negCosSubSolidAngle; math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); @@ -66,7 +66,7 @@ struct SphericalTriangle return retval; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, NBL_CONST_REF_ARG(vector2_type) u) + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type u) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; @@ -76,7 +76,7 @@ struct SphericalTriangle return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); } - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, NBL_CONST_REF_ARG(vector3_type) cos_vertices, NBL_CONST_REF_ARG(vector3_type) sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, NBL_CONST_REF_ARG(vector3_type) L) + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type L) { pdf = 1.0 / solidAngle; @@ -102,7 +102,7 @@ struct SphericalTriangle return vector2_type(u,v); } - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(vector3_type) L) + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L) { scalar_type cos_a, cos_c, csc_b, csc_c; vector3_type cos_vertices, sin_vertices; diff --git a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl index df4100db9b..5fc3bc7a0b 100644 --- a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl @@ -23,7 +23,7 @@ struct UniformHemisphere using vector_t2 = vector; 
using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { T z = _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); @@ -49,7 +49,7 @@ struct UniformSphere using vector_t2 = vector; using vector_t3 = vector; - static vector_t3 generate(vector_t2 _sample) + static vector_t3 generate(const vector_t2 _sample) { T z = T(1.0) - T(2.0) * _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index daeb3175c3..11442bef7c 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -25,14 +25,14 @@ struct SphericalRectangle using vector4_type = vector; using matrix3x3_type = matrix; - static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(matrix3x3_type) basis) + static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const matrix3x3_type basis) { SphericalRectangle retval; retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer); return retval; } - static SphericalRectangle create(NBL_CONST_REF_ARG(vector3_type) observer, NBL_CONST_REF_ARG(vector3_type) rectangleOrigin, NBL_CONST_REF_ARG(vector3_type) T, NBL_CONST_REF_ARG(vector3_type) B, NBL_CONST_REF_ARG(vector3_type) N) + static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const vector3_type T, vector3_type B, const vector3_type N) { SphericalRectangle retval; matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, N)); @@ -40,7 +40,7 @@ struct SphericalRectangle return retval; } - scalar_type solidAngleOfRectangle(NBL_CONST_REF_ARG(vector) rectangleExtents) + scalar_type solidAngleOfRectangle(const vector rectangleExtents) { const vector4_type 
denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index 246ca5a084..7304fa72e9 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -25,7 +25,7 @@ struct SphericalTriangle using scalar_type = T; using vector3_type = vector; - static SphericalTriangle create(NBL_CONST_REF_ARG(vector3_type) vertex0, NBL_CONST_REF_ARG(vector3_type) vertex1, NBL_CONST_REF_ARG(vector3_type) vertex2, NBL_CONST_REF_ARG(vector3_type) origin) + static SphericalTriangle create(const vector3_type vertex0, const vector3_type vertex1, const vector3_type vertex2, const vector3_type origin) { SphericalTriangle retval; retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin); @@ -72,7 +72,7 @@ struct SphericalTriangle return solidAngleOfTriangle(dummy0,dummy1,dummy2,dummy3,dummy4,dummy5); } - scalar_type projectedSolidAngleOfTriangle(NBL_CONST_REF_ARG(vector3_type) receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) + scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) { if (pyramidAngles()) return 0.f; @@ -102,6 +102,29 @@ struct SphericalTriangle vector3_type csc_sides; }; +namespace util +{ + // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
+ template + vector compInternalAngle(const vector e0, vector e1, const vector e2) + { + // Calculate this triangle's weight for each of its three m_vertices + // start by calculating the lengths of its sides + const float_t a = hlsl::dot(e0, e0); + const float_t asqrt = hlsl::sqrt(a); + const float_t b = hlsl::dot(e1, e1); + const float_t bsqrt = hlsl::sqrt(b); + const float_t c = hlsl::dot(e2, e2); + const float_t csqrt = hlsl::sqrt(c); + + const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); + // use them to find the angle at each vertex + return vector(angle0, angle1, angle2); + } +} + } } } From 38f68a7ec79f039a780868285ab7eeb955999a06 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 14:11:16 +0700 Subject: [PATCH 076/140] fixes to iridescent fresnel, moved getOrientedEtaRcp to dielectric fresnels only Signed-off-by: Corey --- .../hlsl/bxdf/base/cook_torrance_base.hlsl | 2 +- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 51 ++++++++++--------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl index c3de375678..d70e8823da 100644 --- a/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/base/cook_torrance_base.hlsl @@ -280,7 +280,7 @@ struct SCookTorrance const scalar_type NdotV = localV.z; fresnel_type _f = __getOrientedFresnel(fresnel, NdotV); - fresnel::OrientedEtaRcps rcpEta = _f.getOrientedEtaRcps(); + fresnel::OrientedEtaRcps rcpEta = _f.getRefractionOrientedEtaRcps(); const vector3_type upperHemisphereV = ieee754::flipSignIfRHSNegative(localV, hlsl::promote(NdotV)); const vector3_type localH = ndf.generateH(upperHemisphereV, u.xy); diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl 
b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index 954022e216..d32d3de16c 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -313,9 +313,7 @@ NBL_CONCEPT_BEGIN(2) NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) ((NBL_CONCEPT_REQ_TYPE)(T::vector_type)) - ((NBL_CONCEPT_REQ_TYPE)(T::eta_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel(cosTheta)), ::nbl::hlsl::is_same_v, typename T::vector_type)) - ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ); #undef cosTheta #undef fresnel @@ -331,7 +329,9 @@ NBL_CONCEPT_BEGIN(2) #define cosTheta NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_1 NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(Fresnel, T)) + ((NBL_CONCEPT_REQ_TYPE)(T::eta_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEta()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getReorientedFresnel(cosTheta)), ::nbl::hlsl::is_same_v, T)) ); #undef cosTheta @@ -362,7 +362,7 @@ struct Schlick return F0 + (1.0 - F0) * x*x*x*x*x; } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { const eta_type sqrtF0 = hlsl::sqrt(F0); OrientedEtaRcps rcpEta; @@ -424,13 +424,13 @@ struct Conductor return (rs2 + rp2) * hlsl::promote(0.5); } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC - { - OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(1.0) / eta; - rcpEta.value2 = rcpEta.value * rcpEta.value; - return rcpEta; - } + // OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + // { + // OrientedEtaRcps rcpEta; + // rcpEta.value = hlsl::promote(1.0) / eta; + // rcpEta.value2 = rcpEta.value * rcpEta.value; + // return rcpEta; + // } T eta; T etak2; @@ -484,7 +484,7 @@ 
struct Dielectric // default to monochrome, but it is possible to have RGB fresnel without dispersion fixing the refraction Eta // to be something else than the etas used to compute RGB reflectance or some sort of interpolation of them scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return orientedEta.value[0]; } - OrientedEtaRcps getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { return orientedEta.getReciprocals(); } + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { return orientedEta.getReciprocals(); } Dielectric getReorientedFresnel(const scalar_type NdotI) NBL_CONST_MEMBER_FUNC { @@ -548,8 +548,6 @@ struct iridescent_helper static T __call(const vector_type _D, const vector_type ior1, const vector_type ior2, const vector_type ior3, const vector_type iork3, const vector_type eta12, const vector_type eta23, const vector_type etak23, const scalar_type clampedCosTheta) { - const vector_type wavelengths = vector_type(Colorspace::wavelength_R, Colorspace::wavelength_G, Colorspace::wavelength_B); - const scalar_type cosTheta_1 = clampedCosTheta; vector_type R12p, R23p, R12s, R23s; vector_type cosTheta_2; @@ -589,7 +587,6 @@ struct iridescent_helper // Optical Path Difference const vector_type D = _D * cosTheta_2; - const vector_type Dphi = hlsl::promote(2.0 * numbers::pi) * D / wavelengths; vector_type phi21p, phi21s, phi23p, phi23s, r123s, r123p, Rs; vector_type I = hlsl::promote(0.0); @@ -635,7 +632,7 @@ struct iridescent_helper I += Cm*Sm; } - return hlsl::max(colorspace::scRGB::FromXYZ(I) * hlsl::promote(0.5), hlsl::promote(0.0)); + return hlsl::max(Colorspace::FromXYZ(I) * hlsl::promote(0.5), hlsl::promote(0.0)); } }; @@ -652,6 +649,7 @@ struct iridescent_base vector_type iork3; vector_type eta12; // outside (usually air 1.0) -> thin-film IOR vector_type eta23; // thin-film -> base material IOR + vector_type eta13; }; } @@ -688,6 +686,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC - { - OrientedEtaRcps rcpEta; 
- rcpEta.value = hlsl::promote(1.0) / base_type::eta23; - rcpEta.value2 = rcpEta.value * rcpEta.value; - return rcpEta; - } + // OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + // { + // OrientedEtaRcps rcpEta; + // rcpEta.value = hlsl::promote(1.0) / base_type::eta13; + // rcpEta.value2 = rcpEta.value * rcpEta.value; + // return rcpEta; + // } vector_type getEtak23() NBL_CONST_MEMBER_FUNC { @@ -743,6 +742,7 @@ struct Iridescent getOrientedEtaRcps() NBL_CONST_MEMBER_FUNC + scalar_type getRefractionOrientedEta() NBL_CONST_MEMBER_FUNC { return base_type::eta13[0]; } + OrientedEtaRcps getRefractionOrientedEtaRcps() NBL_CONST_MEMBER_FUNC { OrientedEtaRcps rcpEta; - rcpEta.value = hlsl::promote(base_type::ior1[0] / base_type::ior3[0]); + rcpEta.value = hlsl::promote(1.0) / hlsl::promote(base_type::eta13[0]); rcpEta.value2 = rcpEta.value * rcpEta.value; return rcpEta; } @@ -771,6 +771,7 @@ struct Iridescent(1.0)/base_type::eta23, flip); orientedFresnel.eta23 = hlsl::mix(base_type::eta23, hlsl::promote(1.0)/base_type::eta12, flip); + orientedFresnel.eta13 = hlsl::mix(base_type::eta13, hlsl::promote(1.0)/base_type::eta13, flip); return orientedFresnel; } From 5c1dc531d235a3d0ae9a27b5b855b53bb9ace6e8 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 15:22:23 +0700 Subject: [PATCH 077/140] adjust mix_helper calling select requirements Signed-off-by: Corey --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 0af2618348..2b1f56d57d 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -982,8 +982,19 @@ struct mix_helper NBL_PARTIAL_REQ_TOP((concepts::Vectorial || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) -struct 
mix_helper || concepts::Scalar) && concepts::Boolean && !impl::MixCallingBuiltins) > +namespace impl +{ +template +NBL_BOOL_CONCEPT MixCallingSelect = +#ifdef __HLSL_VERSION +spirv::SelectIsCallable; +#else +concepts::Boolean && (concepts::Scalar || (concepts::Vector && vector_traits::Dimension==vector_traits::Dimension)) && !MixCallingBuiltins; +#endif +} + +template NBL_PARTIAL_REQ_TOP(impl::MixCallingSelect) +struct mix_helper) > { using return_t = T; static return_t __call(NBL_CONST_REF_ARG(T) x, NBL_CONST_REF_ARG(T) y, NBL_CONST_REF_ARG(U) a) From a846ae4f45ea03a623cf4061165421342905df94 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 15:35:50 +0700 Subject: [PATCH 078/140] restore regular triangle stuff, refactor usage Signed-off-by: Corey --- .../hlsl/shapes/spherical_triangle.hlsl | 23 ---------- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 46 +++++++++++++++++++ .../asset/utils/CSmoothNormalGenerator.cpp | 4 +- src/nbl/builtin/CMakeLists.txt | 1 + 4 files changed, 49 insertions(+), 25 deletions(-) create mode 100644 include/nbl/builtin/hlsl/shapes/triangle.hlsl diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index 7304fa72e9..f574b106ce 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -102,29 +102,6 @@ struct SphericalTriangle vector3_type csc_sides; }; -namespace util -{ - // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
- template - vector compInternalAngle(const vector e0, vector e1, const vector e2) - { - // Calculate this triangle's weight for each of its three m_vertices - // start by calculating the lengths of its sides - const float_t a = hlsl::dot(e0, e0); - const float_t asqrt = hlsl::sqrt(a); - const float_t b = hlsl::dot(e1, e1); - const float_t bsqrt = hlsl::sqrt(b); - const float_t c = hlsl::dot(e2, e2); - const float_t csqrt = hlsl::sqrt(c); - - const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); - const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); - const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); - // use them to find the angle at each vertex - return vector(angle0, angle1, angle2); - } -} - } } } diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl new file mode 100644 index 0000000000..d64fc9d29d --- /dev/null +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace shapes +{ + +namespace util +{ + // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
+ template + vector anglesFromTriangleEdge(const vector e0, vector e1, const vector e2) + { + // Calculate this triangle's weight for each of its three m_vertices + // start by calculating the lengths of its sides + const float_t a = hlsl::dot(e0, e0); + const float_t asqrt = hlsl::sqrt(a); + const float_t b = hlsl::dot(e1, e1); + const float_t bsqrt = hlsl::sqrt(b); + const float_t c = hlsl::dot(e2, e2); + const float_t csqrt = hlsl::sqrt(c); + + const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); + // use them to find the angle at each vertex + return vector(angle0, angle1, angle2); + } +} + +} +} +} + +#endif \ No newline at end of file diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 43413152a8..2ed1d4e19e 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -5,7 +5,7 @@ #include "CSmoothNormalGenerator.h" #include "nbl/core/declarations.h" -#include "nbl/builtin/hlsl/shapes/spherical_triangle.hlsl" +#include "nbl/builtin/hlsl/shapes/triangle.hlsl" #include @@ -58,7 +58,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::compInternalAngle(v2 - v1, v0 - v2, v1 - v2); + const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdge(v2 - v1, v0 - v2, v1 - v2); vertices.add({ i, 0, faceNormal * angleWages.x, v0}); vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e76b6b6f99..816f49fd73 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -253,6 +253,7 @@ 
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/circle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/ellipse.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/line.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/beziers.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_triangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/spherical_rectangle.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/shapes/aabb.hlsl") From eb2679efcb2ff4a127d902b73ab0f295b0e6c590 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 16:13:14 +0700 Subject: [PATCH 079/140] minor changes to rwmc usage Signed-off-by: Corey --- .../builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 9 +++++---- .../builtin/hlsl/rwmc/SplattingParameters.hlsl | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 593e267a26..2ab953b469 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -69,12 +69,9 @@ struct CascadeAccumulator // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp void addSample(uint32_t sampleCount, input_sample_type _sample) { - const float32_t2 unpackedParams = hlsl::unpackHalf2x16(splattingParameters.packedLog2); - const cascade_layer_scalar_type log2Start = unpackedParams[0]; - const cascade_layer_scalar_type log2Base = unpackedParams[1]; const cascade_layer_scalar_type luma = getLuma(_sample); const cascade_layer_scalar_type log2Luma = log2(luma); - const cascade_layer_scalar_type cascade = log2Luma * 1.f / log2Base - log2Start / log2Base; + const cascade_layer_scalar_type cascade = log2Luma * splattingParameters.rcpLog2Base - 
splattingParameters.baseRootOfStart; const cascade_layer_scalar_type clampedCascade = clamp(cascade, 0, CascadeCount - 1); // c<=0 -> 0, c>=Count-1 -> Count-1 uint32_t lowerCascadeIndex = floor(cascade); @@ -85,7 +82,11 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) + { + const cascade_layer_scalar_type log2Base = cascade_layer_scalar_type(1.0) / splattingParameters.rcpLog2Base; + const cascade_layer_scalar_type log2Start = splattingParameters.baseRootOfStart * log2Base; lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); + } accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index c549d83be6..df39660d95 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_RWMC_SPLATTING_PARAMETERS_HLSL_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" namespace nbl { @@ -12,10 +13,18 @@ namespace rwmc struct SplattingParameters { - // float16_t log2Start; 0 - // float16_t log2Base; 1 - // pack as Half2x16 - int32_t packedLog2; + using scalar_t = float; + + static SplattingParameters create(const scalar_t base, const scalar_t start) + { + SplattingParameters retval; + retval.rcpLog2Base = scalar_t(1.0) / hlsl::log2(base); + retval.baseRootOfStart = hlsl::log2(start) * retval.rcpLog2Base; + return retval; + } + + scalar_t baseRootOfStart; + scalar_t rcpLog2Base; }; } From 8fc944a0f9b1a6d610b7c42be25e47d863cd1544 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 16:26:12 +0700 Subject: [PATCH 080/140] remove storing texture inside local var of ResolveAccessorAdaptor Signed-off-by: Corey --- include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 2 
-- 1 file changed, 2 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index d8f777d277..6c2b8b3230 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -48,8 +48,6 @@ struct ResolveAccessorAdaptor using output_type = vector; NBL_CONSTEXPR int32_t image_dimension = 2; - RWTexture2DArray cascade; - float32_t calcLuma(NBL_REF_ARG(float32_t3) col) { return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); From f75982127779aea2b142a1615fcfa1ca5761d840 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 16:53:31 +0700 Subject: [PATCH 081/140] removed accessor, user should provide accessor that matches concept Signed-off-by: Corey --- include/nbl/builtin/hlsl/rwmc/resolve.hlsl | 26 ---------------------- 1 file changed, 26 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl index 6c2b8b3230..906cad512b 100644 --- a/include/nbl/builtin/hlsl/rwmc/resolve.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/resolve.hlsl @@ -41,32 +41,6 @@ NBL_CONCEPT_END( template NBL_BOOL_CONCEPT ResolveAccessor = ResolveAccessorBase && concepts::accessors::LoadableImage; -template -struct ResolveAccessorAdaptor -{ - using output_scalar_type = OutputScalar; - using output_type = vector; - NBL_CONSTEXPR int32_t image_dimension = 2; - - float32_t calcLuma(NBL_REF_ARG(float32_t3) col) - { - return hlsl::dot(colorspace::scRGB::ToXYZ()[1], col); - } - - template - output_type get(vector uv, uint16_t layer) - { - uint32_t imgWidth, imgHeight, layers; - cascade.GetDimensions(imgWidth, imgHeight, layers); - int16_t2 cascadeImageDimension = int16_t2(imgWidth, imgHeight); - - if (any(uv < int16_t2(0, 0)) || any(uv > cascadeImageDimension)) - return vector(0, 0, 0, 0); - - return cascade.Load(int32_t3(uv, int32_t(layer))); - } -}; - template && ResolveAccessor) struct Resolver { From c3567491b8101ebcf97cb1d0d7be12f81b7db30b 
Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:02:49 +0700 Subject: [PATCH 082/140] fix formatting, name Signed-off-by: Corey --- include/nbl/builtin/hlsl/shapes/triangle.hlsl | 36 +++++++++---------- .../asset/utils/CSmoothNormalGenerator.cpp | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl index d64fc9d29d..b2f4170f70 100644 --- a/include/nbl/builtin/hlsl/shapes/triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl @@ -18,25 +18,25 @@ namespace shapes namespace util { - // Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. - template - vector anglesFromTriangleEdge(const vector e0, vector e1, const vector e2) - { - // Calculate this triangle's weight for each of its three m_vertices - // start by calculating the lengths of its sides - const float_t a = hlsl::dot(e0, e0); - const float_t asqrt = hlsl::sqrt(a); - const float_t b = hlsl::dot(e1, e1); - const float_t bsqrt = hlsl::sqrt(b); - const float_t c = hlsl::dot(e2, e2); - const float_t csqrt = hlsl::sqrt(c); +// Use this convetion e_i = v_{i+2}-v_{i+1}. vertex index is modulo by 3. 
+template +vector anglesFromTriangleEdges(const vector e0, vector e1, const vector e2) +{ + // Calculate this triangle's weight for each of its three m_vertices + // start by calculating the lengths of its sides + const float_t a = hlsl::dot(e0, e0); + const float_t asqrt = hlsl::sqrt(a); + const float_t b = hlsl::dot(e1, e1); + const float_t bsqrt = hlsl::sqrt(b); + const float_t c = hlsl::dot(e2, e2); + const float_t csqrt = hlsl::sqrt(c); - const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); - const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); - const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); - // use them to find the angle at each vertex - return vector(angle0, angle1, angle2); - } + const float_t angle0 = hlsl::acos((b + c - a) / (2.f * bsqrt * csqrt)); + const float_t angle1 = hlsl::acos((-b + c + a) / (2.f * asqrt * csqrt)); + const float_t angle2 = hlsl::numbers::pi - (angle0 + angle1); + // use them to find the angle at each vertex + return vector(angle0, angle1, angle2); +} } } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index 2ed1d4e19e..f8bc45a317 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -58,7 +58,7 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdge(v2 - v1, v0 - v2, v1 - v2); + const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdges(v2 - v1, v0 - v2, v1 - v2); vertices.add({ i, 0, faceNormal * angleWages.x, v0}); vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); From 0478e76e38207f162b13ad4b971bbb36773985bb Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:09:48 +0700 Subject: [PATCH 083/140] added more things to precompute Signed-off-by: 
Corey --- include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 6 +----- include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl | 10 +++++++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 2ab953b469..1ed8884206 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -82,11 +82,7 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) - { - const cascade_layer_scalar_type log2Base = cascade_layer_scalar_type(1.0) / splattingParameters.rcpLog2Base; - const cascade_layer_scalar_type log2Start = splattingParameters.baseRootOfStart * log2Base; - lowerCascadeWeight = exp2(log2Start + log2Base * (CascadeCount - 1) - log2Luma); - } + lowerCascadeWeight = exp2(splattingParameters.log2Start + splattingParameters.log2Base * (CascadeCount - 1) - log2Luma); accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index df39660d95..a6c479a8e2 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -18,11 +18,15 @@ struct SplattingParameters static SplattingParameters create(const scalar_t base, const scalar_t start) { SplattingParameters retval; - retval.rcpLog2Base = scalar_t(1.0) / hlsl::log2(base); - retval.baseRootOfStart = hlsl::log2(start) * retval.rcpLog2Base; + retval.log2Base = hlsl::log2(base); + retval.log2Start = hlsl::log2(start); + retval.rcpLog2Base = scalar_t(1.0) / retval.log2Base; + retval.baseRootOfStart = retval.log2Start * retval.rcpLog2Base; return retval; } - + + scalar_t log2Base; + scalar_t log2Start; scalar_t baseRootOfStart; scalar_t 
rcpLog2Base; }; From 5c83e69111cce4f765c42d004d959bc34dabb61a Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:27:55 +0700 Subject: [PATCH 084/140] changes to splatting params precompute for the last time Signed-off-by: Corey --- .../nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl | 2 +- .../nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl index 1ed8884206..9413bcee98 100644 --- a/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl @@ -82,7 +82,7 @@ struct CascadeAccumulator // handle super bright sample case if (cascade > CascadeCount - 1) - lowerCascadeWeight = exp2(splattingParameters.log2Start + splattingParameters.log2Base * (CascadeCount - 1) - log2Luma); + lowerCascadeWeight = splattingParameters.lastCascadeLuma / luma; accumulation.addSampleIntoCascadeEntry(_sample, lowerCascadeIndex, lowerCascadeWeight, higherCascadeWeight, sampleCount); } diff --git a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl index a6c479a8e2..a3a3520415 100644 --- a/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl +++ b/include/nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl @@ -15,18 +15,18 @@ struct SplattingParameters { using scalar_t = float; - static SplattingParameters create(const scalar_t base, const scalar_t start) + static SplattingParameters create(const scalar_t base, const scalar_t start, const uint32_t cascadeCount) { SplattingParameters retval; - retval.log2Base = hlsl::log2(base); - retval.log2Start = hlsl::log2(start); - retval.rcpLog2Base = scalar_t(1.0) / retval.log2Base; - retval.baseRootOfStart = retval.log2Start * retval.rcpLog2Base; + const scalar_t log2Base = hlsl::log2(base); + const scalar_t log2Start = hlsl::log2(start); + 
retval.lastCascadeLuma = hlsl::exp2(log2Start + log2Base * (cascadeCount - 1)); + retval.rcpLog2Base = scalar_t(1.0) / log2Base; + retval.baseRootOfStart = log2Start * retval.rcpLog2Base; return retval; } - scalar_t log2Base; - scalar_t log2Start; + scalar_t lastCascadeLuma; scalar_t baseRootOfStart; scalar_t rcpLog2Base; }; From ea1919ee5e6449d9757af7132ce14b891a0e8b25 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Dec 2025 17:41:18 +0700 Subject: [PATCH 085/140] check that eta type of dielectric fresnels should be monochrome Signed-off-by: Corey --- include/nbl/builtin/hlsl/bxdf/fresnel.hlsl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl index d32d3de16c..33faa79efc 100644 --- a/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl +++ b/include/nbl/builtin/hlsl/bxdf/fresnel.hlsl @@ -319,6 +319,12 @@ NBL_CONCEPT_END( #undef fresnel #include +namespace impl +{ +template +NBL_BOOL_CONCEPT VectorIsMonochrome = vector_traits::Dimension == 1; +} + #define NBL_CONCEPT_NAME TwoSidedFresnel #define NBL_CONCEPT_TPLT_PRM_KINDS (typename) #define NBL_CONCEPT_TPLT_PRM_NAMES (T) @@ -333,6 +339,7 @@ NBL_CONCEPT_END( ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEta()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getRefractionOrientedEtaRcps()), ::nbl::hlsl::is_same_v, OrientedEtaRcps)) ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((fresnel.getReorientedFresnel(cosTheta)), ::nbl::hlsl::is_same_v, T)) + ((NBL_CONCEPT_REQ_TYPE_ALIAS_CONCEPT)(impl::VectorIsMonochrome, typename T::eta_type)) ); #undef cosTheta #undef fresnel From 367b9bf1cda6e4cdf39e6f3d9397deae0e7a7f1c Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Tue, 16 Dec 2025 13:02:12 +0100 Subject: [PATCH 086/140] Change `Compile flag error` to `Compile flag warning` so CI logs are easier to Ctrl+F Signed-off-by: Corey --- src/nbl/asset/utils/CHLSLCompiler.cpp | 10 
+++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nbl/asset/utils/CHLSLCompiler.cpp b/src/nbl/asset/utils/CHLSLCompiler.cpp index 306d2f60de..d36ecfa1cb 100644 --- a/src/nbl/asset/utils/CHLSLCompiler.cpp +++ b/src/nbl/asset/utils/CHLSLCompiler.cpp @@ -115,11 +115,11 @@ static bool fixup_spirv_target_ver(std::vector& arguments, system: const auto found = AllowedSuffices.find(suffix); if (found!=AllowedSuffices.end()) return true; - logger.log("Compile flag error: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env= found but with unsupported value `%s`.", system::ILogger::ELL_ERROR, "TODO: write wchar to char convert usage"); + logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env= found but with unsupported value `%s`.", system::ILogger::ELL_ERROR, "TODO: write wchar to char convert usage"); return false; } - logger.log("Compile flag error: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env=vulkan1.3, as it is required by Nabla.", system::ILogger::ELL_WARNING); + logger.log("Compile flag warning: Required compile flag not found -fspv-target-env=. Force enabling -fspv-target-env=vulkan1.3, as it is required by Nabla.", system::ILogger::ELL_WARNING); arguments.push_back(L"-fspv-target-env=vulkan1.3"); return true; } @@ -148,7 +148,7 @@ static void try_upgrade_hlsl_version(std::vector& arguments, syste } else { - logger.log("Compile flag error: Required compile flag not found -HV. Force enabling -HV 202x, as it is required by Nabla.", system::ILogger::ELL_WARNING); + logger.log("Compile flag warning: Required compile flag not found -HV. 
Force enabling -HV 202x, as it is required by Nabla.", system::ILogger::ELL_WARNING); arguments.push_back(L"-HV"); arguments.push_back(L"202x"); } @@ -254,7 +254,7 @@ static void add_required_arguments_if_not_present(std::vector& arg { bool missing = set.find(required[j]) == set.end(); if (missing) { - logger.log("Compile flag error: Required compile flag not found %ls. This flag will be force enabled, as it is required by Nabla.", system::ILogger::ELL_WARNING, required[j]); + logger.log("Compile flag warning: Required compile flag not found %ls. This flag will be force enabled, as it is required by Nabla.", system::ILogger::ELL_WARNING, required[j]); arguments.push_back(required[j]); } } @@ -534,4 +534,4 @@ void CHLSLCompiler::insertIntoStart(std::string& code, std::ostringstream&& ins) code.insert(0u, ins.str()); } -#endif \ No newline at end of file +#endif From e1343781586524ab902b2227bf0fff503c92843e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 18 Dec 2025 16:23:48 +0100 Subject: [PATCH 087/140] Create docs for NSC prebuilds Signed-off-by: Corey --- docs/nsc-prebuilds.md | 386 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 386 insertions(+) create mode 100644 docs/nsc-prebuilds.md diff --git a/docs/nsc-prebuilds.md b/docs/nsc-prebuilds.md new file mode 100644 index 0000000000..4d57d7a8de --- /dev/null +++ b/docs/nsc-prebuilds.md @@ -0,0 +1,386 @@ +# NSC prebuilds (build-time HLSL -> SPIR-V) + +This document explains how to use `NBL_CREATE_NSC_COMPILE_RULES` together with `NBL_CREATE_RESOURCE_ARCHIVE` to: + +- Compile HLSL to SPIR-V at **build time** (via the `nsc` tool). +- Optionally generate **device-cap permutations** (limits/features "CAPS"). +- Generate a small C++ header with **type-safe key getters** (`get_spirv_key<...>()`). +- Make the same code work with `NBL_EMBED_BUILTIN_RESOURCES` **ON** (embedded virtual archive) and **OFF** (mounted build directory) when loading your precompiled SPIR-V at runtime. 
+ +Definitions live in `cmake/common.cmake` (`NBL_CREATE_NSC_COMPILE_RULES`, `NBL_CREATE_RESOURCE_ARCHIVE`). + +## Runtime mounting requirement (important) + +All of this assumes your app mounts the directory/archive containing the NSC outputs (i.e. `BINARY_DIR`) into Nabla's virtual filesystem, then loads files via keys that are relative to that mounted root (the examples use `app_resources`). + +The examples "just work" because they inherit from `nbl::examples::BuiltinResourcesApplication`, which mounts: + +- `NBL_EMBED_BUILTIN_RESOURCES=OFF`: `system::CMountDirectoryArchive(NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT, ...)` at `app_resources` +- `NBL_EMBED_BUILTIN_RESOURCES=ON`: the generated embedded archive (e.g. `nbl::this_example::builtin::build::CArchive`) at `app_resources` + +If you're writing your own app/extension and don't use `BuiltinResourcesApplication`, you must mount equivalently yourself (split by `NBL_EMBED_BUILTIN_RESOURCES`). Optionally set `IAssetLoader::SAssetLoadParams::workingDirectory` to whatever virtual root you want to load from. + +The `MOUNT_POINT_DEFINE` argument of `NBL_CREATE_NSC_COMPILE_RULES` defines a C/C++ macro whose value is the absolute path to the NSC output directory (`BINARY_DIR`) that you mount when builtins are off (in examples it's `NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT`). + +See `examples_tests/common/include/nbl/examples/common/BuiltinResourcesApplication.hpp` for the exact mounting logic. + +## Why build-time NSC instead of runtime compilation? + +Build-time compilation is usually preferable because it: + +- Uses your build system's parallelism (Ninja/MSBuild jobs) to compile shaders quickly. +- Writes **only into the build tree** (no source tree pollution, easy clean/reconfigure). +- Lets CI validate "shaders compile" as part of a normal build. +- Enables fast runtime iteration: at runtime you only **pick** the right SPIR-V, you don't compile it. 
+- Makes shader compilation deterministic and reproducible (toolchain + flags captured by the build). + +Runtime compilation is still useful for prototyping, but (assuming you don't use a runtime shader cache) it can make startup slower and shift failures to runtime instead of CI/build (a cache can hide the repeated cost on subsequent runs; our current one has some rough edges: it writes into the source tree and has issues when compiling many inputs from the same source directory). + +## What `NBL_CREATE_NSC_COMPILE_RULES` produces + +For each registered input it generates: + +- One `.spv` output **per CMake configuration** (`Debug/`, `Release/`, `RelWithDebInfo/`). +- If you use `CAPS`, it generates a **cartesian product** of permutations and emits a `.spv` for each. +- A generated header (you choose the path via `INCLUDE`) containing: + - a primary template `get_spirv_key<Key>(limits, features)` and `get_spirv_key<Key>(device)` + - explicit specializations for each registered base `KEY` + - the returned key already includes the build config prefix (compiled into the header). + +Keys are strings that match the output layout: + +``` +<CONFIG>/<KEY>(.<name>_<value>)(.<name>_<value>)....spv +``` + +## The JSON "INPUTS" format + +`INPUTS` is a JSON array of objects. Each object supports: + +- `INPUT` (string, required): path to `.hlsl` (relative to `CMAKE_CURRENT_SOURCE_DIR` or absolute). +- `KEY` (string, required): base key (prefer without `.spv`; it is always appended, so using `foo.spv` will result in `foo.spv.spv`). +- `COMPILE_OPTIONS` (array of strings, optional): per-input extra options (e.g. `["-T","cs_6_8"]`). +- `DEPENDS` (array of strings, optional): per-input dependencies (extra files that should trigger rebuild). +- `CAPS` (array, optional): permutation caps (see below). + +You can register many rules in a single call, and you can call the function multiple times to append rules to the same `TARGET`.
+ +## Compile options (generator expressions, defaults, debug info) + +`NBL_CREATE_NSC_COMPILE_RULES` combines options from multiple sources: + +- Built-in defaults from the helper (see `cmake/common.cmake`): HLSL version, Vulkan SPIR-V target env, scalar layout, warnings, and per-config optimization flags (e.g. `-O0` for Debug, `-O3` for Release) implemented via CMake generator expressions. +- Global extra options via `COMMON_OPTIONS` (CMake list). +- Per-input extra options via JSON `COMPILE_OPTIONS` (array of strings). + +Both `COMMON_OPTIONS` and JSON `COMPILE_OPTIONS` support CMake generator expressions like `$<$<CONFIG:Debug>:...>` (the helper uses them itself), so you can make flags configuration-dependent when needed. + +### Debug info for RenderDoc + +The helper also exposes CMake options that append NSC debug flags **only for Debug config** (via generator expressions). Enable them if you want RenderDoc to show source/line information instead of just raw disassembly: + +- `NSC_DEBUG_EDIF_FILE_BIT` (default `ON`) -> `-fspv-debug=file` +- `NSC_DEBUG_EDIF_TOOL_BIT` (default `ON`) -> `-fspv-debug=tool` +- `NSC_DEBUG_EDIF_SOURCE_BIT` (default `OFF`) -> `-fspv-debug=source` +- `NSC_DEBUG_EDIF_LINE_BIT` (default `OFF`) -> `-fspv-debug=line` +- `NSC_DEBUG_EDIF_NON_SEMANTIC_BIT` (default `OFF`) -> `-fspv-debug=vulkan-with-source` + +## Source files and rebuild dependencies (important) + +Make sure shader inputs and includes are: + +1. Marked as header-only on your target (so the IDE shows them, but the build system doesn't try to compile them with default HLSL rules like `fxc`): + +```cmake +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) +``` + +2. Listed as dependencies of the NSC custom commands (so editing any of them triggers a rebuild of the `.spv` outputs). + +This is what the `DEPENDS` argument of `NBL_CREATE_NSC_COMPILE_RULES` (and/or per-input JSON `DEPENDS`) is for.
Always include the main `INPUT` file itself and any files it includes; otherwise the build system might not re-run `nsc` when you change them. + +## Minimal usage (no permutations) + +Example pattern (as in `examples_tests/27_MPMCScheduler/CMakeLists.txt`): + +```cmake +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.comp.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "cs_6_8"], + "DEPENDS": [], + "CAPS": [] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) +``` + +Then include the generated header and use the key to load the SPIR-V: + +```cpp +#include "nbl/this_example/builtin/build/spirv/keys.hpp" +// ... +auto key = nbl::this_example::builtin::build::get_spirv_key<"shader">(device); +auto bundle = assetMgr->getAsset(key.c_str(), loadParams); +``` + +`OUTPUT_VAR` (here: `KEYS`) is assigned the list of **all** produced access keys (all configurations + all permutations). This list is intended to be fed into `NBL_CREATE_RESOURCE_ARCHIVE(BUILTINS ${KEYS})`. + +## Permutations via `CAPS` + +`CAPS` lets you prebuild multiple SPIR-V variants parameterized by device limits or features. + +Each `CAPS` entry looks like: + +- `kind` (string, optional): `"limits"` or `"features"` (defaults to `"limits"` if omitted/invalid). +- `name` (string, required): identifier used in both generated HLSL config and C++ key (must be a valid C/C++ identifier). 
+- `type` (string, required): `bool`, `uint16_t`, `uint32_t`, `uint64_t`. +- `values` (array of numbers, required): the values you want to prebuild. + - for `bool`, values must be `0` or `1`. + +At build time, NSC compiles each combination of values (cartesian product). At runtime, `get_spirv_key` appends suffixes using the `limits`/`features` you pass in. + +### Example: mixing `limits` and `features` + +This example permutes over one device limit and one device feature (order matters: the suffix order matches the `CAPS` array order): + +```cmake +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "lib_6_8"], + "DEPENDS": ["app_resources/common.hlsl"], + "CAPS": [ + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + }, + { + "kind": "features", + "name": "shaderFloat64", + "type": "bool", + "values": [0, 1] + } + ] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + # ... + OUTPUT_VAR KEYS + INPUTS ${JSON} +) +``` + +This produces `3 * 2 = 6` permutations per build configuration, and `KEYS` contains all of them (for example): + +``` +Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_0.spv +Debug/shader.maxComputeSharedMemorySize_16384.shaderFloat64_1.spv +... +``` + +Practical tip: for numeric limits you often want to "bucket" real device values into one of the prebuilt values. 
The CountingSort example does exactly that: + +- CMake definition: `examples_tests/10_CountingSort/CMakeLists.txt` +- Runtime bucketing: `examples_tests/10_CountingSort/main.cpp` + +```cpp +auto limits = m_physicalDevice->getLimits(); +constexpr std::array AllowedMaxComputeSharedMemorySizes = { 16384, 32768, 65536 }; + +auto upperBoundSharedMemSize = std::upper_bound( + AllowedMaxComputeSharedMemorySizes.begin(), AllowedMaxComputeSharedMemorySizes.end(), limits.maxComputeSharedMemorySize +); +// devices which support less than 16KB of max compute shared memory size are not supported +if (upperBoundSharedMemSize == AllowedMaxComputeSharedMemorySizes.begin()) +{ + m_logger->log("maxComputeSharedMemorySize is too low (%u)", ILogger::E_LOG_LEVEL::ELL_ERROR, limits.maxComputeSharedMemorySize); + exit(0); +} + +limits.maxComputeSharedMemorySize = *(upperBoundSharedMemSize - 1); + +auto key = nbl::this_example::builtin::build::get_spirv_key<"prefix_sum_shader">(limits, m_physicalDevice->getFeatures()); +``` + +## Pairing with `NBL_CREATE_RESOURCE_ARCHIVE` (works with builtins ON/OFF) + +The recommended pattern is to always call `NBL_CREATE_RESOURCE_ARCHIVE` right after the NSC rules, using the produced `KEYS` list: + +```cmake +NBL_CREATE_RESOURCE_ARCHIVE( + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} + NAMESPACE nbl::this_example::builtin::build +) +``` + +### How `BINARY_DIR`, `MOUNT_POINT_DEFINE`, and `BIND` fit together + +- In `NBL_CREATE_NSC_COMPILE_RULES`, `BINARY_DIR` is the output directory where NSC writes the compiled files: + - `${BINARY_DIR}/<CONFIG>/<KEY>....spv` +- In `NBL_CREATE_NSC_COMPILE_RULES`, `MOUNT_POINT_DEFINE` is the *name* of a C/C++ preprocessor define whose value is set to the **absolute path** of `BINARY_DIR`. + - Example: `MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT` results in something like `-DNBL_THIS_EXAMPLE_BUILD_MOUNT_POINT="C:/.../auto-gen"` on the target.
+ - Keys returned by `get_spirv_key<...>()` are relative to that directory; the full path on disk is: + - `${NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT}/<key>` +- In `NBL_CREATE_RESOURCE_ARCHIVE`, `BIND` should point at the same directory as `BINARY_DIR`. + - The `BUILTINS` list entries must be relative to `BIND`. + - This is why pairing it with `OUTPUT_VAR KEYS` works: `KEYS` is exactly the list of relative paths under `BINARY_DIR` that were generated by the NSC rules, so the archive generator knows what to serialize/embed. + +This is designed to work in both modes: + +- `NBL_EMBED_BUILTIN_RESOURCES=OFF`: + - `NBL_CREATE_RESOURCE_ARCHIVE` becomes a no-op (creates a dummy interface target). + - You load SPIR-V from the **build directory** mounted into the virtual filesystem. + - `MOUNT_POINT_DEFINE` provides an absolute path (e.g. `NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT`) for mounting. +- `NBL_EMBED_BUILTIN_RESOURCES=ON`: + - `NBL_CREATE_RESOURCE_ARCHIVE` generates a small library that embeds the listed files into a virtual archive and emits `.../CArchive.h` under the requested `NAMESPACE`. + - You mount the embedded archive instead of a directory; runtime loading code stays the same (keys don't change). + +## Notes / gotchas + +- `INCLUDE` must be a **relative** path (it is emitted under the build tree and added to include dirs automatically). +- Prefer not to include `.spv` in `KEY` (the extension is appended unconditionally); if you do, you'll just get `.spv.spv` in the final filename/key (not an error, just not what you want). +- You can mix: + - per-input `COMPILE_OPTIONS` (inside JSON), and + - global `COMMON_OPTIONS` (CMake list after `COMMON_OPTIONS`). + +## Troubleshooting (no logs / silent NSC failures) + +Sometimes an NSC compile rule fails during the build, but the build output doesn't show a useful log. In that case, run the failing command under a debugger: + +1. Open the generated Visual Studio solution and set the `nsc` project/target as the Startup Project. +2.
Open the `nsc` project properties and set **Debugging -> Command Arguments**. +3. Copy the exact CLI from the failing "NSC Rules" custom command (the one that calls `nsc.exe`) into the Command Arguments field. +4. Start debugging (`F5`) and reproduce; if needed, put a breakpoint in the HLSL compiler/preprocessor codepath and step until you find the root cause. + +If the error looks like a preprocessing issue, note that we use Boost.Wave as the preprocessor; it can have quirky edge cases (e.g. needing a trailing newline/whitespace at the end of a file for correct parsing). + +## Best practices + +- Prefer compiling to a shader library (`-T lib_6_x`) and using multiple entry points when possible: fewer inputs means fewer compile rules and less build overhead; at runtime you still choose the entry point from the same `.spv`. +- Treat `CAPS` as a build-time cost multiplier (cartesian product). If the permutation count gets too large (thousands+), prebuilding usually stops paying off; an example of such workload is `examples_tests/23_Arithmetic2UnitTest`. + +## Complete example (expand) + +
+NSC rules + archive + runtime key usage + +### CMake (`CMakeLists.txt`) + +```cmake +include(common) + +nbl_create_executable_project("" "" "" "") + +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/shader.hlsl +) +target_sources(${EXECUTABLE_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "lib_6_8"], + "DEPENDS": [], + "CAPS": [ + { + "kind": "limits", + "name": "maxComputeSharedMemorySize", + "type": "uint32_t", + "values": [16384, 32768, 65536] + }, + { + "kind": "features", + "name": "shaderFloat64", + "type": "bool", + "values": [0, 1] + } + ] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +# Works for both NBL_EMBED_BUILTIN_RESOURCES=ON/OFF +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) +``` + +### Runtime usage (C++) + +```cpp +#include "nbl/this_example/builtin/build/spirv/keys.hpp" + +// Load relative to the VFS mount (examples mount it at "app_resources") +asset::IAssetLoader::SAssetLoadParams lp = {}; +lp.workingDirectory = "app_resources"; + +auto limits = device->getPhysicalDevice()->getLimits(); +limits.maxComputeSharedMemorySize = 32768; // one of the prebuilt values; real code should bucket/clamp with std::upper_bound (see the CountingSort snippet above) + +auto key = 
nbl::this_example::builtin::build::get_spirv_key<"shader">(limits, device->getEnabledFeatures()); +auto bundle = assetMgr->getAsset(key.c_str(), lp); +const auto assets = bundle.getContents(); +auto spvShader = asset::IAsset::castDown(assets[0]); + +// params.shader.shader = spvShader.get(); + +// If you compiled with `-T lib_6_x`, pick the entry point at pipeline creation time (e.g. `params.shader.entryPoint = "main";`). +``` + +
From a16cce22b6f36b26c829a02361fce73fd857b074 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 19 Dec 2025 18:15:57 +0700 Subject: [PATCH 088/140] Remove duplicate partial specialization for truncate and emulated_vec Signed-off-by: Corey --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 4eb8b7bf06..25b033c30e 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -587,53 +587,6 @@ NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) #undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST -#define NBL_EMULATED_VEC_PROMOTION(N) template\ -struct Promote, ComponentType>\ -{\ - using VecType = emulated_vector_t##N ;\ - NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ - {\ - array_set setter;\ - VecType promoted;\ - [[unroll]]\ - for (int i = 0; i < N; ++i)\ - setter(promoted, i, v);\ - return promoted;\ - }\ -}; - -NBL_EMULATED_VEC_PROMOTION(2) -NBL_EMULATED_VEC_PROMOTION(3) -NBL_EMULATED_VEC_PROMOTION(4) - -#undef NBL_EMULATED_VEC_PROMOTION - -#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ -struct Truncate, emulated_vector_t##M >\ -{\ - using OutputVecType = emulated_vector_t##N ;\ - using InputVecType = emulated_vector_t##M ;\ - NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ - {\ - array_get getter;\ - array_set setter;\ - OutputVecType output;\ - [[unroll]]\ - for (int i = 0; i < N; ++i)\ - setter(output, i, getter(vec, i));\ - return output;\ - }\ -}; - -NBL_EMULATED_VEC_TRUNCATION(2, 2) -NBL_EMULATED_VEC_TRUNCATION(2, 3) -NBL_EMULATED_VEC_TRUNCATION(2, 4) -NBL_EMULATED_VEC_TRUNCATION(3, 3) -NBL_EMULATED_VEC_TRUNCATION(3, 4) -NBL_EMULATED_VEC_TRUNCATION(4, 4) - -#undef NBL_EMULATED_VEC_TRUNCATION - } //namespace impl } From c1f945fb62d850769d1a115df6c31c692c87ba17 Mon Sep 17 
00:00:00 2001 From: Przemog1 Date: Sat, 20 Dec 2025 11:44:51 +0100 Subject: [PATCH 089/140] Updated DXC Signed-off-by: Corey --- 3rdparty/dxc/dxc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dxc/dxc b/3rdparty/dxc/dxc index ecd3f93521..d76c7890b1 160000 --- a/3rdparty/dxc/dxc +++ b/3rdparty/dxc/dxc @@ -1 +1 @@ -Subproject commit ecd3f93521f1aceabff64b14857f47f9a32c9958 +Subproject commit d76c7890b19ce0b344ee0ce116dbc1c92220ccea From a00d122aed1e4145253f9ea990fac41cbd33037c Mon Sep 17 00:00:00 2001 From: Fletterio Date: Mon, 22 Dec 2025 19:10:53 -0300 Subject: [PATCH 090/140] Adds a flag to NSC to support preprocessing shaders and storing the result Signed-off-by: Corey --- tools/nsc/main.cpp | 52 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index c4ce43b326..edc56de84c 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -153,6 +153,7 @@ class ShaderCompiler final : public system::IApplicationFramework }); }; + auto preprocessOnly = findOutputFlag("-P") != m_arguments.end(); auto output_flag_pos_fc = findOutputFlag("-Fc"); auto output_flag_pos_fo = findOutputFlag("-Fo"); if (output_flag_pos_fc != m_arguments.end() && output_flag_pos_fo != m_arguments.end()) { @@ -195,7 +196,8 @@ class ShaderCompiler final : public system::IApplicationFramework return false; } - m_logger->log("Compiled shader code will be saved to " + output_filepath, ILogger::ELL_INFO); + std::string outputType = preprocessOnly ? 
"Preprocessed" : "Compiled"; + m_logger->log(outputType + " shader code will be saved to " + output_filepath, ILogger::ELL_INFO); } #ifndef NBL_EMBED_BUILTIN_RESOURCES @@ -227,13 +229,27 @@ class ShaderCompiler final : public system::IApplicationFramework } auto start = std::chrono::high_resolution_clock::now(); - auto compilation_result = compile_shader(shader.get(), shaderStage, file_to_compile); + smart_refctd_ptr compilation_result; + std::string preprocessing_result; + std::string_view result_view; + if (preprocessOnly) + { + preprocessing_result = preprocess_shader(shader.get(), shaderStage, file_to_compile); + result_view = preprocessing_result; + } + else + { + compilation_result = compile_shader(shader.get(), shaderStage, file_to_compile); + result_view = { (const char*)compilation_result->getContent()->getPointer(), compilation_result->getContent()->getSize() }; + } auto end = std::chrono::high_resolution_clock::now(); - // writie compiled shader to file as bytes - if (compilation_result) + // write compiled/preprocessed shader to file as bytes + std::string operationType = preprocessOnly ? "preprocessing" : "compilation"; + const bool success = preprocessOnly ? 
preprocessing_result != std::string{} : bool(compilation_result); + if (success) { - m_logger->log("Shader compilation successful.", ILogger::ELL_INFO); + m_logger->log("Shader " + operationType + " successful.", ILogger::ELL_INFO); const auto took = std::to_string(std::chrono::duration_cast(end - start).count()); m_logger->log("Took %s ms.", ILogger::ELL_PERFORMANCE, took.c_str()); { @@ -258,7 +274,7 @@ class ShaderCompiler final : public system::IApplicationFramework return false; } - output_file.write((const char*)compilation_result->getContent()->getPointer(), compilation_result->getContent()->getSize()); + output_file.write(result_view.data(), result_view.size()); if (output_file.fail()) { @@ -279,7 +295,7 @@ class ShaderCompiler final : public system::IApplicationFramework } else { - m_logger->log("Shader compilation failed.", ILogger::ELL_ERROR); + m_logger->log("Shader " + operationType + " failed.", ILogger::ELL_ERROR); return false; } } @@ -291,6 +307,28 @@ class ShaderCompiler final : public system::IApplicationFramework private: + std::string preprocess_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier) { + smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + + CHLSLCompiler::SPreprocessorOptions options = {}; + options.sourceIdentifier = sourceIdentifier; + options.logger = m_logger.get(); + + auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + auto includeLoader = includeFinder->getDefaultFileSystemLoader(); + + // because before real compilation we do preprocess the input it doesn't really matter we proxy include search direcotries further with dxcOptions since at the end all includes are resolved to single file + for (const auto& it : m_include_search_paths) + includeFinder->addSearchPath(it, includeLoader); + + options.includeFinder = includeFinder.get(); + + const char* code_ptr = (const char*)shader->getContent()->getPointer(); + std::string_view 
code({ code_ptr, strlen(code_ptr)}); + + return hlslcompiler->preprocessShader(std::string(code), shaderStage, options, nullptr); + } + core::smart_refctd_ptr compile_shader(const IShader* shader, hlsl::ShaderStage shaderStage, std::string_view sourceIdentifier) { smart_refctd_ptr hlslcompiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); From e42cc99edeb58ccd26b829df8274c95d7cfd01c9 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 23 Dec 2025 14:50:43 +0100 Subject: [PATCH 091/140] update docker/compiler-explorer submodule Signed-off-by: Corey --- docker/compiler-explorer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/compiler-explorer b/docker/compiler-explorer index 45866dfa87..04c693f866 160000 --- a/docker/compiler-explorer +++ b/docker/compiler-explorer @@ -1 +1 @@ -Subproject commit 45866dfa8782404fc121f25ce15ad0626b474db0 +Subproject commit 04c693f8668d7f09f999434745afbb58fc9c7025 From 3347a4988908e65ce6e4930461ad4581eb7ac557 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 23 Dec 2025 16:49:55 +0100 Subject: [PATCH 092/140] update docker/compiler-explorer submodule Signed-off-by: Corey --- docker/compiler-explorer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/compiler-explorer b/docker/compiler-explorer index 04c693f866..265166a574 160000 --- a/docker/compiler-explorer +++ b/docker/compiler-explorer @@ -1 +1 @@ -Subproject commit 04c693f8668d7f09f999434745afbb58fc9c7025 +Subproject commit 265166a574c3b0dae59e57d6d8605f0fa37c31e1 From 7d893266a3ea383a0739d4f47d7fc7b19eb75717 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 23 Dec 2025 19:22:11 +0100 Subject: [PATCH 093/140] update docker/compiler-explorer submodule Signed-off-by: Corey --- docker/compiler-explorer | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/compiler-explorer b/docker/compiler-explorer index 265166a574..27318d12f8 160000 --- a/docker/compiler-explorer +++ 
b/docker/compiler-explorer @@ -1 +1 @@ -Subproject commit 265166a574c3b0dae59e57d6d8605f0fa37c31e1 +Subproject commit 27318d12f88cf34bd0444101e6e260b12f5063a0 From 184c27470086062638c11e050016134d0c2aea4b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 8 Jul 2025 15:44:23 +0700 Subject: [PATCH 094/140] added debug draw aabb extension, moved from ex Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 84 ++++ .../builtin/hlsl/aabb_instances.fragment.hlsl | 13 + .../builtin/hlsl/aabb_instances.vertex.hlsl | 30 ++ .../ext/DebugDraw/builtin/hlsl/common.hlsl | 45 +++ src/nbl/ext/CMakeLists.txt | 15 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 360 ++++++++++++++++++ src/nbl/ext/DebugDraw/CMakeLists.txt | 42 ++ 7 files changed, 587 insertions(+), 2 deletions(-) create mode 100644 include/nbl/ext/DebugDraw/CDrawAABB.h create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl create mode 100644 src/nbl/ext/DebugDraw/CDrawAABB.cpp create mode 100644 src/nbl/ext/DebugDraw/CMakeLists.txt diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h new file mode 100644 index 0000000000..6be529ecfa --- /dev/null +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +// TODO move this into nabla + +#ifndef _NBL_EXT_DRAW_AABB_H_ +#define _NBL_EXT_DRAW_AABB_H_ + +#include "nbl/video/declarations.h" +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" + +namespace nbl::ext::debugdraw +{ +class DrawAABB final : public core::IReferenceCounted +{ +public: + struct SCachedCreationParameters + { + using streaming_buffer_t = video::StreamingTransientDataBufferST>; + + static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + + core::smart_refctd_ptr utilities; + + //! optional, default MDI buffer allocated if not provided + core::smart_refctd_ptr streamingBuffer = nullptr; + }; + + struct SCreationParameters : SCachedCreationParameters + { + core::smart_refctd_ptr assetManager = nullptr; + + core::smart_refctd_ptr pipelineLayout; + core::smart_refctd_ptr renderpass = nullptr; + }; + + // creates an instance that can draw one AABB via push constant or multiple using streaming buffer + static core::smart_refctd_ptr create(SCreationParameters&& params); + + // creates default pipeline layout for push constant version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); + + // creates default pipeline layout for streaming version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); + + static core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); + + //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); + + inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } + + // records draw command for single AABB, user has to set pipeline outside + bool renderSingle(video::IGPUCommandBuffer* commandBuffer); + + bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); + + static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + + void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + + void clearAABBs(); + +protected: + DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); + ~DrawAABB() override; + +private: + static core::smart_refctd_ptr createPipeline(SCreationParameters& params); + static bool createStreamingBuffer(SCreationParameters& params); + + std::vector m_instances; + std::array m_unitAABBVertices; + + SCachedCreationParameters m_cachedCreationParams; + + core::smart_refctd_ptr m_pipeline; +}; +} + +#endif diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl new file mode 100644 index 0000000000..f17e028f91 --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl @@ -0,0 +1,13 @@ +#pragma shader_stage(fragment) + +#include "common.hlsl" + +using namespace nbl::ext::debugdraw; + +[shader("pixel")] +float32_t4 main(PSInput input) : SV_TARGET +{ + float32_t4 outColor = input.color; + + return outColor; +} \ No newline at end of file diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl new file mode 100644 index 
0000000000..8a54d40c5a --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -0,0 +1,30 @@ +#pragma shader_stage(vertex) + +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#include "common.hlsl" + +using namespace nbl::hlsl; +using namespace nbl::ext::debugdraw; + +[[vk::push_constant]] SPushConstants pc; + +[shader("vertex")] +PSInput main() +{ + PSInput output; + + float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); + InstanceData instance = vk::RawBufferLoad(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()); + + float32_t4x4 transform; + transform[0] = instance.transform[0]; + transform[1] = instance.transform[1]; + transform[2] = instance.transform[2]; + transform[3] = float32_t4(0, 0, 0, 1); + float32_t4 position = mul(transform, float32_t4(vertex, 1)); + output.position = mul(pc.MVP, position); + output.color = instance.color; + + return output; +} \ No newline at end of file diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl new file mode 100644 index 0000000000..2bcd378e40 --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -0,0 +1,45 @@ +#ifndef _DRAW_AABB_COMMON_HLSL +#define _DRAW_AABB_COMMON_HLSL + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +namespace nbl +{ +namespace ext +{ +namespace debugdraw +{ + +struct InstanceData +{ +#ifdef __HLSL_VERSION + float32_t3x4 transform; +#else + float transform[3*4]; +#endif + nbl::hlsl::float32_t4 color; +}; + +struct SPushConstants +{ +#ifdef __HLSL_VERSION + float32_t4x4 MVP; +#else + float MVP[4*4]; +#endif + uint64_t pVertexBuffer; + uint64_t pInstanceBuffer; +}; + +#ifdef __HLSL_VERSION +struct PSInput +{ + float32_t4 position : SV_Position; + float32_t4 color : TEXCOORD0; +}; +#endif + +} +} +} +#endif diff --git a/src/nbl/ext/CMakeLists.txt 
b/src/nbl/ext/CMakeLists.txt index e0bcd223f6..da7835a4df 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -6,6 +6,7 @@ start_tracking_variables_for_propagation_to_parent() # TODO: all of those options bellow should be defined here option(NBL_BUILD_TEXT_RENDERING "Enable Nabla Text Rendering extension building and integration?" OFF) +option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension building and integration?" OFF) # TODO: also all variables bellow should be killed from build system since we have logical # targets which properties (like include search directories or outputs) can be queried @@ -54,6 +55,16 @@ if(NBL_BUILD_TEXT_RENDERING) add_subdirectory(TextRendering) endif() -propagate_changed_variables_to_parent_scope() +if(NBL_BUILD_DEBUG_DRAW) + add_subdirectory(DebugDraw) + set(NBL_EXT_DEBUG_DRAW_INCLUDE_DIRS + ${NBL_EXT_DEBUG_DRAW_INCLUDE_DIRS} + PARENT_SCOPE + ) + set(NBL_EXT_DEBUG_DRAW_LIB + ${NBL_EXT_DEBUG_DRAW_LIB} + PARENT_SCOPE + ) +endif() -NBL_ADJUST_FOLDERS(ext) \ No newline at end of file +propagate_changed_variables_to_parent_scope() \ No newline at end of file diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp new file mode 100644 index 0000000000..3a17cf1b90 --- /dev/null +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -0,0 +1,360 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +// TODO move this into nabla + +#include "nbl/ext/DebugDraw/CDrawAABB.h" + +using namespace nbl; +using namespace core; +using namespace video; +using namespace system; +using namespace asset; +using namespace hlsl; + +namespace nbl::ext::debugdraw +{ + +core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) +{ + auto* const logger = params.utilities->getLogger(); + + auto pipeline = createPipeline(params); + if (!pipeline) + { + logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + if (!createStreamingBuffer(params)) + { + logger->log("Failed to create streaming buffer!", ILogger::ELL_ERROR); + return nullptr; + } + + return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline)); +} + +DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline) + : m_cachedCreationParams(std::move(params)), m_pipeline(pipeline) +{ + const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); + m_unitAABBVertices = getVerticesFromAABB(unitAABB); +} + +DrawAABB::~DrawAABB() +{ +} + +// note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly +constexpr std::string_view NBL_ARCHIVE_ENTRY = _ARCHIVE_ENTRY_KEY_; + +const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const std::string_view archiveAlias) +{ + assert(system); + + if (!system) + return nullptr; + + // extension should mount everything for you, regardless if content goes from virtual filesystem + // or disk directly - and you should never rely on application framework to expose extension data + +#ifdef NBL_EMBED_BUILTIN_RESOURCES + auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); +#else + auto NBL_EXTENSION_MOUNT_DIRECTORY_ENTRY = (path(_ARCHIVE_ABSOLUTE_ENTRY_PATH_) / NBL_ARCHIVE_ENTRY).make_preferred(); 
+ auto archive = make_smart_refctd_ptr(std::move(NBL_EXTENSION_MOUNT_DIRECTORY_ENTRY), smart_refctd_ptr(logger), system); + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); +#endif + + return smart_refctd_ptr(archive); +} + +smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params) +{ + auto system = smart_refctd_ptr(params.assetManager->getSystem()); + auto* set = params.assetManager->getCompilerSet(); + auto compiler = set->getShaderCompiler(IShader::E_CONTENT_TYPE::ECT_HLSL); + auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(system)); + auto includeLoader = includeFinder->getDefaultFileSystemLoader(); + includeFinder->addSearchPath(NBL_ARCHIVE_ENTRY.data(), includeLoader); + + auto compileShader = [&](const std::string& filePath, IShader::E_SHADER_STAGE stage) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = params.utilities->getLogger(); + lparams.workingDirectory = NBL_ARCHIVE_ENTRY.data(); + auto bundle = params.assetManager->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) + { + params.utilities->getLogger()->log("Shader %s not found!", ILogger::ELL_ERROR, filePath.c_str()); + exit(-1); + } + + const auto assets = bundle.getContents(); + assert(assets.size() == 1); + smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); + if (!shaderSrc) + return nullptr; + + CHLSLCompiler::SOptions options = {}; + options.stage = stage; + options.preprocessorOptions.sourceIdentifier = filePath; + options.preprocessorOptions.logger = params.utilities->getLogger(); + options.preprocessorOptions.includeFinder = includeFinder.get(); + shaderSrc = compiler->compileToSPIRV((const char*)shaderSrc->getContent()->getPointer(), options); + + return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); + }; + + if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + 
mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + + auto vertexShader = compileShader("aabb_instances.vertex.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); + auto fragmentShader = compileShader("aabb_instances.fragment.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT); + + video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; + pipelineParams[0].layout = params.pipelineLayout.get(); + pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; + pipelineParams[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; + pipelineParams[0].cached = { + .primitiveAssembly = { + .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, + } + }; + pipelineParams[0].renderpass = params.renderpass.get(); + + smart_refctd_ptr pipeline; + params.utilities->getLogicalDevice()->createGraphicsPipelines(nullptr, pipelineParams, &pipeline); + if (!pipeline) + { + params.utilities->getLogger()->log("Could not create streaming pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + + return pipeline; +} + +bool DrawAABB::createStreamingBuffer(SCreationParameters& params) +{ + const uint32_t minStreamingBufferAllocationSize = 128u, maxStreamingBufferAllocationAlignment = 4096u, mdiBufferDefaultSize = /* 2MB */ 1024u * 1024u * 2u; + + auto getRequiredAccessFlags = [&](const bitflag& properties) + { + bitflag flags(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS); + + if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT)) + flags |= IDeviceMemoryAllocation::EMCAF_READ; + if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT)) + flags |= IDeviceMemoryAllocation::EMCAF_WRITE; + + return flags; + }; + + if (!params.streamingBuffer) + { + IGPUBuffer::SCreationParams mdiCreationParams = {}; + mdiCreationParams.usage = SCachedCreationParameters::RequiredUsageFlags; + mdiCreationParams.size = mdiBufferDefaultSize; + + auto buffer = 
params.utilities->getLogicalDevice()->createBuffer(std::move(mdiCreationParams)); + buffer->setObjectDebugName("AABB Streaming Buffer"); + + auto memoryReqs = buffer->getMemoryReqs(); + memoryReqs.memoryTypeBits &= params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); + + auto allocation = params.utilities->getLogicalDevice()->allocate(memoryReqs, buffer.get(), SCachedCreationParameters::RequiredAllocateFlags); + { + const bool allocated = allocation.isValid(); + assert(allocated); + } + auto memory = allocation.memory; + + if (!memory->map({ 0ull, memoryReqs.size }, getRequiredAccessFlags(memory->getMemoryPropertyFlags()))) + params.utilities->getLogger()->log("Could not map device memory!", ILogger::ELL_ERROR); + + params.streamingBuffer = make_smart_refctd_ptr(SBufferRange{0ull, mdiCreationParams.size, std::move(buffer)}, maxStreamingBufferAllocationAlignment, minStreamingBufferAllocationSize); + } + + auto buffer = params.streamingBuffer->getBuffer(); + auto binding = buffer->getBoundMemory(); + + const auto validation = std::to_array + ({ + std::make_pair(buffer->getCreationParams().usage.hasFlags(SCachedCreationParameters::RequiredUsageFlags), "Streaming buffer must be created with IBuffer::EUF_STORAGE_BUFFER_BIT | IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT enabled!"), + std::make_pair(bool(buffer->getMemoryReqs().memoryTypeBits & params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits()), "Streaming buffer must have up-streaming memory type bits enabled!"), + std::make_pair(binding.memory->getAllocateFlags().hasFlags(SCachedCreationParameters::RequiredAllocateFlags), "Streaming buffer's memory must be allocated with IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT enabled!"), + std::make_pair(binding.memory->isCurrentlyMapped(), "Streaming buffer's memory must be mapped!"), // streaming buffer contructor already validates it, but cannot assume user won't unmap its own buffer for some reason 
(sorry if you have just hit it) + std::make_pair(binding.memory->getCurrentMappingAccess().hasFlags(getRequiredAccessFlags(binding.memory->getMemoryPropertyFlags())), "Streaming buffer's memory current mapping access flags don't meet requirements!") + }); + + for (const auto& [ok, error] : validation) + if (!ok) + { + params.utilities->getLogger()->log(error, ILogger::ELL_ERROR); + return false; + } + + return true; +} + +core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) +{ + return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); +} + +core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device) +{ + SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, + .offset = 0, + .size = sizeof(SPushConstants) + }; + return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); +} + +smart_refctd_ptr DrawAABB::createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment) +{ + smart_refctd_ptr pipeline; + + video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; + params[0].layout = layout; + params[0].vertexShader = vertex; + params[0].fragmentShader = fragment; + params[0].cached = { + .primitiveAssembly = { + .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, + } + }; + params[0].renderpass = renderpass; + + device->createGraphicsPipelines(nullptr, params, &pipeline); + + return pipeline; +} + +bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer) +{ + commandBuffer->setLineWidth(1.f); + commandBuffer->draw(24, 1, 0, 0); + + return true; +} + +bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4) +{ + using offset_t = 
SCachedCreationParameters::streaming_buffer_t::size_type; + constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); + // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all + constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); + // allocator initialization needs us to round up to PoT + const auto MaxPOTAlignment = roundUpToPoT(MaxAlignment); + + auto* streaming = m_cachedCreationParams.streamingBuffer.get(); + + auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); + assert(streamingPtr); + + commandBuffer->bindGraphicsPipeline(m_pipeline.get()); // move outside of loop, only bind once + + auto instancesIt = m_instances.begin(); + const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); + const uint32_t availableInstancesByteSize = streaming->getBuffer()->getSize() - verticesByteSize; + const uint32_t instancesPerIter = availableInstancesByteSize / sizeof(InstanceData); + using suballocator_t = core::LinearAddressAllocatorST; + while (instancesIt != m_instances.end()) + { + const uint32_t instanceCount = min(instancesPerIter, m_instances.size()); + offset_t inputOffset = 0u; + offset_t ImaginarySizeUpperBound = 0x1 << 30; + suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); + uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(verticesByteSize, sizeof(float32_t3)); + uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); + const uint32_t totalSize = imaginaryChunk.get_allocated_size(); + + inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; + std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); + streaming->multi_allocate(waitTill, 
1, &inputOffset, &totalSize, &MaxAlignment); + + memcpy(streamingPtr + vertexByteOffset, m_unitAABBVertices.data(), sizeof(m_unitAABBVertices[0]) * m_unitAABBVertices.size()); + memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); + instancesIt += instanceCount; + + assert(!streaming->needsManualFlushOrInvalidate()); + + SPushConstants pc; + memcpy(pc.MVP, cameraMat3x4, sizeof(pc.MVP)); + pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; + pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; + + commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->draw(m_unitAABBVertices.size(), instanceCount, 0, 0); + + streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); + } + // end loop + + return true; +} + +std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) +{ + const auto& pMin = aabb.MinEdge; + const auto& pMax = aabb.MaxEdge; + + std::array vertices; + vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); + + vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); + + vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); + vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); + + vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); + vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); + vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); + vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); + + vertices[16] = 
float32_t3(pMin.X, pMin.Y, pMin.Z); + vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); + vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); + vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); + + vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); + vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); + vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); + vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); + + return vertices; +} + +void DrawAABB::addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color) +{ + InstanceData instance; + instance.color = color; + + core::matrix3x4SIMD instanceTransform; + instanceTransform.setTranslation(core::vectorSIMDf(aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z, 0)); + const auto diagonal = aabb.MaxEdge - aabb.MinEdge; + instanceTransform.setScale(core::vectorSIMDf(diagonal.X, diagonal.Y, diagonal.Z)); + memcpy(instance.transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); + + m_instances.push_back(instance); +} + +void DrawAABB::clearAABBs() +{ + m_instances.clear(); +} + +} diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt new file mode 100644 index 0000000000..3011fe5b4c --- /dev/null +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -0,0 +1,42 @@ +include(${NBL_ROOT_PATH}/cmake/common.cmake) + +set(NBL_EXT_INTERNAL_INCLUDE_DIR "${NBL_ROOT_PATH}/include") + +set(NBL_EXT_DEBUG_DRAW_H + ${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext/DebugDraw/CDrawAABB.h +) + +set(NBL_EXT_DEBUG_DRAW_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/CDrawAABB.cpp" +) + +nbl_create_ext_library_project( + DEBUG_DRAW + "${NBL_EXT_DEBUG_DRAW_H}" + "${NBL_EXT_DEBUG_DRAW_SRC}" + "${NBL_EXT_DEBUG_DRAW_EXTERNAL_INCLUDE}" + "" + "" +) + +# this should be standard for all extensions +set(_ARCHIVE_ENTRY_KEY_ "DebugDraw/builtin/hlsl") # then each one has unique archive key +get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext" ABSOLUTE) 
+get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) +get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + +target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ABSOLUTE_ENTRY_PATH_="${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}") +target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ENTRY_KEY_="${_ARCHIVE_ENTRY_KEY_}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ extDebugDrawbuiltinResourceData) + + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "common.hlsl") + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.vertex.hlsl") # (*) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.fragment.hlsl") # (*) + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}" "${_ARCHIVE_ENTRY_KEY_}" "nbl::ext::debugdraw::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + LINK_BUILTIN_RESOURCES_TO_TARGET(${LIB_NAME} ${_BR_TARGET_}) +endif() + +set(NBL_EXT_DEBUG_DRAW_TARGET ${LIB_NAME} CACHE INTERNAL "Nabla's Debug Draw logical target name") From e642b558408328968dbf5e67ee43b6db2c5d13e1 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 8 Jul 2025 15:49:14 +0700 Subject: [PATCH 095/140] removed todos Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 -- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 6be529ecfa..13ca3a1ece 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -2,8 +2,6 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -// TODO move this into nabla - #ifndef _NBL_EXT_DRAW_AABB_H_ #define _NBL_EXT_DRAW_AABB_H_ diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 3a17cf1b90..b77630a2c2 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -2,8 +2,6 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -// TODO move this into nabla - #include "nbl/ext/DebugDraw/CDrawAABB.h" using namespace nbl; @@ -294,7 +292,6 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); } - // end loop return true; } From 6e49dc71b6f3c608a215d1814603ab593eda2af7 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 9 Jul 2025 15:19:50 +0700 Subject: [PATCH 096/140] support hlsl AABBs, also OBBs with transform Signed-off-by: Corey --- CMakeLists.txt | 1 + include/nbl/config/BuildConfigOptions.h.in | 3 ++- include/nbl/ext/DebugDraw/CDrawAABB.h | 4 ++++ src/nbl/ext/CMakeLists.txt | 1 - src/nbl/ext/DebugDraw/CDrawAABB.cpp | 23 +++++++++++++++++++--- 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2235512d1f..cdb62c4b1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,6 +175,7 @@ option(NBL_FAST_MATH "Enable fast low-precision math" OFF) # the reason OFF is b option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" OFF) # TODO: once it compies turn this ON by default! option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) +option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" OFF) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" 
OFF) if(NBL_COMPILE_WITH_CUDA) diff --git a/include/nbl/config/BuildConfigOptions.h.in b/include/nbl/config/BuildConfigOptions.h.in index c67c942217..f544562a57 100644 --- a/include/nbl/config/BuildConfigOptions.h.in +++ b/include/nbl/config/BuildConfigOptions.h.in @@ -59,7 +59,8 @@ #cmakedefine _NBL_BUILD_DPL_ -// ! +#cmakedefine NBL_BUILD_DEBUG_DRAW + // TODO: This has to disapppear from the main header and go to the OptiX extension header + config #cmakedefine OPTIX_INCLUDE_DIR "@OPTIX_INCLUDE_DIR@" diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 13ca3a1ece..08b3cf5a3c 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -7,6 +7,7 @@ #include "nbl/video/declarations.h" #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/shapes/aabb.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" namespace nbl::ext::debugdraw @@ -59,6 +60,9 @@ class DrawAABB final : public core::IReferenceCounted static std::array getVerticesFromAABB(const core::aabbox3d& aabb); void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + + void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color = { 1,0,0,1 }); void clearAABBs(); diff --git a/src/nbl/ext/CMakeLists.txt b/src/nbl/ext/CMakeLists.txt index da7835a4df..6271e912b9 100644 --- a/src/nbl/ext/CMakeLists.txt +++ b/src/nbl/ext/CMakeLists.txt @@ -6,7 +6,6 @@ start_tracking_variables_for_propagation_to_parent() # TODO: all of those options bellow should be defined here option(NBL_BUILD_TEXT_RENDERING "Enable Nabla Text Rendering extension building and integration?" OFF) -option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension building and integration?" 
OFF) # TODO: also all variables bellow should be killed from build system since we have logical # targets which properties (like include search directories or outputs) can be queried diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index b77630a2c2..6cb2f365ec 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -256,6 +256,7 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa assert(streamingPtr); commandBuffer->bindGraphicsPipeline(m_pipeline.get()); // move outside of loop, only bind once + commandBuffer->setLineWidth(1.f); auto instancesIt = m_instances.begin(); const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); @@ -336,14 +337,30 @@ std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color) +{ + addAABB(shapes::AABB<3, float>{{aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z}, { aabb.MaxEdge.X, aabb.MaxEdge.Y, aabb.MaxEdge.Z }}, color); +} + +void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) +{ + const auto transform = hlsl::float32_t3x4(1); + addOBB(aabb, transform, color); +} + +void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color) { InstanceData instance; instance.color = color; core::matrix3x4SIMD instanceTransform; - instanceTransform.setTranslation(core::vectorSIMDf(aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z, 0)); - const auto diagonal = aabb.MaxEdge - aabb.MinEdge; - instanceTransform.setScale(core::vectorSIMDf(diagonal.X, diagonal.Y, diagonal.Z)); + instanceTransform.setTranslation(core::vectorSIMDf(aabb.minVx.x, aabb.minVx.y, aabb.minVx.z, 0)); + const auto diagonal = aabb.getExtent(); + instanceTransform.setScale(core::vectorSIMDf(diagonal.x, diagonal.y, diagonal.z)); + + core::matrix3x4SIMD worldTransform; + memcpy(worldTransform.pointer(), 
&transform, sizeof(transform)); + + instanceTransform = core::concatenateBFollowedByA(worldTransform, instanceTransform); memcpy(instance.transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); m_instances.push_back(instance); From 9211c07b7ad7a3f6f7d0ece15e63e670dd9c4a1d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 18 Aug 2025 16:47:58 +0700 Subject: [PATCH 097/140] minor syntax changes Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 92 +++++++++---------- .../builtin/hlsl/aabb_instances.fragment.hlsl | 2 +- .../builtin/hlsl/aabb_instances.vertex.hlsl | 7 +- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 6 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 8 +- src/nbl/ext/DebugDraw/CMakeLists.txt | 2 +- 6 files changed, 62 insertions(+), 55 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 08b3cf5a3c..034a0321bf 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -10,76 +10,76 @@ #include "nbl/builtin/hlsl/shapes/aabb.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" -namespace nbl::ext::debugdraw +namespace nbl::ext::debug_draw { class DrawAABB final : public core::IReferenceCounted { -public: - struct SCachedCreationParameters - { - using streaming_buffer_t = video::StreamingTransientDataBufferST>; + public: + struct SCachedCreationParameters + { + using streaming_buffer_t = video::StreamingTransientDataBufferST>; - static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + static constexpr inline auto RequiredAllocateFlags = core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | 
asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - core::smart_refctd_ptr utilities; + core::smart_refctd_ptr utilities; - //! optional, default MDI buffer allocated if not provided - core::smart_refctd_ptr streamingBuffer = nullptr; - }; - - struct SCreationParameters : SCachedCreationParameters - { - core::smart_refctd_ptr assetManager = nullptr; + //! optional, default MDI buffer allocated if not provided + core::smart_refctd_ptr streamingBuffer = nullptr; + }; + + struct SCreationParameters : SCachedCreationParameters + { + core::smart_refctd_ptr assetManager = nullptr; - core::smart_refctd_ptr pipelineLayout; - core::smart_refctd_ptr renderpass = nullptr; - }; + core::smart_refctd_ptr pipelineLayout; + core::smart_refctd_ptr renderpass = nullptr; + }; - // creates an instance that can draw one AABB via push constant or multiple using streaming buffer - static core::smart_refctd_ptr create(SCreationParameters&& params); + // creates an instance that can draw one AABB via push constant or multiple using streaming buffer + static core::smart_refctd_ptr create(SCreationParameters&& params); - // creates default pipeline layout for push constant version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); + // creates default pipeline layout for push constant version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); - // creates default pipeline layout for streaming version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); + // creates default pipeline layout for streaming version + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); - static core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, 
video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); + static core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); - //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included - static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); + //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); - inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } + inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } - // records draw command for single AABB, user has to set pipeline outside - bool renderSingle(video::IGPUCommandBuffer* commandBuffer); + // records draw command for single AABB, user has to set pipeline outside + bool renderSingle(video::IGPUCommandBuffer* commandBuffer); - bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); + bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); - static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + static std::array getVerticesFromAABB(const core::aabbox3d& aabb); - void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); - void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + void addAABB(const 
core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); + void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); - void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color = { 1,0,0,1 }); + void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color = { 1,0,0,1 }); - void clearAABBs(); + void clearAABBs(); -protected: - DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); - ~DrawAABB() override; + protected: + DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); + ~DrawAABB() override; -private: - static core::smart_refctd_ptr createPipeline(SCreationParameters& params); - static bool createStreamingBuffer(SCreationParameters& params); + private: + static core::smart_refctd_ptr createPipeline(SCreationParameters& params); + static bool createStreamingBuffer(SCreationParameters& params); - std::vector m_instances; - std::array m_unitAABBVertices; + std::vector m_instances; + std::array m_unitAABBVertices; - SCachedCreationParameters m_cachedCreationParams; + SCachedCreationParameters m_cachedCreationParams; - core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_pipeline; }; } diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl index f17e028f91..686e8934db 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl @@ -2,7 +2,7 @@ #include "common.hlsl" -using namespace nbl::ext::debugdraw; +using namespace nbl::ext::debug_draw; [shader("pixel")] float32_t4 main(PSInput input) : SV_TARGET diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index 
8a54d40c5a..ff993f8541 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -1,11 +1,12 @@ #pragma shader_stage(vertex) +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" #include "common.hlsl" using namespace nbl::hlsl; -using namespace nbl::ext::debugdraw; +using namespace nbl::ext::debug_draw; [[vk::push_constant]] SPushConstants pc; @@ -15,14 +16,14 @@ PSInput main() PSInput output; float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); - InstanceData instance = vk::RawBufferLoad(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()); + InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); float32_t4x4 transform; transform[0] = instance.transform[0]; transform[1] = instance.transform[1]; transform[2] = instance.transform[2]; transform[3] = float32_t4(0, 0, 0, 1); - float32_t4 position = mul(transform, float32_t4(vertex, 1)); + float32_t4 position = math::linalg::promoted_mul(transform, vertex); output.position = mul(pc.MVP, position); output.color = instance.color; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index 2bcd378e40..ec05d5c73b 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -1,5 +1,5 @@ -#ifndef _DRAW_AABB_COMMON_HLSL -#define _DRAW_AABB_COMMON_HLSL +#ifndef _NBL_DEBUG_DRAW_EXT_COMMON_HLSL +#define _NBL_DEBUG_DRAW_EXT_COMMON_HLSL #include "nbl/builtin/hlsl/cpp_compat.hlsl" @@ -7,7 +7,7 @@ namespace nbl { namespace ext { -namespace debugdraw +namespace debug_draw { struct InstanceData diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 
6cb2f365ec..2a6f6f67ea 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -11,7 +11,7 @@ using namespace system; using namespace asset; using namespace hlsl; -namespace nbl::ext::debugdraw +namespace nbl::ext::debug_draw { core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) @@ -113,6 +113,12 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet auto vertexShader = compileShader("aabb_instances.vertex.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); auto fragmentShader = compileShader("aabb_instances.fragment.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT); + if (!vertexShader || !fragmentShader) + { + params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); + return nullptr; + } + video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; pipelineParams[0].layout = params.pipelineLayout.get(); pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 3011fe5b4c..7e97cb74a4 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -35,7 +35,7 @@ if(NBL_EMBED_BUILTIN_RESOURCES) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.vertex.hlsl") # (*) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.fragment.hlsl") # (*) - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}" "${_ARCHIVE_ENTRY_KEY_}" "nbl::ext::debugdraw::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}" "${_ARCHIVE_ENTRY_KEY_}" "nbl::ext::debug_draw::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${LIB_NAME} ${_BR_TARGET_}) endif() From 6267b9f1f803b139204079f4c0495473c275c007 Mon Sep 17 00:00:00 2001 From: 
keptsecret Date: Tue, 19 Aug 2025 11:31:33 +0700 Subject: [PATCH 098/140] use hlsl cpp compat matrices, aabb Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 7 +-- .../builtin/hlsl/aabb_instances.vertex.hlsl | 8 +--- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 13 +----- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 45 ++++++++++--------- 4 files changed, 28 insertions(+), 45 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 034a0321bf..721f39b796 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -55,15 +55,12 @@ class DrawAABB final : public core::IReferenceCounted // records draw command for single AABB, user has to set pipeline outside bool renderSingle(video::IGPUCommandBuffer* commandBuffer); - bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4); + bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat); static std::array getVerticesFromAABB(const core::aabbox3d& aabb); - void addAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); - - void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color = { 1,0,0,1 }); - + void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color = { 1,0,0,1 }); void clearAABBs(); protected: diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index ff993f8541..929ff2e60d 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -18,13 +18,7 @@ PSInput main() 
float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); - float32_t4x4 transform; - transform[0] = instance.transform[0]; - transform[1] = instance.transform[1]; - transform[2] = instance.transform[2]; - transform[3] = float32_t4(0, 0, 0, 1); - float32_t4 position = math::linalg::promoted_mul(transform, vertex); - output.position = mul(pc.MVP, position); + output.position = math::linalg::promoted_mul(instance.transform, vertex); output.color = instance.color; return output; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index ec05d5c73b..4502b04aa0 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -12,21 +12,12 @@ namespace debug_draw struct InstanceData { -#ifdef __HLSL_VERSION - float32_t3x4 transform; -#else - float transform[3*4]; -#endif - nbl::hlsl::float32_t4 color; + hlsl::float32_t4x4 transform; + hlsl::float32_t4 color; }; struct SPushConstants { -#ifdef __HLSL_VERSION - float32_t4x4 MVP; -#else - float MVP[4*4]; -#endif uint64_t pVertexBuffer; uint64_t pInstanceBuffer; }; diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 2a6f6f67ea..799c9f2c9e 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -3,6 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/ext/DebugDraw/CDrawAABB.h" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" using namespace nbl; using namespace core; @@ -247,7 +248,7 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer) return true; } -bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, float* cameraMat3x4) +bool 
DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat) { using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); @@ -261,17 +262,23 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); assert(streamingPtr); - commandBuffer->bindGraphicsPipeline(m_pipeline.get()); // move outside of loop, only bind once + commandBuffer->bindGraphicsPipeline(m_pipeline.get()); commandBuffer->setLineWidth(1.f); - auto instancesIt = m_instances.begin(); + auto instances = m_instances; + for (auto& inst : instances) + { + inst.transform = hlsl::mul(cameraMat, inst.transform); + } + + auto instancesIt = instances.begin(); const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); const uint32_t availableInstancesByteSize = streaming->getBuffer()->getSize() - verticesByteSize; const uint32_t instancesPerIter = availableInstancesByteSize / sizeof(InstanceData); using suballocator_t = core::LinearAddressAllocatorST; - while (instancesIt != m_instances.end()) + while (instancesIt != instances.end()) { - const uint32_t instanceCount = min(instancesPerIter, m_instances.size()); + const uint32_t instanceCount = min(instancesPerIter, instances.size()); offset_t inputOffset = 0u; offset_t ImaginarySizeUpperBound = 0x1 << 30; suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); @@ -290,7 +297,6 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa assert(!streaming->needsManualFlushOrInvalidate()); SPushConstants pc; - memcpy(pc.MVP, cameraMat3x4, sizeof(pc.MVP)); pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; pc.pInstanceBuffer = 
m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; @@ -342,33 +348,28 @@ std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb, const hlsl::float32_t4& color) -{ - addAABB(shapes::AABB<3, float>{{aabb.MinEdge.X, aabb.MinEdge.Y, aabb.MinEdge.Z}, { aabb.MaxEdge.X, aabb.MaxEdge.Y, aabb.MaxEdge.Z }}, color); -} - void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) { - const auto transform = hlsl::float32_t3x4(1); + const auto transform = hlsl::float32_t4x4(1); addOBB(aabb, transform, color); } -void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t3x4 transform, const hlsl::float32_t4& color) +void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color) { InstanceData instance; instance.color = color; - - core::matrix3x4SIMD instanceTransform; - instanceTransform.setTranslation(core::vectorSIMDf(aabb.minVx.x, aabb.minVx.y, aabb.minVx.z, 0)); const auto diagonal = aabb.getExtent(); - instanceTransform.setScale(core::vectorSIMDf(diagonal.x, diagonal.y, diagonal.z)); - - core::matrix3x4SIMD worldTransform; - memcpy(worldTransform.pointer(), &transform, sizeof(transform)); - instanceTransform = core::concatenateBFollowedByA(worldTransform, instanceTransform); - memcpy(instance.transform, instanceTransform.pointer(), sizeof(core::matrix3x4SIMD)); + hlsl::float32_t4x4 instanceTransform; + instanceTransform[0][3] = aabb.minVx.x; + instanceTransform[1][3] = aabb.minVx.y; + instanceTransform[2][3] = aabb.minVx.z; + instanceTransform[3][3] = 1.f; + instanceTransform[0][0] = diagonal.x; + instanceTransform[1][1] = diagonal.y; + instanceTransform[2][2] = diagonal.z; + instance.transform = math::linalg::promoted_mul(transform, instanceTransform); m_instances.push_back(instance); } From 47f40aa9dcfcb0012c75179719a821470d23c5f8 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 19 
Aug 2025 15:30:43 +0700 Subject: [PATCH 099/140] change batch render to use indexed draw Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 11 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 152 ++++++++++++++++++-------- 2 files changed, 117 insertions(+), 46 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 721f39b796..78b32638e2 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -15,6 +15,8 @@ namespace nbl::ext::debug_draw class DrawAABB final : public core::IReferenceCounted { public: + static constexpr inline uint32_t IndicesCount = 24u; + struct SCachedCreationParameters { using streaming_buffer_t = video::StreamingTransientDataBufferST>; @@ -30,6 +32,7 @@ class DrawAABB final : public core::IReferenceCounted struct SCreationParameters : SCachedCreationParameters { + video::IQueue* transfer = nullptr; core::smart_refctd_ptr assetManager = nullptr; core::smart_refctd_ptr pipelineLayout; @@ -57,22 +60,24 @@ class DrawAABB final : public core::IReferenceCounted bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat); - static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + //static std::array getVerticesFromAABB(const core::aabbox3d& aabb); void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color = { 1,0,0,1 }); void clearAABBs(); protected: - DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline); + DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline, core::smart_refctd_ptr indicesBuffer); ~DrawAABB() override; private: static core::smart_refctd_ptr createPipeline(SCreationParameters& params); static bool createStreamingBuffer(SCreationParameters& params); + static 
core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); std::vector m_instances; - std::array m_unitAABBVertices; + std::array m_unitAABBVertices; + core::smart_refctd_ptr m_indicesBuffer; SCachedCreationParameters m_cachedCreationParams; diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 799c9f2c9e..676f8eafa2 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -32,14 +32,31 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) return nullptr; } - return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline)); + auto indicesBuffer = createIndicesBuffer(params); + if (!indicesBuffer) + { + logger->log("Failed to create indices buffer!", ILogger::ELL_ERROR); + return nullptr; + } + + return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline, indicesBuffer)); } -DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline) - : m_cachedCreationParams(std::move(params)), m_pipeline(pipeline) +DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline, smart_refctd_ptr indicesBuffer) + : m_cachedCreationParams(std::move(params)), m_pipeline(std::move(pipeline)), m_indicesBuffer(std::move(indicesBuffer)) { const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); - m_unitAABBVertices = getVerticesFromAABB(unitAABB); + float32_t3 pMin = { 0, 0, 0 }; + float32_t3 pMax = { 1, 1, 1 }; + + m_unitAABBVertices[0] = float32_t3(pMin.x, pMin.y, pMin.z); + m_unitAABBVertices[1] = float32_t3(pMax.x, pMin.y, pMin.z); + m_unitAABBVertices[2] = float32_t3(pMin.x, pMin.y, pMax.z); + m_unitAABBVertices[3] = float32_t3(pMax.x, pMin.y, pMax.z); + m_unitAABBVertices[4] = float32_t3(pMin.x, pMax.y, pMin.z); + m_unitAABBVertices[5] = float32_t3(pMax.x, pMax.y, pMin.z); + m_unitAABBVertices[6] = float32_t3(pMin.x, pMax.y, pMax.z); + m_unitAABBVertices[7] = float32_t3(pMax.x, pMax.y, pMax.z); } DrawAABB::~DrawAABB() @@ 
-205,6 +222,53 @@ bool DrawAABB::createStreamingBuffer(SCreationParameters& params) return true; } +smart_refctd_ptr DrawAABB::createIndicesBuffer(SCreationParameters& params) +{ + std::array unitAABBIndices; + unitAABBIndices[0] = 0; + unitAABBIndices[1] = 1; + unitAABBIndices[2] = 0; + unitAABBIndices[3] = 2; + + unitAABBIndices[4] = 3; + unitAABBIndices[5] = 1; + unitAABBIndices[6] = 3; + unitAABBIndices[7] = 2; + + unitAABBIndices[8] = 4; + unitAABBIndices[9] = 5; + unitAABBIndices[10] = 4; + unitAABBIndices[11] = 6; + + unitAABBIndices[12] = 7; + unitAABBIndices[13] = 5; + unitAABBIndices[14] = 7; + unitAABBIndices[15] = 6; + + unitAABBIndices[16] = 0; + unitAABBIndices[17] = 4; + unitAABBIndices[18] = 1; + unitAABBIndices[19] = 5; + + unitAABBIndices[20] = 2; + unitAABBIndices[21] = 6; + unitAABBIndices[22] = 3; + unitAABBIndices[23] = 7; + + IGPUBuffer::SCreationParams bufparams; + bufparams.size = sizeof(uint32_t) * unitAABBIndices.size(); + bufparams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + + smart_refctd_ptr indicesBuffer; + params.utilities->createFilledDeviceLocalBufferOnDedMem( + SIntendedSubmitInfo{ .queue = params.transfer }, + std::move(bufparams), + unitAABBIndices.data() + ).move_into(indicesBuffer); + + return indicesBuffer; +} + core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) { return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); @@ -264,6 +328,8 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa commandBuffer->bindGraphicsPipeline(m_pipeline.get()); commandBuffer->setLineWidth(1.f); + asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; + commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); auto instances = m_instances; for (auto& inst : instances) @@ -301,7 +367,7 @@ bool DrawAABB::render(IGPUCommandBuffer* 
commandBuffer, ISemaphore::SWaitInfo wa pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - commandBuffer->draw(m_unitAABBVertices.size(), instanceCount, 0, 0); + commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); } @@ -309,44 +375,44 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa return true; } -std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) -{ - const auto& pMin = aabb.MinEdge; - const auto& pMax = aabb.MaxEdge; - - std::array vertices; - vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); - - vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); - - vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); - vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); - - vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); - vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); - vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); - vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); - - vertices[16] = float32_t3(pMin.X, pMin.Y, pMin.Z); - vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); - vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); - vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); - - vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); - vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); - vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); - vertices[23] = 
float32_t3(pMax.X, pMax.Y, pMax.Z); - - return vertices; -} +//std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) +//{ +// const auto& pMin = aabb.MinEdge; +// const auto& pMax = aabb.MaxEdge; +// +// std::array vertices; +// vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 +// vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 +// vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 +// vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 +// +// vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 +// vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 +// vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 +// vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 +// +// vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 +// vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 +// vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 +// vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 +// +// vertices[12] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 +// vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 +// vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 +// vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 +// +// vertices[16] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 +// vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 +// vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 +// vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 +// +// vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 +// vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 +// vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 +// vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 +// +// return vertices; +//} void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) { From c90dfcfcaad8c751a60bb75b3b408eac2a399f4b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 10:22:49 +0700 Subject: [PATCH 100/140] simplified single AABB draw 
Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 25 +-- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 6 + .../DebugDraw/builtin/hlsl/single.vertex.hlsl | 23 +++ src/nbl/ext/DebugDraw/CDrawAABB.cpp | 184 ++++++++---------- src/nbl/ext/DebugDraw/CMakeLists.txt | 1 + 5 files changed, 129 insertions(+), 110 deletions(-) create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 78b32638e2..dad33bea27 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -16,6 +16,7 @@ class DrawAABB final : public core::IReferenceCounted { public: static constexpr inline uint32_t IndicesCount = 24u; + static constexpr inline uint32_t VerticesCount = 8u; struct SCachedCreationParameters { @@ -35,53 +36,55 @@ class DrawAABB final : public core::IReferenceCounted video::IQueue* transfer = nullptr; core::smart_refctd_ptr assetManager = nullptr; - core::smart_refctd_ptr pipelineLayout; + core::smart_refctd_ptr singlePipelineLayout; + core::smart_refctd_ptr batchPipelineLayout; core::smart_refctd_ptr renderpass = nullptr; }; // creates an instance that can draw one AABB via push constant or multiple using streaming buffer static core::smart_refctd_ptr create(SCreationParameters&& params); - // creates default pipeline layout for push constant version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); + // creates pipeline layout from push constant range + static core::smart_refctd_ptr createPipelineLayoutFromPCRange(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); // creates default pipeline layout for streaming version static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); - static core::smart_refctd_ptr createDefaultPipeline(video::ILogicalDevice* device, 
video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment); - //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } // records draw command for single AABB, user has to set pipeline outside - bool renderSingle(video::IGPUCommandBuffer* commandBuffer); + bool renderSingle(video::IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat); bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat); - //static std::array getVerticesFromAABB(const core::aabbox3d& aabb); + static hlsl::float32_t4x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); void addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color = { 1,0,0,1 }); void clearAABBs(); protected: - DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr pipeline, core::smart_refctd_ptr indicesBuffer); + DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, + core::smart_refctd_ptr indicesBuffer, core::smart_refctd_ptr verticesBuffer); ~DrawAABB() override; private: - static core::smart_refctd_ptr createPipeline(SCreationParameters& params); + static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, 
const std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); + static core::smart_refctd_ptr createVerticesBuffer(SCreationParameters& params); std::vector m_instances; - std::array m_unitAABBVertices; core::smart_refctd_ptr m_indicesBuffer; + core::smart_refctd_ptr m_verticesBuffer; SCachedCreationParameters m_cachedCreationParams; - core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_singlePipeline; + core::smart_refctd_ptr m_batchPipeline; }; } diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index 4502b04aa0..03a3bbfa49 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -16,6 +16,12 @@ struct InstanceData hlsl::float32_t4 color; }; +struct SSinglePushConstants +{ + uint64_t pVertexBuffer; + InstanceData instance; +}; + struct SPushConstants { uint64_t pVertexBuffer; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl new file mode 100644 index 0000000000..e9b68a811c --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl @@ -0,0 +1,23 @@ +#pragma shader_stage(vertex) + +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#include "common.hlsl" + +using namespace nbl::hlsl; +using namespace nbl::ext::debug_draw; + +[[vk::push_constant]] SSinglePushConstants pc; + +[shader("vertex")] +PSInput main() +{ + PSInput output; + float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); + + output.position = math::linalg::promoted_mul(pc.instance.transform, vertex); + output.color = pc.instance.color; + + return output; +} \ No newline at end of file diff --git 
a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 676f8eafa2..53a6e3cebb 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -19,8 +19,14 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) { auto* const logger = params.utilities->getLogger(); - auto pipeline = createPipeline(params); - if (!pipeline) + auto singlePipeline = createPipeline(params, params.singlePipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!singlePipeline) + { + logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + auto batchPipeline = createPipeline(params, params.batchPipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!batchPipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); return nullptr; @@ -38,25 +44,21 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) logger->log("Failed to create indices buffer!", ILogger::ELL_ERROR); return nullptr; } + auto verticesBuffer = createVerticesBuffer(params); + if (!verticesBuffer) + { + logger->log("Failed to create vertices buffer!", ILogger::ELL_ERROR); + return nullptr; + } - return core::smart_refctd_ptr(new DrawAABB(std::move(params), pipeline, indicesBuffer)); + return core::smart_refctd_ptr(new DrawAABB(std::move(params), singlePipeline, batchPipeline, indicesBuffer, verticesBuffer)); } -DrawAABB::DrawAABB(SCreationParameters&& params, smart_refctd_ptr pipeline, smart_refctd_ptr indicesBuffer) - : m_cachedCreationParams(std::move(params)), m_pipeline(std::move(pipeline)), m_indicesBuffer(std::move(indicesBuffer)) +DrawAABB::DrawAABB(SCreationParameters&& params, core::smart_refctd_ptr singlePipeline, smart_refctd_ptr batchPipeline, + smart_refctd_ptr indicesBuffer, smart_refctd_ptr verticesBuffer) + : m_cachedCreationParams(std::move(params)), m_singlePipeline(std::move(singlePipeline)), 
m_batchPipeline(std::move(batchPipeline)), + m_indicesBuffer(std::move(indicesBuffer)), m_verticesBuffer(std::move(verticesBuffer)) { - const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); - float32_t3 pMin = { 0, 0, 0 }; - float32_t3 pMax = { 1, 1, 1 }; - - m_unitAABBVertices[0] = float32_t3(pMin.x, pMin.y, pMin.z); - m_unitAABBVertices[1] = float32_t3(pMax.x, pMin.y, pMin.z); - m_unitAABBVertices[2] = float32_t3(pMin.x, pMin.y, pMax.z); - m_unitAABBVertices[3] = float32_t3(pMax.x, pMin.y, pMax.z); - m_unitAABBVertices[4] = float32_t3(pMin.x, pMax.y, pMin.z); - m_unitAABBVertices[5] = float32_t3(pMax.x, pMax.y, pMin.z); - m_unitAABBVertices[6] = float32_t3(pMin.x, pMax.y, pMax.z); - m_unitAABBVertices[7] = float32_t3(pMax.x, pMax.y, pMax.z); } DrawAABB::~DrawAABB() @@ -88,7 +90,7 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l return smart_refctd_ptr(archive); } -smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params) +smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) { auto system = smart_refctd_ptr(params.assetManager->getSystem()); auto* set = params.assetManager->getCompilerSet(); @@ -128,8 +130,8 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); - auto vertexShader = compileShader("aabb_instances.vertex.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); - auto fragmentShader = compileShader("aabb_instances.fragment.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT); + auto vertexShader = compileShader(vsPath, IShader::E_SHADER_STAGE::ESS_VERTEX); + auto fragmentShader = compileShader(fsPath, IShader::E_SHADER_STAGE::ESS_FRAGMENT); if (!vertexShader || !fragmentShader) { @@ -138,7 +140,7 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet } 
video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; - pipelineParams[0].layout = params.pipelineLayout.get(); + pipelineParams[0].layout = pipelineLayout; pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; pipelineParams[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; pipelineParams[0].cached = { @@ -269,7 +271,37 @@ smart_refctd_ptr DrawAABB::createIndicesBuffer(SCreationParameters& return indicesBuffer; } -core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) +smart_refctd_ptr DrawAABB::createVerticesBuffer(SCreationParameters& params) +{ + const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); + float32_t3 pMin = { 0, 0, 0 }; + float32_t3 pMax = { 1, 1, 1 }; + + std::array unitAABBVertices; + unitAABBVertices[0] = float32_t3(pMin.x, pMin.y, pMin.z); + unitAABBVertices[1] = float32_t3(pMax.x, pMin.y, pMin.z); + unitAABBVertices[2] = float32_t3(pMin.x, pMin.y, pMax.z); + unitAABBVertices[3] = float32_t3(pMax.x, pMin.y, pMax.z); + unitAABBVertices[4] = float32_t3(pMin.x, pMax.y, pMin.z); + unitAABBVertices[5] = float32_t3(pMax.x, pMax.y, pMin.z); + unitAABBVertices[6] = float32_t3(pMin.x, pMax.y, pMax.z); + unitAABBVertices[7] = float32_t3(pMax.x, pMax.y, pMax.z); + + IGPUBuffer::SCreationParams bufparams; + bufparams.size = sizeof(float32_t3) * unitAABBVertices.size(); + bufparams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + + smart_refctd_ptr vertexBuffer; + params.utilities->createFilledDeviceLocalBufferOnDedMem( + SIntendedSubmitInfo{ .queue = params.transfer }, + std::move(bufparams), + unitAABBVertices.data() + ).move_into(vertexBuffer); + + return vertexBuffer; +} + +core::smart_refctd_ptr DrawAABB::createPipelineLayoutFromPCRange(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) { 
return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); } @@ -284,30 +316,22 @@ core::smart_refctd_ptr DrawAABB::createDefaultPipelin return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); } -smart_refctd_ptr DrawAABB::createDefaultPipeline(video::ILogicalDevice* device, video::IGPUPipelineLayout* layout, video::IGPURenderpass* renderpass, video::IGPUGraphicsPipeline::SShaderSpecInfo& vertex, video::IGPUGraphicsPipeline::SShaderSpecInfo& fragment) +bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat) { - smart_refctd_ptr pipeline; - - video::IGPUGraphicsPipeline::SCreationParams params[1] = {}; - params[0].layout = layout; - params[0].vertexShader = vertex; - params[0].fragmentShader = fragment; - params[0].cached = { - .primitiveAssembly = { - .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, - } - }; - params[0].renderpass = renderpass; - - device->createGraphicsPipelines(nullptr, params, &pipeline); + commandBuffer->bindGraphicsPipeline(m_singlePipeline.get()); + commandBuffer->setLineWidth(1.f); + asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; + commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); - return pipeline; -} + SSinglePushConstants pc; + pc.pVertexBuffer = m_verticesBuffer->getDeviceAddress(); -bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer) -{ - commandBuffer->setLineWidth(1.f); - commandBuffer->draw(24, 1, 0, 0); + hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); + pc.instance.transform = hlsl::mul(cameraMat, instanceTransform); + pc.instance.color = color; + + commandBuffer->pushConstants(m_singlePipeline->getLayout(), ESS_VERTEX, 0, sizeof(SSinglePushConstants), &pc); + commandBuffer->drawIndexed(IndicesCount, 1, 0, 0, 0); return true; } @@ -326,7 +350,7 @@ bool 
DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); assert(streamingPtr); - commandBuffer->bindGraphicsPipeline(m_pipeline.get()); + commandBuffer->bindGraphicsPipeline(m_batchPipeline.get()); commandBuffer->setLineWidth(1.f); asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); @@ -338,9 +362,7 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa } auto instancesIt = instances.begin(); - const uint32_t verticesByteSize = sizeof(float32_t3) * m_unitAABBVertices.size(); - const uint32_t availableInstancesByteSize = streaming->getBuffer()->getSize() - verticesByteSize; - const uint32_t instancesPerIter = availableInstancesByteSize / sizeof(InstanceData); + const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); using suballocator_t = core::LinearAddressAllocatorST; while (instancesIt != instances.end()) { @@ -348,7 +370,6 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa offset_t inputOffset = 0u; offset_t ImaginarySizeUpperBound = 0x1 << 30; suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); - uint32_t vertexByteOffset = imaginaryChunk.alloc_addr(verticesByteSize, sizeof(float32_t3)); uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); const uint32_t totalSize = imaginaryChunk.get_allocated_size(); @@ -356,17 +377,16 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - memcpy(streamingPtr + vertexByteOffset, 
m_unitAABBVertices.data(), sizeof(m_unitAABBVertices[0]) * m_unitAABBVertices.size()); memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); instancesIt += instanceCount; assert(!streaming->needsManualFlushOrInvalidate()); SPushConstants pc; - pc.pVertexBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + vertexByteOffset; + pc.pVertexBuffer = m_verticesBuffer->getDeviceAddress(); pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; - commandBuffer->pushConstants(m_pipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->pushConstants(m_batchPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); @@ -375,44 +395,19 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa return true; } -//std::array DrawAABB::getVerticesFromAABB(const core::aabbox3d& aabb) -//{ -// const auto& pMin = aabb.MinEdge; -// const auto& pMax = aabb.MaxEdge; -// -// std::array vertices; -// vertices[0] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 -// vertices[1] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 -// vertices[2] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 -// vertices[3] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 -// -// vertices[4] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 -// vertices[5] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 -// vertices[6] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 -// vertices[7] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 -// -// vertices[8] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 -// vertices[9] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 -// vertices[10] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 -// vertices[11] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 -// -// vertices[12] = float32_t3(pMax.X, pMax.Y, 
pMax.Z); // 7 -// vertices[13] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 -// vertices[14] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 -// vertices[15] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 -// -// vertices[16] = float32_t3(pMin.X, pMin.Y, pMin.Z); // 0 -// vertices[17] = float32_t3(pMin.X, pMax.Y, pMin.Z); // 4 -// vertices[18] = float32_t3(pMax.X, pMin.Y, pMin.Z); // 1 -// vertices[19] = float32_t3(pMax.X, pMax.Y, pMin.Z); // 5 -// -// vertices[20] = float32_t3(pMin.X, pMin.Y, pMax.Z); // 2 -// vertices[21] = float32_t3(pMin.X, pMax.Y, pMax.Z); // 6 -// vertices[22] = float32_t3(pMax.X, pMin.Y, pMax.Z); // 3 -// vertices[23] = float32_t3(pMax.X, pMax.Y, pMax.Z); // 7 -// -// return vertices; -//} +hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) +{ + const auto diagonal = aabb.getExtent(); + hlsl::float32_t4x4 transform; + transform[0][3] = aabb.minVx.x; + transform[1][3] = aabb.minVx.y; + transform[2][3] = aabb.minVx.z; + transform[3][3] = 1.f; + transform[0][0] = diagonal.x; + transform[1][1] = diagonal.y; + transform[2][2] = diagonal.z; + return transform; +} void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) { @@ -424,17 +419,8 @@ void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::floa { InstanceData instance; instance.color = color; - const auto diagonal = aabb.getExtent(); - - hlsl::float32_t4x4 instanceTransform; - instanceTransform[0][3] = aabb.minVx.x; - instanceTransform[1][3] = aabb.minVx.y; - instanceTransform[2][3] = aabb.minVx.z; - instanceTransform[3][3] = 1.f; - instanceTransform[0][0] = diagonal.x; - instanceTransform[1][1] = diagonal.y; - instanceTransform[2][2] = diagonal.z; + hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); instance.transform = math::linalg::promoted_mul(transform, instanceTransform); m_instances.push_back(instance); } diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt 
b/src/nbl/ext/DebugDraw/CMakeLists.txt index 7e97cb74a4..b62d06f518 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -32,6 +32,7 @@ if(NBL_EMBED_BUILTIN_RESOURCES) set(_BR_TARGET_ extDebugDrawbuiltinResourceData) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "common.hlsl") + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "single.vertex.hlsl") # (*) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.vertex.hlsl") # (*) LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.fragment.hlsl") # (*) From 17a135c074e090f43c02df1a8b589ba8b73a63bf Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 10:59:45 +0700 Subject: [PATCH 101/140] change batch render to take span of InstanceData Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 7 +------ src/nbl/ext/DebugDraw/CDrawAABB.cpp | 29 +++++---------------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index dad33bea27..089e885887 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -58,14 +58,10 @@ class DrawAABB final : public core::IReferenceCounted // records draw command for single AABB, user has to set pipeline outside bool renderSingle(video::IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat); - bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat); + bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat); static hlsl::float32_t4x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); - void addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color = { 1,0,0,1 }); - void addOBB(const hlsl::shapes::AABB<3, 
float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color = { 1,0,0,1 }); - void clearAABBs(); - protected: DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, core::smart_refctd_ptr indicesBuffer, core::smart_refctd_ptr verticesBuffer); @@ -77,7 +73,6 @@ class DrawAABB final : public core::IReferenceCounted static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); static core::smart_refctd_ptr createVerticesBuffer(SCreationParameters& params); - std::vector m_instances; core::smart_refctd_ptr m_indicesBuffer; core::smart_refctd_ptr m_verticesBuffer; diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 53a6e3cebb..3c40f9306e 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -336,7 +336,7 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return true; } -bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, const hlsl::float32_t4x4& cameraMat) +bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) { using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); @@ -355,9 +355,11 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); - auto instances = m_instances; - for (auto& inst : instances) + std::vector instances(aabbInstances.size()); + for (uint32_t i = 0; i < aabbInstances.size(); i++) { + auto& inst = instances[i]; + inst = aabbInstances[i]; inst.transform = hlsl::mul(cameraMat, inst.transform); } @@ -409,25 +411,4 @@ hlsl::float32_t4x4 
DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, fl return transform; } -void DrawAABB::addAABB(const hlsl::shapes::AABB<3,float>& aabb, const hlsl::float32_t4& color) -{ - const auto transform = hlsl::float32_t4x4(1); - addOBB(aabb, transform, color); -} - -void DrawAABB::addOBB(const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4x4& transform, const hlsl::float32_t4& color) -{ - InstanceData instance; - instance.color = color; - - hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); - instance.transform = math::linalg::promoted_mul(transform, instanceTransform); - m_instances.push_back(instance); -} - -void DrawAABB::clearAABBs() -{ - m_instances.clear(); -} - } From 1b7a433de37a338b65ce213f1ad5480fb1e3a5b9 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 12:00:50 +0700 Subject: [PATCH 102/140] removed vertex buffer, use const vertex array in shader instead Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 4 +- .../builtin/hlsl/aabb_instances.vertex.hlsl | 14 +++++- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 2 - .../DebugDraw/builtin/hlsl/single.vertex.hlsl | 13 +++++- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 45 ++----------------- 5 files changed, 28 insertions(+), 50 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 089e885887..1efe973d10 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -64,17 +64,15 @@ class DrawAABB final : public core::IReferenceCounted protected: DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, - core::smart_refctd_ptr indicesBuffer, core::smart_refctd_ptr verticesBuffer); + core::smart_refctd_ptr indicesBuffer); ~DrawAABB() override; private: static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const 
std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); - static core::smart_refctd_ptr createVerticesBuffer(SCreationParameters& params); core::smart_refctd_ptr m_indicesBuffer; - core::smart_refctd_ptr m_verticesBuffer; SCachedCreationParameters m_cachedCreationParams; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index 929ff2e60d..bb535a7216 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -13,9 +13,19 @@ using namespace nbl::ext::debug_draw; [shader("vertex")] PSInput main() { - PSInput output; + const float32_t3 unitAABBVertices[8] = { + float32_t3(0.0, 0.0, 0.0), + float32_t3(1.0, 0.0, 0.0), + float32_t3(0.0, 0.0, 1.0), + float32_t3(1.0, 0.0, 1.0), + float32_t3(0.0, 1.0, 0.0), + float32_t3(1.0, 1.0, 0.0), + float32_t3(0.0, 1.0, 1.0), + float32_t3(1.0, 1.0, 1.0) + }; - float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); + PSInput output; + float32_t3 vertex = unitAABBVertices[glsl::gl_VertexIndex()]; InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); output.position = math::linalg::promoted_mul(instance.transform, vertex); diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index 03a3bbfa49..a178d45465 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -18,13 +18,11 @@ struct InstanceData struct SSinglePushConstants { - uint64_t pVertexBuffer; InstanceData instance; }; struct SPushConstants { - uint64_t pVertexBuffer; uint64_t pInstanceBuffer; }; diff --git 
a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl index e9b68a811c..e1a426dec8 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl @@ -13,8 +13,19 @@ using namespace nbl::ext::debug_draw; [shader("vertex")] PSInput main() { + const float32_t3 unitAABBVertices[8] = { + float32_t3(0.0, 0.0, 0.0), + float32_t3(1.0, 0.0, 0.0), + float32_t3(0.0, 0.0, 1.0), + float32_t3(1.0, 0.0, 1.0), + float32_t3(0.0, 1.0, 0.0), + float32_t3(1.0, 1.0, 0.0), + float32_t3(0.0, 1.0, 1.0), + float32_t3(1.0, 1.0, 1.0) + }; + PSInput output; - float32_t3 vertex = (bda::__ptr::create(pc.pVertexBuffer) + glsl::gl_VertexIndex()).deref_restrict().load(); + float32_t3 vertex = unitAABBVertices[glsl::gl_VertexIndex()]; output.position = math::linalg::promoted_mul(pc.instance.transform, vertex); output.color = pc.instance.color; diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 3c40f9306e..a11052aa76 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -44,20 +44,13 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) logger->log("Failed to create indices buffer!", ILogger::ELL_ERROR); return nullptr; } - auto verticesBuffer = createVerticesBuffer(params); - if (!verticesBuffer) - { - logger->log("Failed to create vertices buffer!", ILogger::ELL_ERROR); - return nullptr; - } - return core::smart_refctd_ptr(new DrawAABB(std::move(params), singlePipeline, batchPipeline, indicesBuffer, verticesBuffer)); + return core::smart_refctd_ptr(new DrawAABB(std::move(params), singlePipeline, batchPipeline, indicesBuffer)); } -DrawAABB::DrawAABB(SCreationParameters&& params, core::smart_refctd_ptr singlePipeline, smart_refctd_ptr batchPipeline, - smart_refctd_ptr indicesBuffer, smart_refctd_ptr verticesBuffer) +DrawAABB::DrawAABB(SCreationParameters&& params, 
core::smart_refctd_ptr singlePipeline, smart_refctd_ptr batchPipeline, smart_refctd_ptr indicesBuffer) : m_cachedCreationParams(std::move(params)), m_singlePipeline(std::move(singlePipeline)), m_batchPipeline(std::move(batchPipeline)), - m_indicesBuffer(std::move(indicesBuffer)), m_verticesBuffer(std::move(verticesBuffer)) + m_indicesBuffer(std::move(indicesBuffer)) { } @@ -271,36 +264,6 @@ smart_refctd_ptr DrawAABB::createIndicesBuffer(SCreationParameters& return indicesBuffer; } -smart_refctd_ptr DrawAABB::createVerticesBuffer(SCreationParameters& params) -{ - const auto unitAABB = core::aabbox3d({ 0, 0, 0 }, { 1, 1, 1 }); - float32_t3 pMin = { 0, 0, 0 }; - float32_t3 pMax = { 1, 1, 1 }; - - std::array unitAABBVertices; - unitAABBVertices[0] = float32_t3(pMin.x, pMin.y, pMin.z); - unitAABBVertices[1] = float32_t3(pMax.x, pMin.y, pMin.z); - unitAABBVertices[2] = float32_t3(pMin.x, pMin.y, pMax.z); - unitAABBVertices[3] = float32_t3(pMax.x, pMin.y, pMax.z); - unitAABBVertices[4] = float32_t3(pMin.x, pMax.y, pMin.z); - unitAABBVertices[5] = float32_t3(pMax.x, pMax.y, pMin.z); - unitAABBVertices[6] = float32_t3(pMin.x, pMax.y, pMax.z); - unitAABBVertices[7] = float32_t3(pMax.x, pMax.y, pMax.z); - - IGPUBuffer::SCreationParams bufparams; - bufparams.size = sizeof(float32_t3) * unitAABBVertices.size(); - bufparams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - - smart_refctd_ptr vertexBuffer; - params.utilities->createFilledDeviceLocalBufferOnDedMem( - SIntendedSubmitInfo{ .queue = params.transfer }, - std::move(bufparams), - unitAABBVertices.data() - ).move_into(vertexBuffer); - - return vertexBuffer; -} - core::smart_refctd_ptr DrawAABB::createPipelineLayoutFromPCRange(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange) { return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); @@ -324,7 +287,6 @@ bool 
DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); SSinglePushConstants pc; - pc.pVertexBuffer = m_verticesBuffer->getDeviceAddress(); hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); pc.instance.transform = hlsl::mul(cameraMat, instanceTransform); @@ -385,7 +347,6 @@ bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo wa assert(!streaming->needsManualFlushOrInvalidate()); SPushConstants pc; - pc.pVertexBuffer = m_verticesBuffer->getDeviceAddress(); pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; commandBuffer->pushConstants(m_batchPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); From 65cf00da84c5cf0529ee8e6924b950caeaf87805 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 20 Aug 2025 16:15:36 +0700 Subject: [PATCH 103/140] validate creation params, added draw modes at create time Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 10 +++++ src/nbl/ext/DebugDraw/CDrawAABB.cpp | 64 ++++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 1efe973d10..a8e3205f22 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -18,6 +18,13 @@ class DrawAABB final : public core::IReferenceCounted static constexpr inline uint32_t IndicesCount = 24u; static constexpr inline uint32_t VerticesCount = 8u; + enum DrawMode : uint16_t + { + ADM_DRAW_SINGLE = 0b01, + ADM_DRAW_BATCH = 0b10, + ADM_DRAW_BOTH = 0b11 + }; + struct SCachedCreationParameters { using streaming_buffer_t = video::StreamingTransientDataBufferST>; @@ -25,6 +32,8 @@ class DrawAABB final : public core::IReferenceCounted static constexpr inline auto RequiredAllocateFlags = 
core::bitflag(video::IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); static constexpr inline auto RequiredUsageFlags = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + DrawMode drawMode = ADM_DRAW_BOTH; + core::smart_refctd_ptr utilities; //! optional, default MDI buffer allocated if not provided @@ -68,6 +77,7 @@ class DrawAABB final : public core::IReferenceCounted ~DrawAABB() override; private: + static bool validateCreationParameters(SCreationParameters& params); static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index a11052aa76..f3f33d2733 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -19,17 +19,32 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) { auto* const logger = params.utilities->getLogger(); - auto singlePipeline = createPipeline(params, params.singlePipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); - if (!singlePipeline) + if (!validateCreationParameters(params)) { - logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); return nullptr; } - auto batchPipeline = createPipeline(params, params.batchPipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); - if (!batchPipeline) + + smart_refctd_ptr singlePipeline = nullptr; + if (params.drawMode & ADM_DRAW_SINGLE) { - logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); - return nullptr; + singlePipeline = createPipeline(params, params.singlePipelineLayout.get(), 
"single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!singlePipeline) + { + logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } + } + + smart_refctd_ptr batchPipeline = nullptr; + if (params.drawMode & ADM_DRAW_BATCH) + { + batchPipeline = createPipeline(params, params.batchPipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!batchPipeline) + { + logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); + return nullptr; + } } if (!createStreamingBuffer(params)) @@ -83,6 +98,29 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l return smart_refctd_ptr(archive); } +bool DrawAABB::validateCreationParameters(SCreationParameters& creationParams) +{ + const auto validation = std::to_array + ({ + std::make_pair(bool(creationParams.assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(creationParams.assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), + std::make_pair(bool(creationParams.utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(creationParams.transfer), "Invalid `creationParams.transfer` is nullptr!"), + std::make_pair(bool(creationParams.renderpass), "Invalid `creationParams.renderpass` is nullptr!"), + (creationParams.assetManager && creationParams.utilities && creationParams.transfer && creationParams.renderpass) ? 
std::make_pair(bool(creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[creationParams.transfer->getFamilyIndex()].queueFlags.hasFlags(IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") + }); + + system::logger_opt_ptr logger = creationParams.utilities->getLogger(); + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, ILogger::ELL_ERROR); + return false; + } + + return true; +} + smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) { auto system = smart_refctd_ptr(params.assetManager->getSystem()); @@ -281,6 +319,12 @@ core::smart_refctd_ptr DrawAABB::createDefaultPipelin bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat) { + if (!(m_cachedCreationParams.drawMode & ADM_DRAW_SINGLE)) + { + m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been enabled for draw single!", ILogger::ELL_ERROR); + return false; + } + commandBuffer->bindGraphicsPipeline(m_singlePipeline.get()); commandBuffer->setLineWidth(1.f); asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; @@ -300,6 +344,12 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) { + if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) + { + m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been enabled for draw batches!", ILogger::ELL_ERROR); + return false; + } + using offset_t = 
SCachedCreationParameters::streaming_buffer_t::size_type; constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all From 3d9d614929b12aa53895ce9151b6090ce8de79bd Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 8 Sep 2025 10:56:03 +0700 Subject: [PATCH 104/140] enable debug draw by default Signed-off-by: Corey --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cdb62c4b1b..84c9a99dc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,7 +175,7 @@ option(NBL_FAST_MATH "Enable fast low-precision math" OFF) # the reason OFF is b option(NBL_BUILD_EXAMPLES "Enable building examples" ON) option(NBL_BUILD_MITSUBA_LOADER "Enable nbl::ext::MitsubaLoader?" OFF) # TODO: once it compies turn this ON by default! option(NBL_BUILD_IMGUI "Enable nbl::ext::ImGui?" ON) -option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" OFF) +option(NBL_BUILD_DEBUG_DRAW "Enable Nabla Debug Draw extension?" ON) option(NBL_BUILD_OPTIX "Enable nbl::ext::OptiX?" 
OFF) if(NBL_COMPILE_WITH_CUDA) From 1f423d63f7241a20827c50e5506b7db0fd66cfcd Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 16 Sep 2025 11:13:54 +0700 Subject: [PATCH 105/140] fix embed builtin resource build Signed-off-by: Corey --- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index f3f33d2733..f445398e0c 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -5,6 +5,10 @@ #include "nbl/ext/DebugDraw/CDrawAABB.h" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#ifdef NBL_EMBED_BUILTIN_RESOURCES +#include "nbl/ext/debug_draw/builtin/CArchive.h" +#endif + using namespace nbl; using namespace core; using namespace video; From 16699415e20e2285beec73c14891db8d0953f77e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 16 Sep 2025 21:21:17 +0200 Subject: [PATCH 106/140] resolve https://github.com/Devsh-Graphics-Programming/Nabla/pull/900#discussion_r2352585679 Signed-off-by: Corey --- .../ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl | 2 +- .../nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl | 2 +- include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl | 2 +- include/nbl/system/ISystem.h | 1 + src/nbl/ext/DebugDraw/CDrawAABB.cpp | 2 +- src/nbl/ext/DebugDraw/CMakeLists.txt | 4 ++-- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl index 686e8934db..09a12f3d07 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl @@ -1,6 +1,6 @@ #pragma shader_stage(fragment) -#include "common.hlsl" +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::ext::debug_draw; diff --git 
a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index bb535a7216..5f67aa2f1e 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -3,7 +3,7 @@ #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" -#include "common.hlsl" +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::hlsl; using namespace nbl::ext::debug_draw; diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl index e1a426dec8..64ca75d5ab 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl @@ -3,7 +3,7 @@ #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" -#include "common.hlsl" +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::hlsl; using namespace nbl::ext::debug_draw; diff --git a/include/nbl/system/ISystem.h b/include/nbl/system/ISystem.h index 4e02221d7c..65f0351582 100644 --- a/include/nbl/system/ISystem.h +++ b/include/nbl/system/ISystem.h @@ -70,6 +70,7 @@ class NBL_API2 ISystem : public core::IReferenceCounted // virtual inline bool isDirectory(const system::path& p) const { + // TODO: fix bug, input "nbl/ext/DebugDraw/builtin/hlsl" -> returs true when no such dir present in mounted stuff due to how it uses parent paths in loop (goes up up till matches "nbl" builtin archive and thinks it resolved the requested dir) if (isPathReadOnly(p)) return p.extension()==""; // TODO: this is a temporary decision until we figure out how to check if a file is directory in android APK else diff --git 
a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index f445398e0c..d231f21e3e 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -162,7 +162,7 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); }; - if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + if (!system->exists(path(NBL_ARCHIVE_ENTRY) / "common.hlsl", {})) mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); auto vertexShader = compileShader(vsPath, IShader::E_SHADER_STAGE::ESS_VERTEX); diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index b62d06f518..4cb2ee54cf 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -20,8 +20,8 @@ nbl_create_ext_library_project( ) # this should be standard for all extensions -set(_ARCHIVE_ENTRY_KEY_ "DebugDraw/builtin/hlsl") # then each one has unique archive key -get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}/nbl/ext" ABSOLUTE) +set(_ARCHIVE_ENTRY_KEY_ "nbl/ext/DebugDraw/builtin/hlsl") # then each one has unique archive key +get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) From 789bdbd92a8892898869c2be012b0292e006e4e0 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Nov 2025 15:47:52 +0700 Subject: [PATCH 107/140] fix + optimize aabb vertex calc, includes Signed-off-by: Corey --- .../builtin/hlsl/aabb_instances.vertex.hlsl | 16 +--------------- .../nbl/ext/DebugDraw/builtin/hlsl/common.hlsl | 12 +++++++++++- .../DebugDraw/builtin/hlsl/single.vertex.hlsl | 16 +--------------- 3 files changed, 13 insertions(+), 
31 deletions(-) diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl index 5f67aa2f1e..451243bbcc 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl @@ -1,8 +1,5 @@ #pragma shader_stage(vertex) -#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" -#include "nbl/builtin/hlsl/bda/__ptr.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::hlsl; @@ -13,19 +10,8 @@ using namespace nbl::ext::debug_draw; [shader("vertex")] PSInput main() { - const float32_t3 unitAABBVertices[8] = { - float32_t3(0.0, 0.0, 0.0), - float32_t3(1.0, 0.0, 0.0), - float32_t3(0.0, 0.0, 1.0), - float32_t3(1.0, 0.0, 1.0), - float32_t3(0.0, 1.0, 0.0), - float32_t3(1.0, 1.0, 0.0), - float32_t3(0.0, 1.0, 1.0), - float32_t3(1.0, 1.0, 1.0) - }; - PSInput output; - float32_t3 vertex = unitAABBVertices[glsl::gl_VertexIndex()]; + const float32_t3 vertex = getUnitAABBVertex(); InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); output.position = math::linalg::promoted_mul(instance.transform, vertex); diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index a178d45465..ac5deaef5f 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -2,6 +2,11 @@ #define _NBL_DEBUG_DRAW_EXT_COMMON_HLSL #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#ifdef __HLSL_VERSION +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" +#endif namespace nbl { @@ -30,8 +35,13 @@ struct SPushConstants struct PSInput { float32_t4 position : SV_Position; - float32_t4 color : 
TEXCOORD0; + nointerpolation float32_t4 color : TEXCOORD0; }; + +float32_t3 getUnitAABBVertex() +{ + return (hlsl::promote(hlsl::glsl::gl_VertexIndex()) >> uint32_t3(0,2,1)) & 0x1u; +} #endif } diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl index 64ca75d5ab..5b4f2a39a7 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl @@ -1,8 +1,5 @@ #pragma shader_stage(vertex) -#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" -#include "nbl/builtin/hlsl/bda/__ptr.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" using namespace nbl::hlsl; @@ -13,19 +10,8 @@ using namespace nbl::ext::debug_draw; [shader("vertex")] PSInput main() { - const float32_t3 unitAABBVertices[8] = { - float32_t3(0.0, 0.0, 0.0), - float32_t3(1.0, 0.0, 0.0), - float32_t3(0.0, 0.0, 1.0), - float32_t3(1.0, 0.0, 1.0), - float32_t3(0.0, 1.0, 0.0), - float32_t3(1.0, 1.0, 0.0), - float32_t3(0.0, 1.0, 1.0), - float32_t3(1.0, 1.0, 1.0) - }; - PSInput output; - float32_t3 vertex = unitAABBVertices[glsl::gl_VertexIndex()]; + float32_t3 vertex = getUnitAABBVertex(); output.position = math::linalg::promoted_mul(pc.instance.transform, vertex); output.color = pc.instance.color; From 00af78cc33db50dd4219f566be420511cbb1465d Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Nov 2025 16:09:08 +0700 Subject: [PATCH 108/140] changed debug_draw library target usage Signed-off-by: Corey --- src/nbl/ext/DebugDraw/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 4cb2ee54cf..7a89caca0d 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -40,4 +40,4 @@ if(NBL_EMBED_BUILTIN_RESOURCES) LINK_BUILTIN_RESOURCES_TO_TARGET(${LIB_NAME} 
${_BR_TARGET_}) endif() -set(NBL_EXT_DEBUG_DRAW_TARGET ${LIB_NAME} CACHE INTERNAL "Nabla's Debug Draw logical target name") +add_library(Nabla::ext::DebugDraw ALIAS ${LIB_NAME}) From f36ee1a6e3b66ba4f5eb8289cedf4b20a49875a2 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 27 Nov 2025 16:58:28 +0700 Subject: [PATCH 109/140] some fixes to draw aabb Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 106 +++++++++++++++++++++-- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 116 ++++++-------------------- 2 files changed, 125 insertions(+), 97 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index a8e3205f22..68b7ae0e2a 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -2,21 +2,21 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_EXT_DRAW_AABB_H_ -#define _NBL_EXT_DRAW_AABB_H_ +#ifndef _NBL_EXT_DEBUG_DRAW_DRAW_AABB_H_ +#define _NBL_EXT_DEBUG_DRAW_DRAW_AABB_H_ #include "nbl/video/declarations.h" #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/shapes/aabb.hlsl" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" namespace nbl::ext::debug_draw { -class DrawAABB final : public core::IReferenceCounted -{ + class DrawAABB final : public core::IReferenceCounted + { public: static constexpr inline uint32_t IndicesCount = 24u; - static constexpr inline uint32_t VerticesCount = 8u; enum DrawMode : uint16_t { @@ -39,7 +39,8 @@ class DrawAABB final : public core::IReferenceCounted //! 
optional, default MDI buffer allocated if not provided core::smart_refctd_ptr streamingBuffer = nullptr; }; - + + // only used to make the 24 element index buffer and instanced pipeline on create struct SCreationParameters : SCachedCreationParameters { video::IQueue* transfer = nullptr; @@ -48,6 +49,29 @@ class DrawAABB final : public core::IReferenceCounted core::smart_refctd_ptr singlePipelineLayout; core::smart_refctd_ptr batchPipelineLayout; core::smart_refctd_ptr renderpass = nullptr; + + inline bool validate() const + { + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), + std::make_pair(bool(renderpass), "Invalid `creationParams.renderpass` is nullptr!"), + (assetManager && utilities && transfer && renderpass) ? 
std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") + }); + + system::logger_opt_ptr logger = utilities->getLogger(); + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + return true; + } }; // creates an instance that can draw one AABB via push constant or multiple using streaming buffer @@ -67,7 +91,73 @@ class DrawAABB final : public core::IReferenceCounted // records draw command for single AABB, user has to set pipeline outside bool renderSingle(video::IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat); - bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat); + // records draw command for rendering batch of AABB instances as InstanceData + // user has to set span of filled-in InstanceData; camera matrix used in push constant + inline bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) + { + if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) + { + m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been enabled for draw batches!", system::ILogger::ELL_ERROR); + return false; + } + + using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; + constexpr auto MdiSizes = std::to_array({ sizeof(hlsl::float32_t3), sizeof(InstanceData) }); + // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all + constexpr offset_t 
MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); + // allocator initialization needs us to round up to PoT + const auto MaxPOTAlignment = hlsl::roundUpToPoT(MaxAlignment); + + auto* streaming = m_cachedCreationParams.streamingBuffer.get(); + + auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); + assert(streamingPtr); + + commandBuffer->bindGraphicsPipeline(m_batchPipeline.get()); + commandBuffer->setLineWidth(1.f); + asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; + commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); + + std::vector instances(aabbInstances.size()); + for (uint32_t i = 0; i < aabbInstances.size(); i++) + { + auto& inst = instances[i]; + inst = aabbInstances[i]; + inst.transform = hlsl::mul(cameraMat, inst.transform); + } + + auto instancesIt = instances.begin(); + const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); + using suballocator_t = core::LinearAddressAllocatorST; + while (instancesIt != instances.end()) + { + const uint32_t instanceCount = hlsl::min(instancesPerIter, instances.size()); + offset_t inputOffset = 0u; + offset_t ImaginarySizeUpperBound = 0x1 << 30; + suballocator_t imaginaryChunk(nullptr, inputOffset, 0, hlsl::roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); + uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); + const uint32_t totalSize = imaginaryChunk.get_allocated_size(); + + inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; + std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); + streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); + + memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * 
instanceCount); + instancesIt += instanceCount; + + assert(!streaming->needsManualFlushOrInvalidate()); + + SPushConstants pc; + pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; + + commandBuffer->pushConstants(m_batchPipeline->getLayout(), asset::IShader::E_SHADER_STAGE::ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); + + streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); + } + + return true; + } static hlsl::float32_t4x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); @@ -77,7 +167,7 @@ class DrawAABB final : public core::IReferenceCounted ~DrawAABB() override; private: - static bool validateCreationParameters(SCreationParameters& params); + //static bool validateCreationParameters(SCreationParameters& params); static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index d231f21e3e..60079f71fc 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -3,7 +3,6 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/ext/DebugDraw/CDrawAABB.h" -#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #ifdef NBL_EMBED_BUILTIN_RESOURCES #include "nbl/ext/debug_draw/builtin/CArchive.h" @@ -23,7 +22,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) { auto* const logger = params.utilities->getLogger(); - if (!validateCreationParameters(params)) + if (!params.validate()) { logger->log("Failed creation parameters validation!", ILogger::ELL_ERROR); return nullptr; @@ 
-102,28 +101,28 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l return smart_refctd_ptr(archive); } -bool DrawAABB::validateCreationParameters(SCreationParameters& creationParams) -{ - const auto validation = std::to_array - ({ - std::make_pair(bool(creationParams.assetManager), "Invalid `creationParams.assetManager` is nullptr!"), - std::make_pair(bool(creationParams.assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), - std::make_pair(bool(creationParams.utilities), "Invalid `creationParams.utilities` is nullptr!"), - std::make_pair(bool(creationParams.transfer), "Invalid `creationParams.transfer` is nullptr!"), - std::make_pair(bool(creationParams.renderpass), "Invalid `creationParams.renderpass` is nullptr!"), - (creationParams.assetManager && creationParams.utilities && creationParams.transfer && creationParams.renderpass) ? std::make_pair(bool(creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[creationParams.transfer->getFamilyIndex()].queueFlags.hasFlags(IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") - }); - - system::logger_opt_ptr logger = creationParams.utilities->getLogger(); - for (const auto& [ok, error] : validation) - if (!ok) - { - logger.log(error, ILogger::ELL_ERROR); - return false; - } - - return true; -} +//bool DrawAABB::validateCreationParameters(SCreationParameters& creationParams) +//{ +// const auto validation = std::to_array +// ({ +// std::make_pair(bool(creationParams.assetManager), "Invalid `creationParams.assetManager` is nullptr!"), +// std::make_pair(bool(creationParams.assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), +// std::make_pair(bool(creationParams.utilities), "Invalid `creationParams.utilities` is nullptr!"), +// 
std::make_pair(bool(creationParams.transfer), "Invalid `creationParams.transfer` is nullptr!"), +// std::make_pair(bool(creationParams.renderpass), "Invalid `creationParams.renderpass` is nullptr!"), +// (creationParams.assetManager && creationParams.utilities && creationParams.transfer && creationParams.renderpass) ? std::make_pair(bool(creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[creationParams.transfer->getFamilyIndex()].queueFlags.hasFlags(IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") +// }); +// +// system::logger_opt_ptr logger = creationParams.utilities->getLogger(); +// for (const auto& [ok, error] : validation) +// if (!ok) +// { +// logger.log(error, ILogger::ELL_ERROR); +// return false; +// } +// +// return true; +//} smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) { @@ -346,71 +345,10 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return true; } -bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) -{ - if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) - { - m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been enabled for draw batches!", ILogger::ELL_ERROR); - return false; - } - - using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; - constexpr auto MdiSizes = std::to_array({ sizeof(float32_t3), sizeof(InstanceData) }); - // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all - constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return 
std::lcm(a, b); }); - // allocator initialization needs us to round up to PoT - const auto MaxPOTAlignment = roundUpToPoT(MaxAlignment); - - auto* streaming = m_cachedCreationParams.streamingBuffer.get(); - - auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); - assert(streamingPtr); - - commandBuffer->bindGraphicsPipeline(m_batchPipeline.get()); - commandBuffer->setLineWidth(1.f); - asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; - commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); - - std::vector instances(aabbInstances.size()); - for (uint32_t i = 0; i < aabbInstances.size(); i++) - { - auto& inst = instances[i]; - inst = aabbInstances[i]; - inst.transform = hlsl::mul(cameraMat, inst.transform); - } - - auto instancesIt = instances.begin(); - const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); - using suballocator_t = core::LinearAddressAllocatorST; - while (instancesIt != instances.end()) - { - const uint32_t instanceCount = min(instancesPerIter, instances.size()); - offset_t inputOffset = 0u; - offset_t ImaginarySizeUpperBound = 0x1 << 30; - suballocator_t imaginaryChunk(nullptr, inputOffset, 0, roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); - uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); - const uint32_t totalSize = imaginaryChunk.get_allocated_size(); - - inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; - std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); - streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - - memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); - instancesIt += instanceCount; - - assert(!streaming->needsManualFlushOrInvalidate()); - - SPushConstants pc; - pc.pInstanceBuffer = 
m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; - - commandBuffer->pushConstants(m_batchPipeline->getLayout(), ESS_VERTEX, 0, sizeof(SPushConstants), &pc); - commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); - - streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); - } - - return true; -} +//bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) +//{ +// +//} hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) { From cf2cb3ec004ad954fd2f0c40e466a59e61e1d721 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Nov 2025 10:22:35 +0700 Subject: [PATCH 110/140] removed commented out bit Signed-off-by: Corey --- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 60079f71fc..18684ea479 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -101,29 +101,6 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l return smart_refctd_ptr(archive); } -//bool DrawAABB::validateCreationParameters(SCreationParameters& creationParams) -//{ -// const auto validation = std::to_array -// ({ -// std::make_pair(bool(creationParams.assetManager), "Invalid `creationParams.assetManager` is nullptr!"), -// std::make_pair(bool(creationParams.assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), -// std::make_pair(bool(creationParams.utilities), "Invalid `creationParams.utilities` is nullptr!"), -// std::make_pair(bool(creationParams.transfer), "Invalid `creationParams.transfer` is nullptr!"), -// std::make_pair(bool(creationParams.renderpass), "Invalid `creationParams.renderpass` is nullptr!"), -// (creationParams.assetManager && creationParams.utilities && 
creationParams.transfer && creationParams.renderpass) ? std::make_pair(bool(creationParams.utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[creationParams.transfer->getFamilyIndex()].queueFlags.hasFlags(IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") -// }); -// -// system::logger_opt_ptr logger = creationParams.utilities->getLogger(); -// for (const auto& [ok, error] : validation) -// if (!ok) -// { -// logger.log(error, ILogger::ELL_ERROR); -// return false; -// } -// -// return true; -//} - smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) { auto system = smart_refctd_ptr(params.assetManager->getSystem()); From 0617056c2587c11503d60670c476709fcb1f72e3 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Nov 2025 15:24:16 +0700 Subject: [PATCH 111/140] create pipelineLayout util can takes mode, also create layout if missing in params struct Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 29 +++++++++++---------------- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 22 +++++++++++--------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 68b7ae0e2a..8c5806c48e 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -52,23 +52,18 @@ namespace nbl::ext::debug_draw inline bool validate() const { - const auto validation = std::to_array - ({ - std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), - std::make_pair(bool(assetManager->getSystem()), "Invalid `creationParams.assetManager->getSystem()` is nullptr!"), - std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), - 
std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), - std::make_pair(bool(renderpass), "Invalid `creationParams.renderpass` is nullptr!"), - (assetManager && utilities && transfer && renderpass) ? std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") : std::make_pair(false, "Pass valid required DrawAABB::S_CREATION_PARAMETERS!") - }); + assert(bool(assetManager)); + assert(bool(assetManager->getSystem())); + assert(bool(utilities)); + assert(bool(transfer)); + assert(bool(renderpass)); system::logger_opt_ptr logger = utilities->getLogger(); - for (const auto& [ok, error] : validation) - if (!ok) - { - logger.log(error, system::ILogger::ELL_ERROR); - return false; - } + if (!bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT))) + { + logger.log("Invalid `creationParams.transfer` is not capable of transfer operations!", system::ILogger::ELL_ERROR); + return false; + } return true; } @@ -80,8 +75,8 @@ namespace nbl::ext::debug_draw // creates pipeline layout from push constant range static core::smart_refctd_ptr createPipelineLayoutFromPCRange(video::ILogicalDevice* device, const asset::SPushConstantRange& pcRange); - // creates default pipeline layout for streaming version - static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device); + // creates default pipeline layout for pipeline specified by draw mode (note: if mode==BOTH, returns layout for BATCH mode) + static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode = ADM_DRAW_BATCH); //! 
mounts the extension's archive to given system - useful if you want to create your own shaders with common header included static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 18684ea479..0f0e951bf6 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -31,7 +31,10 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) smart_refctd_ptr singlePipeline = nullptr; if (params.drawMode & ADM_DRAW_SINGLE) { - singlePipeline = createPipeline(params, params.singlePipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + auto pipelineLayout = params.singlePipelineLayout; + if (!pipelineLayout) + pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_SINGLE); + singlePipeline = createPipeline(params, pipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); if (!singlePipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); @@ -42,7 +45,10 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) smart_refctd_ptr batchPipeline = nullptr; if (params.drawMode & ADM_DRAW_BATCH) { - batchPipeline = createPipeline(params, params.batchPipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + auto pipelineLayout = params.batchPipelineLayout; + if (!pipelineLayout) + pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_BATCH); + batchPipeline = createPipeline(params, pipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); if (!batchPipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); @@ -287,14 +293,15 @@ core::smart_refctd_ptr DrawAABB::createPipelineLayout return device->createPipelineLayout({ &pcRange , 1 }, nullptr, 
nullptr, nullptr, nullptr); } -core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device) +core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode) { + const uint32_t pcSize = (mode & ADM_DRAW_BATCH) ? sizeof(SPushConstants) : sizeof(SSinglePushConstants); SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, .offset = 0, - .size = sizeof(SPushConstants) + .size = pcSize }; - return device->createPipelineLayout({ &pcRange , 1 }, nullptr, nullptr, nullptr, nullptr); + return createPipelineLayoutFromPCRange(device, pcRange); } bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat) @@ -322,11 +329,6 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return true; } -//bool DrawAABB::render(IGPUCommandBuffer* commandBuffer, ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) -//{ -// -//} - hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) { const auto diagonal = aabb.getExtent(); From 9edb083ee2dd4082843b9a964383d2d46eb37a00 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Nov 2025 16:19:10 +0700 Subject: [PATCH 112/140] aabb local transform is 3x4, common draw param struct between single and batch Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 18 +++++++++++++----- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 15 +++++++-------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 8c5806c48e..3a4096adfa 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -69,6 +69,13 @@ namespace nbl::ext::debug_draw } }; + struct DrawParameters + { + video::IGPUCommandBuffer* commandBuffer 
= nullptr; + hlsl::float32_t4x4 cameraMat = hlsl::float32_t4x4(1); + float lineWidth = 1.f; + }; + // creates an instance that can draw one AABB via push constant or multiple using streaming buffer static core::smart_refctd_ptr create(SCreationParameters&& params); @@ -84,11 +91,11 @@ namespace nbl::ext::debug_draw inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } // records draw command for single AABB, user has to set pipeline outside - bool renderSingle(video::IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat); + bool renderSingle(const DrawParameters& params, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color); // records draw command for rendering batch of AABB instances as InstanceData // user has to set span of filled-in InstanceData; camera matrix used in push constant - inline bool render(video::IGPUCommandBuffer* commandBuffer, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances, const hlsl::float32_t4x4& cameraMat) + inline bool render(const DrawParameters& params, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances) { if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) { @@ -108,8 +115,9 @@ namespace nbl::ext::debug_draw auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); assert(streamingPtr); + auto& commandBuffer = params.commandBuffer; commandBuffer->bindGraphicsPipeline(m_batchPipeline.get()); - commandBuffer->setLineWidth(1.f); + commandBuffer->setLineWidth(params.lineWidth); asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); @@ -118,7 +126,7 @@ namespace nbl::ext::debug_draw { auto& inst = instances[i]; inst = aabbInstances[i]; - inst.transform = hlsl::mul(cameraMat, inst.transform); + inst.transform = hlsl::mul(params.cameraMat, 
inst.transform); } auto instancesIt = instances.begin(); @@ -154,7 +162,7 @@ namespace nbl::ext::debug_draw return true; } - static hlsl::float32_t4x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); + static hlsl::float32_t3x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); protected: DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, core::smart_refctd_ptr batchPipeline, diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 0f0e951bf6..6302cd358e 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -304,7 +304,7 @@ core::smart_refctd_ptr DrawAABB::createDefaultPipelin return createPipelineLayoutFromPCRange(device, pcRange); } -bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color, const hlsl::float32_t4x4& cameraMat) +bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AABB<3, float>& aabb, const hlsl::float32_t4& color) { if (!(m_cachedCreationParams.drawMode & ADM_DRAW_SINGLE)) { @@ -312,15 +312,15 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return false; } + auto& commandBuffer = params.commandBuffer; commandBuffer->bindGraphicsPipeline(m_singlePipeline.get()); - commandBuffer->setLineWidth(1.f); + commandBuffer->setLineWidth(params.lineWidth); asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); SSinglePushConstants pc; - - hlsl::float32_t4x4 instanceTransform = getTransformFromAABB(aabb); - pc.instance.transform = hlsl::mul(cameraMat, instanceTransform); + hlsl::float32_t3x4 instanceTransform = getTransformFromAABB(aabb); + pc.instance.transform = math::linalg::promoted_mul(params.cameraMat, instanceTransform); pc.instance.color = color; 
commandBuffer->pushConstants(m_singlePipeline->getLayout(), ESS_VERTEX, 0, sizeof(SSinglePushConstants), &pc); @@ -329,14 +329,13 @@ bool DrawAABB::renderSingle(IGPUCommandBuffer* commandBuffer, const hlsl::shapes return true; } -hlsl::float32_t4x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) +hlsl::float32_t3x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) { const auto diagonal = aabb.getExtent(); - hlsl::float32_t4x4 transform; + hlsl::float32_t3x4 transform; transform[0][3] = aabb.minVx.x; transform[1][3] = aabb.minVx.y; transform[2][3] = aabb.minVx.z; - transform[3][3] = 1.f; transform[0][0] = diagonal.x; transform[1][1] = diagonal.y; transform[2][2] = diagonal.z; From f47e8572b4de458fecdd1cd58e37707991c43552 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 28 Nov 2025 16:43:20 +0700 Subject: [PATCH 113/140] write instances data directly to streaming buffer mem Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 3a4096adfa..e7af675324 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -121,20 +121,22 @@ namespace nbl::ext::debug_draw asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); - std::vector instances(aabbInstances.size()); - for (uint32_t i = 0; i < aabbInstances.size(); i++) - { - auto& inst = instances[i]; - inst = aabbInstances[i]; - inst.transform = hlsl::mul(params.cameraMat, inst.transform); - } + auto setInstancesRange = [&](InstanceData* data, uint32_t count) -> void { + for (uint32_t i = 0; i < count; i++) + { + auto inst = data + i; + *inst = aabbInstances[i]; + inst->transform = hlsl::mul(params.cameraMat, inst->transform); + } + }; - auto instancesIt = 
instances.begin(); + const uint32_t numInstances = aabbInstances.size(); const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); using suballocator_t = core::LinearAddressAllocatorST; - while (instancesIt != instances.end()) + uint32_t beginOffset = 0; + while (beginOffset < numInstances) { - const uint32_t instanceCount = hlsl::min(instancesPerIter, instances.size()); + const uint32_t instanceCount = hlsl::min(instancesPerIter, numInstances); offset_t inputOffset = 0u; offset_t ImaginarySizeUpperBound = 0x1 << 30; suballocator_t imaginaryChunk(nullptr, inputOffset, 0, hlsl::roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); @@ -145,8 +147,9 @@ namespace nbl::ext::debug_draw std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); - memcpy(streamingPtr + instancesByteOffset, std::addressof(*instancesIt), sizeof(InstanceData) * instanceCount); - instancesIt += instanceCount; + auto* const streamingInstancesPtr = reinterpret_cast(streamingPtr + instancesByteOffset); + setInstancesRange(streamingInstancesPtr, instanceCount); + beginOffset += instanceCount; assert(!streaming->needsManualFlushOrInvalidate()); From 035d6328da59b91b3e7c560d79ca27099e68fc1e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 1 Dec 2025 12:19:14 +0700 Subject: [PATCH 114/140] use single use cmdbuf to fill indices buffer Signed-off-by: Corey --- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 70 +++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 8 deletions(-) diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 6302cd358e..fa07d9805f 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -186,8 +186,6 @@ bool DrawAABB::createStreamingBuffer(SCreationParameters& params) { bitflag flags(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS); 
- if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT)) - flags |= IDeviceMemoryAllocation::EMCAF_READ; if (properties.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT)) flags |= IDeviceMemoryAllocation::EMCAF_WRITE; @@ -274,16 +272,72 @@ smart_refctd_ptr DrawAABB::createIndicesBuffer(SCreationParameters& unitAABBIndices[22] = 3; unitAABBIndices[23] = 7; + auto* device = params.utilities->getLogicalDevice(); + smart_refctd_ptr cmdbuf; + { + smart_refctd_ptr cmdpool = device->createCommandPool(params.transfer->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + if (!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &cmdbuf, 1 })) + { + params.utilities->getLogger()->log("Failed to create Command Buffer for index buffer!\n"); + return nullptr; + } + } + IGPUBuffer::SCreationParams bufparams; bufparams.size = sizeof(uint32_t) * unitAABBIndices.size(); - bufparams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + bufparams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF; smart_refctd_ptr indicesBuffer; - params.utilities->createFilledDeviceLocalBufferOnDedMem( - SIntendedSubmitInfo{ .queue = params.transfer }, - std::move(bufparams), - unitAABBIndices.data() - ).move_into(indicesBuffer); + { + indicesBuffer = device->createBuffer(std::move(bufparams)); + if (!indicesBuffer) + { + params.utilities->getLogger()->log("Failed to create index buffer!\n"); + return nullptr; + } + + video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = indicesBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + + auto bufMem = device->allocate(reqs, indicesBuffer.get()); + if (!bufMem.isValid()) + { + params.utilities->getLogger()->log("Failed to allocate device memory compatible with index buffer!\n"); + return nullptr; + } + } + + { + 
cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->beginDebugMarker("Fill indices buffer begin"); + + SBufferRange bufRange = { .offset = 0, .size = indicesBuffer->getSize(), .buffer = indicesBuffer }; + cmdbuf->updateBuffer(bufRange, unitAABBIndices.data()); + + cmdbuf->endDebugMarker(); + cmdbuf->end(); + } + + smart_refctd_ptr idxBufProgress; + constexpr auto FinishedValue = 25; + { + constexpr auto StartedValue = 0; + idxBufProgress = device->createSemaphore(StartedValue); + + IQueue::SSubmitInfo submitInfos[1] = {}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[] = { {.cmdbuf = cmdbuf.get()} }; + submitInfos[0].commandBuffers = cmdbufs; + const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = idxBufProgress.get(),.value = FinishedValue,.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS} }; + submitInfos[0].signalSemaphores = signals; + + params.transfer->submit(submitInfos); + } + + const ISemaphore::SWaitInfo waitInfos[] = { { + .semaphore = idxBufProgress.get(), + .value = FinishedValue + } }; + device->blockForSemaphores(waitInfos); return indicesBuffer; } From 173995a75cb9b554269a693c610ae50eeef2a6f4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 18 Dec 2025 11:25:49 +0700 Subject: [PATCH 115/140] roll constructor params into own struct, fix assert in validation Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 48 ++++++++++++++++++--------- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 29 ++++++---------- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index e7af675324..22a7a1e595 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -40,10 +40,9 @@ namespace nbl::ext::debug_draw core::smart_refctd_ptr streamingBuffer = nullptr; }; - // only used to make the 24 element index buffer and instanced pipeline on create struct SCreationParameters : 
SCachedCreationParameters { - video::IQueue* transfer = nullptr; + video::IQueue* transfer = nullptr; // only used to make the 24 element index buffer and instanced pipeline on create core::smart_refctd_ptr assetManager = nullptr; core::smart_refctd_ptr singlePipelineLayout; @@ -52,18 +51,24 @@ namespace nbl::ext::debug_draw inline bool validate() const { - assert(bool(assetManager)); - assert(bool(assetManager->getSystem())); - assert(bool(utilities)); - assert(bool(transfer)); - assert(bool(renderpass)); + const auto validation = std::to_array + ({ + std::make_pair(bool(assetManager), "Invalid `creationParams.assetManager` is nullptr!"), + std::make_pair(bool(utilities), "Invalid `creationParams.utilities` is nullptr!"), + std::make_pair(bool(transfer), "Invalid `creationParams.transfer` is nullptr!"), + std::make_pair(bool(renderpass), "Invalid `creationParams.renderpass` is nullptr!"), + std::make_pair(bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT)), "Invalid `creationParams.transfer` is not capable of transfer operations!") + }); system::logger_opt_ptr logger = utilities->getLogger(); - if (!bool(utilities->getLogicalDevice()->getPhysicalDevice()->getQueueFamilyProperties()[transfer->getFamilyIndex()].queueFlags.hasFlags(video::IQueue::FAMILY_FLAGS::TRANSFER_BIT))) - { - logger.log("Invalid `creationParams.transfer` is not capable of transfer operations!", system::ILogger::ELL_ERROR); - return false; - } + for (const auto& [ok, error] : validation) + if (!ok) + { + logger.log(error, system::ILogger::ELL_ERROR); + return false; + } + + assert(bool(assetManager->getSystem())); return true; } @@ -168,12 +173,23 @@ namespace nbl::ext::debug_draw static hlsl::float32_t3x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); protected: - DrawAABB(SCreationParameters&& _params, core::smart_refctd_ptr singlePipeline, 
core::smart_refctd_ptr batchPipeline, - core::smart_refctd_ptr indicesBuffer); - ~DrawAABB() override; + struct ConstructorParams + { + SCachedCreationParameters creationParams; + core::smart_refctd_ptr singlePipeline = nullptr; + core::smart_refctd_ptr batchPipeline = nullptr; + core::smart_refctd_ptr indicesBuffer = nullptr; + }; + + DrawAABB(ConstructorParams&& params) : + m_cachedCreationParams(std::move(params.creationParams)), + m_singlePipeline(std::move(params.singlePipeline)), + m_batchPipeline(std::move(params.batchPipeline)), + m_indicesBuffer(std::move(params.indicesBuffer)) + {} + ~DrawAABB() override {} private: - //static bool validateCreationParameters(SCreationParameters& params); static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index fa07d9805f..d2bf60849b 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -28,28 +28,28 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) return nullptr; } - smart_refctd_ptr singlePipeline = nullptr; + ConstructorParams constructorParams; + if (params.drawMode & ADM_DRAW_SINGLE) { auto pipelineLayout = params.singlePipelineLayout; if (!pipelineLayout) pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_SINGLE); - singlePipeline = createPipeline(params, pipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); - if (!singlePipeline) + constructorParams.singlePipeline = createPipeline(params, pipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!constructorParams.singlePipeline) { logger->log("Failed to create 
pipeline!", ILogger::ELL_ERROR); return nullptr; } } - smart_refctd_ptr batchPipeline = nullptr; if (params.drawMode & ADM_DRAW_BATCH) { auto pipelineLayout = params.batchPipelineLayout; if (!pipelineLayout) pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_BATCH); - batchPipeline = createPipeline(params, pipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); - if (!batchPipeline) + constructorParams.batchPipeline = createPipeline(params, pipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + if (!constructorParams.batchPipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); return nullptr; @@ -62,24 +62,15 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) return nullptr; } - auto indicesBuffer = createIndicesBuffer(params); - if (!indicesBuffer) + constructorParams.indicesBuffer = createIndicesBuffer(params); + if (!constructorParams.indicesBuffer) { logger->log("Failed to create indices buffer!", ILogger::ELL_ERROR); return nullptr; } - return core::smart_refctd_ptr(new DrawAABB(std::move(params), singlePipeline, batchPipeline, indicesBuffer)); -} - -DrawAABB::DrawAABB(SCreationParameters&& params, core::smart_refctd_ptr singlePipeline, smart_refctd_ptr batchPipeline, smart_refctd_ptr indicesBuffer) - : m_cachedCreationParams(std::move(params)), m_singlePipeline(std::move(singlePipeline)), m_batchPipeline(std::move(batchPipeline)), - m_indicesBuffer(std::move(indicesBuffer)) -{ -} - -DrawAABB::~DrawAABB() -{ + constructorParams.creationParams = std::move(params); + return core::smart_refctd_ptr(new DrawAABB(std::move(constructorParams))); } // note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly From b5dd56bf2d3d4e3c21516c6b71199d116e511f31 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 18 Dec 2025 13:56:45 +0700 Subject: [PATCH 116/140] adds a check 
against double mounting same archive Signed-off-by: Corey --- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index d2bf60849b..c32c591410 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -83,6 +83,12 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l if (!system) return nullptr; + if (system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + { + logger->log("CDrawAABB directory is already mounted!", ILogger::ELL_WARNING); + return nullptr; + } + // extension should mount everything for you, regardless if content goes from virtual filesystem // or disk directly - and you should never rely on application framework to expose extension data @@ -135,6 +141,12 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); }; + if (!system->areBuiltinsMounted()) + { + params.utilities->getLogger()->log("Nabla builtins are not mounted!", ILogger::ELL_ERROR); + return nullptr; + } + if (!system->exists(path(NBL_ARCHIVE_ENTRY) / "common.hlsl", {})) mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); From 4965339716a787861a5a9f267267620e5382d950 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Fri, 19 Dec 2025 10:41:55 +0700 Subject: [PATCH 117/140] return false if the streaming buffer is too small Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 22a7a1e595..99f0fa9223 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -137,6 +137,8 @@ namespace nbl::ext::debug_draw const uint32_t numInstances = aabbInstances.size(); const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / 
sizeof(InstanceData); + if (numInstances > instancesPerIter) + return false; using suballocator_t = core::LinearAddressAllocatorST; uint32_t beginOffset = 0; while (beginOffset < numInstances) From 1a324998fec18b964ac8727400283bda504e46b4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 22 Dec 2025 11:06:52 +0700 Subject: [PATCH 118/140] some fixes to using/filling streaming buffer Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 89 ++++++++++++++++++++------- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 14 ----- 2 files changed, 67 insertions(+), 36 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 99f0fa9223..9c1f24f2fa 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -45,8 +45,8 @@ namespace nbl::ext::debug_draw video::IQueue* transfer = nullptr; // only used to make the 24 element index buffer and instanced pipeline on create core::smart_refctd_ptr assetManager = nullptr; - core::smart_refctd_ptr singlePipelineLayout; - core::smart_refctd_ptr batchPipelineLayout; + core::smart_refctd_ptr singlePipelineLayout = nullptr; + core::smart_refctd_ptr batchPipelineLayout = nullptr; core::smart_refctd_ptr renderpass = nullptr; inline bool validate() const @@ -77,7 +77,7 @@ namespace nbl::ext::debug_draw struct DrawParameters { video::IGPUCommandBuffer* commandBuffer = nullptr; - hlsl::float32_t4x4 cameraMat = hlsl::float32_t4x4(1); + hlsl::float32_t4x4 cameraMat; float lineWidth = 1.f; }; @@ -102,20 +102,23 @@ namespace nbl::ext::debug_draw // user has to set span of filled-in InstanceData; camera matrix used in push constant inline bool render(const DrawParameters& params, video::ISemaphore::SWaitInfo waitInfo, std::span aabbInstances) { + system::logger_opt_ptr logger = m_cachedCreationParams.utilities->getLogger(); if (!(m_cachedCreationParams.drawMode & ADM_DRAW_BATCH)) { - m_cachedCreationParams.utilities->getLogger()->log("DrawAABB has not been 
enabled for draw batches!", system::ILogger::ELL_ERROR); + logger.log("DrawAABB has not been enabled for draw batches!", system::ILogger::ELL_ERROR); return false; } using offset_t = SCachedCreationParameters::streaming_buffer_t::size_type; - constexpr auto MdiSizes = std::to_array({ sizeof(hlsl::float32_t3), sizeof(InstanceData) }); - // shared nPoT alignment needs to be divisible by all smaller ones to satisfy an allocation from all - constexpr offset_t MaxAlignment = std::reduce(MdiSizes.begin(), MdiSizes.end(), 1, [](const offset_t a, const offset_t b)->offset_t {return std::lcm(a, b); }); + constexpr offset_t MaxAlignment = sizeof(InstanceData); // allocator initialization needs us to round up to PoT const auto MaxPOTAlignment = hlsl::roundUpToPoT(MaxAlignment); - auto* streaming = m_cachedCreationParams.streamingBuffer.get(); + if (streaming->getAddressAllocator().max_alignment() < MaxPOTAlignment) + { + logger.log("Draw AABB Streaming Buffer cannot guarantee the alignments we require!"); + return false; + } auto* const streamingPtr = reinterpret_cast(streaming->getBufferPointer()); assert(streamingPtr); @@ -126,37 +129,68 @@ namespace nbl::ext::debug_draw asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); + auto srcIt = aabbInstances.begin(); auto setInstancesRange = [&](InstanceData* data, uint32_t count) -> void { for (uint32_t i = 0; i < count; i++) { auto inst = data + i; - *inst = aabbInstances[i]; + *inst = *srcIt; inst->transform = hlsl::mul(params.cameraMat, inst->transform); + srcIt++; + + if (srcIt == aabbInstances.end()) + break; } - }; + }; const uint32_t numInstances = aabbInstances.size(); - const uint32_t instancesPerIter = streaming->getBuffer()->getSize() / sizeof(InstanceData); + const uint32_t instancesPerIter = streaming->max_size() / sizeof(InstanceData); if (numInstances > instancesPerIter) return false; using suballocator_t = 
core::LinearAddressAllocatorST; - uint32_t beginOffset = 0; - while (beginOffset < numInstances) + uint32_t blockOffset = 0u; + while (srcIt != aabbInstances.end()) { - const uint32_t instanceCount = hlsl::min(instancesPerIter, numInstances); - offset_t inputOffset = 0u; + uint32_t instanceCount = hlsl::min(instancesPerIter, numInstances); + offset_t inputOffset = blockOffset; offset_t ImaginarySizeUpperBound = 0x1 << 30; - suballocator_t imaginaryChunk(nullptr, inputOffset, 0, hlsl::roundUpToPoT(MaxAlignment), ImaginarySizeUpperBound); + suballocator_t imaginaryChunk(nullptr, inputOffset, 0, MaxPOTAlignment, ImaginarySizeUpperBound); uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); const uint32_t totalSize = imaginaryChunk.get_allocated_size(); + + uint32_t blockSize; + bool allocated = false; + for (uint32_t t = 0; t < 2; t++) + { + blockSize = hlsl::max(streaming->max_size(), totalSize); + while (blockSize >= totalSize) + { + inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; + std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); + if (streaming->multi_allocate(waitTill, 1, &inputOffset, &blockSize, &MaxAlignment) == 0u) + { + allocated = true; + break; + } + + streaming->cull_frees(); + blockSize >>= 1; + } - inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; - std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); - streaming->multi_allocate(waitTill, 1, &inputOffset, &totalSize, &MaxAlignment); + if (allocated) + break; + } + if (!allocated) + { + logger.log("Failed to allocate even the smallest chunk from streaming buffer for the next drawcall batch.", system::ILogger::ELL_ERROR); + return false; + } + + instanceCount = blockSize / sizeof(InstanceData); + blockOffset += blockSize; auto* const streamingInstancesPtr = 
reinterpret_cast(streamingPtr + instancesByteOffset); setInstancesRange(streamingInstancesPtr, instanceCount); - beginOffset += instanceCount; assert(!streaming->needsManualFlushOrInvalidate()); @@ -166,13 +200,24 @@ namespace nbl::ext::debug_draw commandBuffer->pushConstants(m_batchPipeline->getLayout(), asset::IShader::E_SHADER_STAGE::ESS_VERTEX, 0, sizeof(SPushConstants), &pc); commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); - streaming->multi_deallocate(1, &inputOffset, &totalSize, waitInfo); + streaming->multi_deallocate(1, &inputOffset, &blockSize, waitInfo); } return true; } - static hlsl::float32_t3x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb); + static inline hlsl::float32_t3x4 getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) + { + const auto diagonal = aabb.getExtent(); + hlsl::float32_t3x4 transform; + transform[0][3] = aabb.minVx.x; + transform[1][3] = aabb.minVx.y; + transform[2][3] = aabb.minVx.z; + transform[0][0] = diagonal.x; + transform[1][1] = diagonal.y; + transform[2][2] = diagonal.z; + return transform; + } protected: struct ConstructorParams diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index c32c591410..641811fe0f 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -226,7 +226,6 @@ bool DrawAABB::createStreamingBuffer(SCreationParameters& params) const auto validation = std::to_array ({ std::make_pair(buffer->getCreationParams().usage.hasFlags(SCachedCreationParameters::RequiredUsageFlags), "Streaming buffer must be created with IBuffer::EUF_STORAGE_BUFFER_BIT | IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT enabled!"), - std::make_pair(bool(buffer->getMemoryReqs().memoryTypeBits & params.utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits()), "Streaming buffer must have up-streaming memory type bits enabled!"), 
std::make_pair(binding.memory->getAllocateFlags().hasFlags(SCachedCreationParameters::RequiredAllocateFlags), "Streaming buffer's memory must be allocated with IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT enabled!"), std::make_pair(binding.memory->isCurrentlyMapped(), "Streaming buffer's memory must be mapped!"), // streaming buffer contructor already validates it, but cannot assume user won't unmap its own buffer for some reason (sorry if you have just hit it) std::make_pair(binding.memory->getCurrentMappingAccess().hasFlags(getRequiredAccessFlags(binding.memory->getMemoryPropertyFlags())), "Streaming buffer's memory current mapping access flags don't meet requirements!") @@ -386,17 +385,4 @@ bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AA return true; } -hlsl::float32_t3x4 DrawAABB::getTransformFromAABB(const hlsl::shapes::AABB<3, float>& aabb) -{ - const auto diagonal = aabb.getExtent(); - hlsl::float32_t3x4 transform; - transform[0][3] = aabb.minVx.x; - transform[1][3] = aabb.minVx.y; - transform[2][3] = aabb.minVx.z; - transform[0][0] = diagonal.x; - transform[1][1] = diagonal.y; - transform[2][2] = diagonal.z; - return transform; -} - } From 18b5a4dc6c84326d9f39239d5b3e1a58adae1dea Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 22 Dec 2025 15:10:27 +0700 Subject: [PATCH 119/140] combined draw aabb shaders into unified, added precompile shaders to spv for extension Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 6 +- .../builtin/hlsl/aabb_instances.fragment.hlsl | 13 --- .../builtin/hlsl/aabb_instances.vertex.hlsl | 21 ---- .../ext/DebugDraw/builtin/hlsl/common.hlsl | 10 +- .../builtin/hlsl/draw_aabb.unified.hlsl | 39 +++++++ .../DebugDraw/builtin/hlsl/single.vertex.hlsl | 20 ---- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 100 +++++++----------- src/nbl/ext/DebugDraw/CMakeLists.txt | 63 ++++++++--- 8 files changed, 139 insertions(+), 133 deletions(-) delete mode 100644 
include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl delete mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl create mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/draw_aabb.unified.hlsl delete mode 100644 include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 9c1f24f2fa..5b42ac25ba 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -194,10 +194,10 @@ namespace nbl::ext::debug_draw assert(!streaming->needsManualFlushOrInvalidate()); - SPushConstants pc; + SInstancedPC pc; pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; - commandBuffer->pushConstants(m_batchPipeline->getLayout(), asset::IShader::E_SHADER_STAGE::ESS_VERTEX, 0, sizeof(SPushConstants), &pc); + commandBuffer->pushConstants(m_batchPipeline->getLayout(), asset::IShader::E_SHADER_STAGE::ESS_VERTEX, offsetof(ext::debug_draw::PushConstants, ipc), sizeof(SInstancedPC), &pc); commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); streaming->multi_deallocate(1, &inputOffset, &blockSize, waitInfo); @@ -237,7 +237,7 @@ namespace nbl::ext::debug_draw ~DrawAABB() override {} private: - static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath); + static core::smart_refctd_ptr createPipeline(SCreationParameters& params, const video::IGPUPipelineLayout* pipelineLayout, const DrawMode mode); static bool createStreamingBuffer(SCreationParameters& params); static core::smart_refctd_ptr createIndicesBuffer(SCreationParameters& params); diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl deleted file mode 100644 index 09a12f3d07..0000000000 
--- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.fragment.hlsl +++ /dev/null @@ -1,13 +0,0 @@ -#pragma shader_stage(fragment) - -#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" - -using namespace nbl::ext::debug_draw; - -[shader("pixel")] -float32_t4 main(PSInput input) : SV_TARGET -{ - float32_t4 outColor = input.color; - - return outColor; -} \ No newline at end of file diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl deleted file mode 100644 index 451243bbcc..0000000000 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/aabb_instances.vertex.hlsl +++ /dev/null @@ -1,21 +0,0 @@ -#pragma shader_stage(vertex) - -#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" - -using namespace nbl::hlsl; -using namespace nbl::ext::debug_draw; - -[[vk::push_constant]] SPushConstants pc; - -[shader("vertex")] -PSInput main() -{ - PSInput output; - const float32_t3 vertex = getUnitAABBVertex(); - InstanceData instance = vk::BufferPointer(pc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); - - output.position = math::linalg::promoted_mul(instance.transform, vertex); - output.color = instance.color; - - return output; -} \ No newline at end of file diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl index ac5deaef5f..b665c9d43a 100644 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/common.hlsl @@ -21,16 +21,22 @@ struct InstanceData hlsl::float32_t4 color; }; -struct SSinglePushConstants +struct SSinglePC { InstanceData instance; }; -struct SPushConstants +struct SInstancedPC { uint64_t pInstanceBuffer; }; +struct PushConstants +{ + SSinglePC spc; + SInstancedPC ipc; +}; + #ifdef __HLSL_VERSION struct PSInput { diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/draw_aabb.unified.hlsl 
b/include/nbl/ext/DebugDraw/builtin/hlsl/draw_aabb.unified.hlsl new file mode 100644 index 0000000000..0b51f7de53 --- /dev/null +++ b/include/nbl/ext/DebugDraw/builtin/hlsl/draw_aabb.unified.hlsl @@ -0,0 +1,39 @@ +#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" + +using namespace nbl::hlsl; +using namespace nbl::ext::debug_draw; + +[[vk::push_constant]] PushConstants pc; + +[shader("vertex")] +PSInput aabb_vertex_single() +{ + PSInput output; + float32_t3 vertex = getUnitAABBVertex(); + + output.position = math::linalg::promoted_mul(pc.spc.instance.transform, vertex); + output.color = pc.spc.instance.color; + + return output; +} + +[shader("vertex")] +PSInput aabb_vertex_instances() +{ + PSInput output; + const float32_t3 vertex = getUnitAABBVertex(); + InstanceData instance = vk::BufferPointer(pc.ipc.pInstanceBuffer + sizeof(InstanceData) * glsl::gl_InstanceIndex()).Get(); + + output.position = math::linalg::promoted_mul(instance.transform, vertex); + output.color = instance.color; + + return output; +} + +[shader("pixel")] +float32_t4 aabb_fragment(PSInput input) : SV_TARGET +{ + float32_t4 outColor = input.color; + + return outColor; +} diff --git a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl b/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl deleted file mode 100644 index 5b4f2a39a7..0000000000 --- a/include/nbl/ext/DebugDraw/builtin/hlsl/single.vertex.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -#pragma shader_stage(vertex) - -#include "nbl/ext/DebugDraw/builtin/hlsl/common.hlsl" - -using namespace nbl::hlsl; -using namespace nbl::ext::debug_draw; - -[[vk::push_constant]] SSinglePushConstants pc; - -[shader("vertex")] -PSInput main() -{ - PSInput output; - float32_t3 vertex = getUnitAABBVertex(); - - output.position = math::linalg::promoted_mul(pc.instance.transform, vertex); - output.color = pc.instance.color; - - return output; -} \ No newline at end of file diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp 
b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 641811fe0f..49a41d2aa7 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -8,6 +8,8 @@ #include "nbl/ext/debug_draw/builtin/CArchive.h" #endif +#include "nbl/ext/DebugDraw/builtin/build/spirv/keys.hpp" + using namespace nbl; using namespace core; using namespace video; @@ -35,7 +37,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) auto pipelineLayout = params.singlePipelineLayout; if (!pipelineLayout) pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_SINGLE); - constructorParams.singlePipeline = createPipeline(params, pipelineLayout.get(), "single.vertex.hlsl", "aabb_instances.fragment.hlsl"); + constructorParams.singlePipeline = createPipeline(params, pipelineLayout.get(), ADM_DRAW_SINGLE); if (!constructorParams.singlePipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); @@ -48,7 +50,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) auto pipelineLayout = params.batchPipelineLayout; if (!pipelineLayout) pipelineLayout = createDefaultPipelineLayout(params.utilities->getLogicalDevice(), ADM_DRAW_BATCH); - constructorParams.batchPipeline = createPipeline(params, pipelineLayout.get(), "aabb_instances.vertex.hlsl", "aabb_instances.fragment.hlsl"); + constructorParams.batchPipeline = createPipeline(params, pipelineLayout.get(), ADM_DRAW_BATCH); if (!constructorParams.batchPipeline) { logger->log("Failed to create pipeline!", ILogger::ELL_ERROR); @@ -74,7 +76,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) } // note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly -constexpr std::string_view NBL_ARCHIVE_ENTRY = _ARCHIVE_ENTRY_KEY_; +constexpr std::string_view NBL_ARCHIVE_ENTRY = _ARCHIVE_ABSOLUTE_SPV_PATH_; const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, 
const std::string_view archiveAlias) { @@ -85,75 +87,54 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l if (system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) { - logger->log("CDrawAABB directory is already mounted!", ILogger::ELL_WARNING); + logger->log("CDrawAABB .spv directory is already mounted!", ILogger::ELL_WARNING); return nullptr; } // extension should mount everything for you, regardless if content goes from virtual filesystem // or disk directly - and you should never rely on application framework to expose extension data -#ifdef NBL_EMBED_BUILTIN_RESOURCES - auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); - system->mount(smart_refctd_ptr(archive), archiveAlias.data()); -#else - auto NBL_EXTENSION_MOUNT_DIRECTORY_ENTRY = (path(_ARCHIVE_ABSOLUTE_ENTRY_PATH_) / NBL_ARCHIVE_ENTRY).make_preferred(); - auto archive = make_smart_refctd_ptr(std::move(NBL_EXTENSION_MOUNT_DIRECTORY_ENTRY), smart_refctd_ptr(logger), system); + auto archive = make_smart_refctd_ptr(std::move(NBL_ARCHIVE_ENTRY), smart_refctd_ptr(logger), system); system->mount(smart_refctd_ptr(archive), archiveAlias.data()); -#endif return smart_refctd_ptr(archive); } -smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, const std::string& vsPath, const std::string& fsPath) +smart_refctd_ptr DrawAABB::createPipeline(SCreationParameters& params, const IGPUPipelineLayout* pipelineLayout, DrawMode mode) { + system::logger_opt_ptr logger = params.utilities->getLogger(); auto system = smart_refctd_ptr(params.assetManager->getSystem()); - auto* set = params.assetManager->getCompilerSet(); - auto compiler = set->getShaderCompiler(IShader::E_CONTENT_TYPE::ECT_HLSL); - auto includeFinder = make_smart_refctd_ptr(smart_refctd_ptr(system)); - auto includeLoader = includeFinder->getDefaultFileSystemLoader(); - includeFinder->addSearchPath(NBL_ARCHIVE_ENTRY.data(), includeLoader); - auto compileShader = [&](const 
std::string& filePath, IShader::E_SHADER_STAGE stage) -> smart_refctd_ptr + if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + + auto getShader = [&](const core::string& key)->smart_refctd_ptr { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = params.utilities->getLogger(); + lp.workingDirectory = _ARCHIVE_ABSOLUTE_SPV_PATH_; + auto bundle = params.assetManager->getAsset(key.c_str(), lp); + + const auto contents = bundle.getContents(); + + if (contents.empty()) { - IAssetLoader::SAssetLoadParams lparams = {}; - lparams.logger = params.utilities->getLogger(); - lparams.workingDirectory = NBL_ARCHIVE_ENTRY.data(); - auto bundle = params.assetManager->getAsset(filePath, lparams); - if (bundle.getContents().empty() || bundle.getAssetType() != IAsset::ET_SHADER) - { - params.utilities->getLogger()->log("Shader %s not found!", ILogger::ELL_ERROR, filePath.c_str()); - exit(-1); - } - - const auto assets = bundle.getContents(); - assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - if (!shaderSrc) - return nullptr; - - CHLSLCompiler::SOptions options = {}; - options.stage = stage; - options.preprocessorOptions.sourceIdentifier = filePath; - options.preprocessorOptions.logger = params.utilities->getLogger(); - options.preprocessorOptions.includeFinder = includeFinder.get(); - shaderSrc = compiler->compileToSPIRV((const char*)shaderSrc->getContent()->getPointer(), options); - - return params.utilities->getLogicalDevice()->compileShader({ shaderSrc.get() }); - }; + logger.log("Failed to load shader %s from disk", ILogger::ELL_ERROR, key.c_str()); + return nullptr; + } - if (!system->areBuiltinsMounted()) - { - params.utilities->getLogger()->log("Nabla builtins are not mounted!", ILogger::ELL_ERROR); - return nullptr; - } + if (bundle.getAssetType() != IAsset::ET_SHADER) + { + logger.log("Loaded asset has wrong type!", 
ILogger::ELL_ERROR); + return nullptr; + } - if (!system->exists(path(NBL_ARCHIVE_ENTRY) / "common.hlsl", {})) - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + return IAsset::castDown(contents[0]); + }; - auto vertexShader = compileShader(vsPath, IShader::E_SHADER_STAGE::ESS_VERTEX); - auto fragmentShader = compileShader(fsPath, IShader::E_SHADER_STAGE::ESS_FRAGMENT); + auto key = nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(params.utilities->getLogicalDevice()); + smart_refctd_ptr unifiedShader = getShader(key); - if (!vertexShader || !fragmentShader) + if (!unifiedShader) { params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); return nullptr; @@ -161,8 +142,8 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet video::IGPUGraphicsPipeline::SCreationParams pipelineParams[1] = {}; pipelineParams[0].layout = pipelineLayout; - pipelineParams[0].vertexShader = { .shader = vertexShader.get(), .entryPoint = "main" }; - pipelineParams[0].fragmentShader = { .shader = fragmentShader.get(), .entryPoint = "main" }; + pipelineParams[0].vertexShader = { .shader = unifiedShader.get(), .entryPoint = (mode & ADM_DRAW_SINGLE) ? "aabb_vertex_single" : "aabb_vertex_instances" }; + pipelineParams[0].fragmentShader = { .shader = unifiedShader.get(), .entryPoint = "aabb_fragment" }; pipelineParams[0].cached = { .primitiveAssembly = { .primitiveType = asset::E_PRIMITIVE_TOPOLOGY::EPT_LINE_LIST, @@ -351,10 +332,11 @@ core::smart_refctd_ptr DrawAABB::createPipelineLayout core::smart_refctd_ptr DrawAABB::createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode) { - const uint32_t pcSize = (mode & ADM_DRAW_BATCH) ? sizeof(SPushConstants) : sizeof(SSinglePushConstants); + const uint32_t offset = (mode & ADM_DRAW_BATCH) ? offsetof(ext::debug_draw::PushConstants, ipc) : offsetof(ext::debug_draw::PushConstants, spc); + const uint32_t pcSize = (mode & ADM_DRAW_BATCH) ? 
sizeof(SInstancedPC) : sizeof(SSinglePC); SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, - .offset = 0, + .offset = offset, .size = pcSize }; return createPipelineLayoutFromPCRange(device, pcRange); @@ -374,12 +356,12 @@ bool DrawAABB::renderSingle(const DrawParameters& params, const hlsl::shapes::AA asset::SBufferBinding indexBinding = { .offset = 0, .buffer = m_indicesBuffer }; commandBuffer->bindIndexBuffer(indexBinding, asset::EIT_32BIT); - SSinglePushConstants pc; + SSinglePC pc; hlsl::float32_t3x4 instanceTransform = getTransformFromAABB(aabb); pc.instance.transform = math::linalg::promoted_mul(params.cameraMat, instanceTransform); pc.instance.color = color; - commandBuffer->pushConstants(m_singlePipeline->getLayout(), ESS_VERTEX, 0, sizeof(SSinglePushConstants), &pc); + commandBuffer->pushConstants(m_singlePipeline->getLayout(), ESS_VERTEX, offsetof(ext::debug_draw::PushConstants, spc), sizeof(SSinglePC), &pc); commandBuffer->drawIndexed(IndicesCount, 1, 0, 0, 0); return true; diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 7a89caca0d..2eb05b739b 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -19,25 +19,58 @@ nbl_create_ext_library_project( "" ) -# this should be standard for all extensions -set(_ARCHIVE_ENTRY_KEY_ "nbl/ext/DebugDraw/builtin/hlsl") # then each one has unique archive key get_filename_component(_ARCHIVE_ABSOLUTE_ENTRY_PATH_ "${NBL_EXT_INTERNAL_INCLUDE_DIR}" ABSOLUTE) -get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) -get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) -target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ABSOLUTE_ENTRY_PATH_="${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}") -target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ENTRY_KEY_="${_ARCHIVE_ENTRY_KEY_}") +set(NBL_DEBUG_DRAW_HLSL_MOUNT_POINT 
"${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/DebugDraw/builtin/hlsl") +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/common.hlsl + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/single.vertex.hlsl + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/aabb_instances.vertex.hlsl + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/aabb_instances.fragment.hlsl +) +target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) +set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) + +target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ABSOLUTE_SPV_PATH_="${OUTPUT_DIRECTORY}") -if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ extDebugDrawbuiltinResourceData) +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/draw_aabb.unified.hlsl", + "KEY": "draw_aabb", + } + +] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "common.hlsl") - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "single.vertex.hlsl") # (*) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.vertex.hlsl") # (*) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "aabb_instances.fragment.hlsl") # (*) +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${LIB_NAME}SPIRV + LINK_TO ${LIB_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_DEBUG_DRAW_HLSL_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/ext/DebugDraw/builtin/build/spirv/keys.hpp + NAMESPACE nbl::ext::debug_draw::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::ext::debug_draw::builtin::build + TARGET ${LIB_NAME}_builtinsBuild + LINK_TO ${LIB_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}" "${_ARCHIVE_ENTRY_KEY_}" "nbl::ext::debug_draw::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") 
- LINK_BUILTIN_RESOURCES_TO_TARGET(${LIB_NAME} ${_BR_TARGET_}) -endif() add_library(Nabla::ext::DebugDraw ALIAS ${LIB_NAME}) From f54d9b2f07e0102efc9e9db01f7348ad0f428a35 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 14:17:53 +0700 Subject: [PATCH 120/140] restore ifdef for mounting builtin resources, minor fixes to mounting Signed-off-by: Corey --- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 16 ++++++++++------ src/nbl/ext/DebugDraw/CMakeLists.txt | 6 +----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 49a41d2aa7..3873ad9041 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -76,7 +76,7 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) } // note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly -constexpr std::string_view NBL_ARCHIVE_ENTRY = _ARCHIVE_ABSOLUTE_SPV_PATH_; +constexpr std::string_view NBL_ARCHIVE_ENTRY = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const std::string_view archiveAlias) { @@ -85,7 +85,7 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l if (!system) return nullptr; - if (system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) + if (system->exists(path(NBL_ARCHIVE_ENTRY), {})) { logger->log("CDrawAABB .spv directory is already mounted!", ILogger::ELL_WARNING); return nullptr; @@ -93,9 +93,13 @@ const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr l // extension should mount everything for you, regardless if content goes from virtual filesystem // or disk directly - and you should never rely on application framework to expose extension data - +#ifdef NBL_EMBED_BUILTIN_RESOURCES + auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); +#else auto archive = 
make_smart_refctd_ptr(std::move(NBL_ARCHIVE_ENTRY), smart_refctd_ptr(logger), system); system->mount(smart_refctd_ptr(archive), archiveAlias.data()); +#endif return smart_refctd_ptr(archive); } @@ -105,13 +109,13 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet system::logger_opt_ptr logger = params.utilities->getLogger(); auto system = smart_refctd_ptr(params.assetManager->getSystem()); - if (!system->isDirectory(path(NBL_ARCHIVE_ENTRY.data()))) - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + if (!system->exists(path(NBL_ARCHIVE_ENTRY), {})) + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); auto getShader = [&](const core::string& key)->smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = params.utilities->getLogger(); - lp.workingDirectory = _ARCHIVE_ABSOLUTE_SPV_PATH_; + lp.workingDirectory = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; auto bundle = params.assetManager->getAsset(key.c_str(), lp); const auto contents = bundle.getContents(); diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 2eb05b739b..25e4be718a 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -25,15 +25,11 @@ set(NBL_DEBUG_DRAW_HLSL_MOUNT_POINT "${_ARCHIVE_ABSOLUTE_ENTRY_PATH_}/nbl/ext/De set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(DEPENDS ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/common.hlsl - ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/single.vertex.hlsl - ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/aabb_instances.vertex.hlsl - ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/aabb_instances.fragment.hlsl + ${NBL_DEBUG_DRAW_HLSL_MOUNT_POINT}/draw_aabb.unified.hlsl ) target_sources(${LIB_NAME} PRIVATE ${DEPENDS}) set_source_files_properties(${DEPENDS} PROPERTIES HEADER_FILE_ONLY ON) -target_compile_definitions(${LIB_NAME} PRIVATE _ARCHIVE_ABSOLUTE_SPV_PATH_="${OUTPUT_DIRECTORY}") - set(SM 6_8) set(JSON [=[ [ From 
789e88b4b8a5e9fc496d1bc8a6c12adf1a14c752 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 16:30:46 +0700 Subject: [PATCH 121/140] simplified usage of streaming buffer alignments, flush unused memory range Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 58 ++++++++++----------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 5b42ac25ba..dd6e4e6c78 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -144,63 +144,49 @@ namespace nbl::ext::debug_draw }; const uint32_t numInstances = aabbInstances.size(); - const uint32_t instancesPerIter = streaming->max_size() / sizeof(InstanceData); - if (numInstances > instancesPerIter) - return false; - using suballocator_t = core::LinearAddressAllocatorST; - uint32_t blockOffset = 0u; - while (srcIt != aabbInstances.end()) + uint32_t remainingInstancesBytes = numInstances * sizeof(InstanceData); + while (srcIt != aabbInstances.end() && remainingInstancesBytes > 0u) { - uint32_t instanceCount = hlsl::min(instancesPerIter, numInstances); - offset_t inputOffset = blockOffset; - offset_t ImaginarySizeUpperBound = 0x1 << 30; - suballocator_t imaginaryChunk(nullptr, inputOffset, 0, MaxPOTAlignment, ImaginarySizeUpperBound); - uint32_t instancesByteOffset = imaginaryChunk.alloc_addr(sizeof(InstanceData) * instanceCount, sizeof(InstanceData)); - const uint32_t totalSize = imaginaryChunk.get_allocated_size(); - - uint32_t blockSize; + uint32_t blockByteSize = hlsl::min(streaming->max_size(), core::alignUp(remainingInstancesBytes, MaxAlignment)); bool allocated = false; + + offset_t blockOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; for (uint32_t t = 0; t < 2; t++) { - blockSize = hlsl::max(streaming->max_size(), totalSize); - while (blockSize >= totalSize) + std::chrono::steady_clock::time_point waitTill = 
std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); + if (streaming->multi_allocate(waitTill, 1, &blockOffset, &blockByteSize, &MaxAlignment) == 0u) { - inputOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; - std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); - if (streaming->multi_allocate(waitTill, 1, &inputOffset, &blockSize, &MaxAlignment) == 0u) - { - allocated = true; - break; - } - - streaming->cull_frees(); - blockSize >>= 1; - } - - if (allocated) + allocated = true; break; + } + streaming->cull_frees(); } if (!allocated) { - logger.log("Failed to allocate even the smallest chunk from streaming buffer for the next drawcall batch.", system::ILogger::ELL_ERROR); + logger.log("Failed to allocate a chunk from streaming buffer for the next drawcall batch.", system::ILogger::ELL_ERROR); return false; } - instanceCount = blockSize / sizeof(InstanceData); - blockOffset += blockSize; - auto* const streamingInstancesPtr = reinterpret_cast(streamingPtr + instancesByteOffset); + const uint32_t instanceCount = blockByteSize / sizeof(InstanceData); + auto* const streamingInstancesPtr = reinterpret_cast(streamingPtr + blockOffset); setInstancesRange(streamingInstancesPtr, instanceCount); - assert(!streaming->needsManualFlushOrInvalidate()); + if (streaming->needsManualFlushOrInvalidate()) + { + const video::ILogicalDevice::MappedMemoryRange flushRange(streaming->getBuffer()->getBoundMemory().memory, blockOffset, blockByteSize); + m_cachedCreationParams.utilities->getLogicalDevice()->flushMappedMemoryRanges(1, &flushRange); + } + + remainingInstancesBytes -= blockByteSize; SInstancedPC pc; - pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + instancesByteOffset; + pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + blockOffset; commandBuffer->pushConstants(m_batchPipeline->getLayout(), 
asset::IShader::E_SHADER_STAGE::ESS_VERTEX, offsetof(ext::debug_draw::PushConstants, ipc), sizeof(SInstancedPC), &pc); commandBuffer->drawIndexed(IndicesCount, instanceCount, 0, 0, 0); - streaming->multi_deallocate(1, &inputOffset, &blockSize, waitInfo); + streaming->multi_deallocate(1, &blockOffset, &blockByteSize, waitInfo); } return true; From b7a03ee450ead202e3bd749c4c3e124ea9017e1b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 16:38:21 +0700 Subject: [PATCH 122/140] fix calculating remaining instances bytes Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index dd6e4e6c78..1a73f85942 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -145,7 +145,7 @@ namespace nbl::ext::debug_draw const uint32_t numInstances = aabbInstances.size(); uint32_t remainingInstancesBytes = numInstances * sizeof(InstanceData); - while (srcIt != aabbInstances.end() && remainingInstancesBytes > 0u) + while (srcIt != aabbInstances.end()) { uint32_t blockByteSize = hlsl::min(streaming->max_size(), core::alignUp(remainingInstancesBytes, MaxAlignment)); bool allocated = false; @@ -178,7 +178,7 @@ namespace nbl::ext::debug_draw m_cachedCreationParams.utilities->getLogicalDevice()->flushMappedMemoryRanges(1, &flushRange); } - remainingInstancesBytes -= blockByteSize; + remainingInstancesBytes -= instanceCount * sizeof(InstanceData); SInstancedPC pc; pc.pInstanceBuffer = m_cachedCreationParams.streamingBuffer->getBuffer()->getDeviceAddress() + blockOffset; From e8f4d001b44d3e1cce2c0f836daa268dad207a80 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 19:41:50 +0700 Subject: [PATCH 123/140] check whether spirv exists Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 10 ++++------ 2 files changed, 5 
insertions(+), 7 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 1a73f85942..48c8b50400 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -91,7 +91,7 @@ namespace nbl::ext::debug_draw static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode = ADM_DRAW_BATCH); //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included - static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const std::string_view archiveAlias = ""); + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const core::string& spvPath, const std::string_view archiveAlias = ""); inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index 3873ad9041..f7706a15c8 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -78,14 +78,14 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) // note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly constexpr std::string_view NBL_ARCHIVE_ENTRY = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; -const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const std::string_view archiveAlias) +const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const core::string& spvPath, const std::string_view archiveAlias) { assert(system); if (!system) return nullptr; - if (system->exists(path(NBL_ARCHIVE_ENTRY), {})) + if (system->exists(path(NBL_ARCHIVE_ENTRY) / spvPath.c_str(), {})) { logger->log("CDrawAABB .spv directory is already mounted!", ILogger::ELL_WARNING); return nullptr; @@ 
-109,8 +109,8 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet system::logger_opt_ptr logger = params.utilities->getLogger(); auto system = smart_refctd_ptr(params.assetManager->getSystem()); - if (!system->exists(path(NBL_ARCHIVE_ENTRY), {})) - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), NBL_ARCHIVE_ENTRY); + const auto key = nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(params.utilities->getLogicalDevice()); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), key, NBL_ARCHIVE_ENTRY); auto getShader = [&](const core::string& key)->smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; @@ -135,9 +135,7 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet return IAsset::castDown(contents[0]); }; - auto key = nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(params.utilities->getLogicalDevice()); smart_refctd_ptr unifiedShader = getShader(key); - if (!unifiedShader) { params.utilities->getLogger()->log("Could not compile shaders!", ILogger::ELL_ERROR); From 0dce95e7f51e7268df8c6b19786f80030ca65b5f Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 23 Dec 2025 20:24:28 +0700 Subject: [PATCH 124/140] try to fit as much as possible even when fail to allocate, go down by half each time Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h b/include/nbl/ext/DebugDraw/CDrawAABB.h index 48c8b50400..9442e24c28 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -147,11 +147,12 @@ namespace nbl::ext::debug_draw uint32_t remainingInstancesBytes = numInstances * sizeof(InstanceData); while (srcIt != aabbInstances.end()) { - uint32_t blockByteSize = hlsl::min(streaming->max_size(), core::alignUp(remainingInstancesBytes, MaxAlignment)); + uint32_t blockByteSize = core::alignUp(remainingInstancesBytes, 
MaxAlignment); bool allocated = false; offset_t blockOffset = SCachedCreationParameters::streaming_buffer_t::invalid_value; - for (uint32_t t = 0; t < 2; t++) + const uint32_t smallestAlloc = hlsl::max(core::alignUp(sizeof(InstanceData), MaxAlignment), streaming->getAddressAllocator().min_size()); + while (blockByteSize >= smallestAlloc) { std::chrono::steady_clock::time_point waitTill = std::chrono::steady_clock::now() + std::chrono::milliseconds(1u); if (streaming->multi_allocate(waitTill, 1, &blockOffset, &blockByteSize, &MaxAlignment) == 0u) @@ -159,7 +160,9 @@ namespace nbl::ext::debug_draw allocated = true; break; } + streaming->cull_frees(); + blockByteSize >>= 1; } if (!allocated) From 53736111a3850363f23284d69dca2f942d4abaa3 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz <34793522+AnastaZIuk@users.noreply.github.com> Date: Tue, 23 Dec 2025 18:57:17 +0100 Subject: [PATCH 125/140] update include paths for debug draw ext Signed-off-by: Corey --- src/nbl/ext/DebugDraw/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nbl/ext/DebugDraw/CMakeLists.txt b/src/nbl/ext/DebugDraw/CMakeLists.txt index 25e4be718a..dfa4a7624f 100644 --- a/src/nbl/ext/DebugDraw/CMakeLists.txt +++ b/src/nbl/ext/DebugDraw/CMakeLists.txt @@ -43,6 +43,7 @@ set(JSON [=[ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS + -I "${NBL_ROOT_PATH}/include" # a workaround due to debug draw ext common header which is not part of Nabla builtin archive -I "${CMAKE_CURRENT_SOURCE_DIR}" -T lib_${SM} ) From 48eb6049491cb7f79bbabcc597012aaa2cc1a60c Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Tue, 23 Dec 2025 22:09:10 +0100 Subject: [PATCH 126/140] fix mount logic for debug draw ext, perform tests on builtins on/off Signed-off-by: Corey --- include/nbl/ext/DebugDraw/CDrawAABB.h | 2 +- src/nbl/ext/DebugDraw/CDrawAABB.cpp | 34 +++++++++++++-------------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/include/nbl/ext/DebugDraw/CDrawAABB.h 
b/include/nbl/ext/DebugDraw/CDrawAABB.h index 9442e24c28..126731f425 100644 --- a/include/nbl/ext/DebugDraw/CDrawAABB.h +++ b/include/nbl/ext/DebugDraw/CDrawAABB.h @@ -91,7 +91,7 @@ namespace nbl::ext::debug_draw static core::smart_refctd_ptr createDefaultPipelineLayout(video::ILogicalDevice* device, DrawMode mode = ADM_DRAW_BATCH); //! mounts the extension's archive to given system - useful if you want to create your own shaders with common header included - static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, const core::string& spvPath, const std::string_view archiveAlias = ""); + static const core::smart_refctd_ptr mount(core::smart_refctd_ptr logger, system::ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias = ""); inline const SCachedCreationParameters& getCreationParameters() const { return m_cachedCreationParams; } diff --git a/src/nbl/ext/DebugDraw/CDrawAABB.cpp b/src/nbl/ext/DebugDraw/CDrawAABB.cpp index f7706a15c8..ca82da688a 100644 --- a/src/nbl/ext/DebugDraw/CDrawAABB.cpp +++ b/src/nbl/ext/DebugDraw/CDrawAABB.cpp @@ -5,7 +5,7 @@ #include "nbl/ext/DebugDraw/CDrawAABB.h" #ifdef NBL_EMBED_BUILTIN_RESOURCES -#include "nbl/ext/debug_draw/builtin/CArchive.h" +#include "nbl/ext/debug_draw/builtin/build/CArchive.h" #endif #include "nbl/ext/DebugDraw/builtin/build/spirv/keys.hpp" @@ -75,32 +75,30 @@ core::smart_refctd_ptr DrawAABB::create(SCreationParameters&& params) return core::smart_refctd_ptr(new DrawAABB(std::move(constructorParams))); } -// note we use archive entry explicitly for temporary compiler include search path & asset cwd to use keys directly -constexpr std::string_view NBL_ARCHIVE_ENTRY = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; +// extension data mount alias +constexpr std::string_view NBL_EXT_MOUNT_ENTRY = "nbl/ext/DebugDraw"; -const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, const core::string& spvPath, const std::string_view archiveAlias) 
+const smart_refctd_ptr DrawAABB::mount(smart_refctd_ptr logger, ISystem* system, video::ILogicalDevice* device, const std::string_view archiveAlias) { assert(system); if (!system) return nullptr; - if (system->exists(path(NBL_ARCHIVE_ENTRY) / spvPath.c_str(), {})) - { - logger->log("CDrawAABB .spv directory is already mounted!", ILogger::ELL_WARNING); + // the key is deterministic, we are validating presence of required .spv + const auto composed = path(archiveAlias.data()) / nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(device); + if (system->exists(composed, {})) return nullptr; - } // extension should mount everything for you, regardless if content goes from virtual filesystem // or disk directly - and you should never rely on application framework to expose extension data -#ifdef NBL_EMBED_BUILTIN_RESOURCES + #ifdef NBL_EMBED_BUILTIN_RESOURCES auto archive = make_smart_refctd_ptr(smart_refctd_ptr(logger)); - system->mount(smart_refctd_ptr(archive), archiveAlias.data()); -#else - auto archive = make_smart_refctd_ptr(std::move(NBL_ARCHIVE_ENTRY), smart_refctd_ptr(logger), system); - system->mount(smart_refctd_ptr(archive), archiveAlias.data()); -#endif + #else + auto archive = make_smart_refctd_ptr(std::string_view(NBL_DEBUG_DRAW_HLSL_MOUNT_POINT), smart_refctd_ptr(logger), system); + #endif + system->mount(smart_refctd_ptr(archive), archiveAlias.data()); return smart_refctd_ptr(archive); } @@ -108,14 +106,13 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet { system::logger_opt_ptr logger = params.utilities->getLogger(); auto system = smart_refctd_ptr(params.assetManager->getSystem()); - - const auto key = nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(params.utilities->getLogicalDevice()); - mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), key, NBL_ARCHIVE_ENTRY); + auto* device = params.utilities->getLogicalDevice(); + mount(smart_refctd_ptr(params.utilities->getLogger()), system.get(), 
params.utilities->getLogicalDevice(), NBL_EXT_MOUNT_ENTRY); auto getShader = [&](const core::string& key)->smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = params.utilities->getLogger(); - lp.workingDirectory = NBL_DEBUG_DRAW_HLSL_MOUNT_POINT; + lp.workingDirectory = NBL_EXT_MOUNT_ENTRY; auto bundle = params.assetManager->getAsset(key.c_str(), lp); const auto contents = bundle.getContents(); @@ -135,6 +132,7 @@ smart_refctd_ptr DrawAABB::createPipeline(SCreationParamet return IAsset::castDown(contents[0]); }; + const auto key = nbl::ext::debug_draw::builtin::build::get_spirv_key<"draw_aabb">(device); smart_refctd_ptr unifiedShader = getShader(key); if (!unifiedShader) { From 3e5d7cb0560d0eb88da2a4a403db412016321c37 Mon Sep 17 00:00:00 2001 From: Fletterio Date: Tue, 23 Dec 2025 22:23:06 -0300 Subject: [PATCH 127/140] Fixed emulated vector template resolution ambiguity Signed-off-by: Corey --- .../nbl/builtin/hlsl/emulated/vector_t.hlsl | 198 ++++++++---------- 1 file changed, 85 insertions(+), 113 deletions(-) diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 25b033c30e..d0c728a8c7 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -134,14 +134,6 @@ NBL_CONSTEXPR_FUNC this_t operator OP() NBL_CONST_MEMBER_FUNC \ } #define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ -NBL_CONSTEXPR_FUNC this_t operator OP (component_t val) NBL_CONST_MEMBER_FUNC \ -{\ - this_t output;\ - [[unroll]]\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, this_t::getComponent(i) OP val);\ - return output;\ -}\ NBL_CONSTEXPR_FUNC this_t operator OP (this_t other) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ @@ -183,6 +175,14 @@ NBL_CONSTEXPR_FUNC vector operator OP (vector;\ using component_t = ComponentType;\ +template\ +NBL_CONSTEXPR_STATIC this_t create(vector other)\ +{\ + this_t output;\ + for (uint32_t i = 
0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, component_t::create(other[i]));\ + return output;\ +}\ NBL_CONSTEXPR_STATIC this_t create(this_t other)\ {\ CRTP output;\ @@ -209,6 +209,43 @@ NBL_CONSTEXPR_STATIC this_t create(vector other)\ return output;\ } +#define DEFINE_OPERATORS_FOR_TYPE(...)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) + _static_cast(val));\ +\ + return output;\ +}\ +\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) - _static_cast(val));\ +\ + return output;\ +}\ +\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) * _static_cast(val));\ +\ + return output;\ +}\ +\ +NBL_CONSTEXPR_FUNC this_t operator/(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, CRTP::getComponent(i) / _static_cast(val));\ +\ + return output;\ +}\ + // Fundamental, integral template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP @@ -232,6 +269,15 @@ struct emulated_vector) NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) + + DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) + DEFINE_OPERATORS_FOR_TYPE(uint16_t) + DEFINE_OPERATORS_FOR_TYPE(uint32_t) + DEFINE_OPERATORS_FOR_TYPE(uint64_t) + DEFINE_OPERATORS_FOR_TYPE(int16_t) + DEFINE_OPERATORS_FOR_TYPE(int32_t) + DEFINE_OPERATORS_FOR_TYPE(int64_t) }; // Fundamental, not integral @@ -253,6 +299,15 @@ struct emulated_vector) 
NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) + + DEFINE_OPERATORS_FOR_TYPE(emulated_uint64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_int64_t) + DEFINE_OPERATORS_FOR_TYPE(uint16_t) + DEFINE_OPERATORS_FOR_TYPE(uint32_t) + DEFINE_OPERATORS_FOR_TYPE(uint64_t) + DEFINE_OPERATORS_FOR_TYPE(int16_t) + DEFINE_OPERATORS_FOR_TYPE(int32_t) + DEFINE_OPERATORS_FOR_TYPE(int64_t) }; // Not fundamental, integral @@ -278,6 +333,20 @@ struct emulated_vector) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) + + DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + DEFINE_OPERATORS_FOR_TYPE(float16_t) + DEFINE_OPERATORS_FOR_TYPE(float32_t) + DEFINE_OPERATORS_FOR_TYPE(float64_t) + DEFINE_OPERATORS_FOR_TYPE(uint16_t) + DEFINE_OPERATORS_FOR_TYPE(uint32_t) + DEFINE_OPERATORS_FOR_TYPE(uint64_t) + DEFINE_OPERATORS_FOR_TYPE(int16_t) + DEFINE_OPERATORS_FOR_TYPE(int32_t) + DEFINE_OPERATORS_FOR_TYPE(int64_t) }; // Not fundamental, not integral @@ -299,107 +368,12 @@ struct emulated_vector) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) -}; - -#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM -#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM -#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR -#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR -#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR -#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR -#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR - -// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- - -#define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ -{\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) + component_t::create(val));\ -\ 
- return output;\ -}\ -\ -NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ -{\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) - component_t::create(val));\ -\ - return output;\ -}\ -\ -NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ -{\ - this_t output;\ - for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ - output.setComponent(i, CRTP::getComponent(i) * component_t::create(val));\ -\ - return output;\ -}\ -\ - - -template -struct emulated_vector, CRTP> : CRTP -{ - using component_t = emulated_float64_t; - using this_t = emulated_vector; - - NBL_CONSTEXPR_STATIC this_t create(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - - return output; - } - - template - NBL_CONSTEXPR_STATIC this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, component_t::create(other[i])); - - return output; - } - - NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) + other.getComponent(i)); - - return output; - } - NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); - - return output; - } - NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); - - return output; - } DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) DEFINE_OPERATORS_FOR_TYPE(emulated_float64_t) + 
DEFINE_OPERATORS_FOR_TYPE(float16_t) DEFINE_OPERATORS_FOR_TYPE(float32_t) DEFINE_OPERATORS_FOR_TYPE(float64_t) DEFINE_OPERATORS_FOR_TYPE(uint16_t) @@ -408,17 +382,15 @@ struct emulated_vector, CRTP> : DEFINE_OPERATORS_FOR_TYPE(int16_t) DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - - NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC - { - component_t sum = component_t::create(0); - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); - - return sum; - } }; +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR #undef DEFINE_OPERATORS_FOR_TYPE template From a75adfaca06b453e82775dfad2267a36ebe1781d Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Tue, 23 Dec 2025 15:15:31 +0300 Subject: [PATCH 128/140] include `thmath.hlsl` in `functions.hlsl` Signed-off-by: Corey --- include/nbl/builtin/hlsl/math/functions.hlsl | 1 + 1 file changed, 1 insertion(+) diff --git a/include/nbl/builtin/hlsl/math/functions.hlsl b/include/nbl/builtin/hlsl/math/functions.hlsl index a52eb21c23..a1c51d4e51 100644 --- a/include/nbl/builtin/hlsl/math/functions.hlsl +++ b/include/nbl/builtin/hlsl/math/functions.hlsl @@ -5,6 +5,7 @@ #define _NBL_BUILTIN_HLSL_MATH_FUNCTIONS_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/tgmath.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl" #include "nbl/builtin/hlsl/concepts/vector.hlsl" From dc6ce11838724c848f51df69b36ce0822747b57f Mon Sep 17 00:00:00 2001 From: Corey Date: Mon, 29 Sep 2025 21:42:19 -0500 Subject: [PATCH 129/140] init Signed-off-by: Corey 
--- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 8a04f82d9d..1b3a3671df 100644 --- a/.gitmodules +++ b/.gitmodules @@ -84,7 +84,7 @@ url = git@github.com:Devsh-Graphics-Programming/Nabla-Continous-Integration-Python-Framework.git [submodule "3rdparty/boost/superproject"] path = 3rdparty/boost/superproject - url = ../boost.git + url = git@github.com:Devsh-Graphics-Programming/boost.git [submodule "3rdparty/argparse"] path = 3rdparty/argparse url = git@github.com:p-ranav/argparse.git @@ -117,7 +117,7 @@ url = git@github.com:Devsh-Graphics-Programming/glm.git [submodule "docker/msvc-winsdk"] path = docker/msvc-winsdk - url = ../docker-nanoserver-msvc-winsdk + url = git@github.com:Devsh-Graphics-Programming/docker-nanoserver-msvc-winsdk.git [submodule "3rdparty/gtl"] path = 3rdparty/gtl url = https://github.com/greg7mdp/gtl.git From 2d9f2f6a3a20a7de37191ca578b18d6bbca7e8a7 Mon Sep 17 00:00:00 2001 From: Corey Date: Thu, 2 Oct 2025 21:36:17 -0500 Subject: [PATCH 130/140] i still need to reconcile mesh with compute. its based off graphics right now. 
graphics is stable tho Signed-off-by: Corey --- include/nbl/asset/IAsset.h | 1 + include/nbl/asset/ICPUMeshPipeline.h | 141 ++++++++++ include/nbl/asset/IMeshPipeline.h | 72 +++++ include/nbl/video/IGPUMeshPipeline.h | 168 ++++++++++++ include/nbl/video/ILogicalDevice.h | 28 +- include/nbl/video/asset_traits.h | 16 ++ src/nbl/video/CVulkanLogicalDevice.cpp | 360 +++++++++++++++---------- src/nbl/video/CVulkanLogicalDevice.h | 10 +- src/nbl/video/CVulkanMeshPipeline.cpp | 27 ++ src/nbl/video/CVulkanMeshPipeline.h | 31 +++ src/nbl/video/ILogicalDevice.cpp | 243 +++++++++++------ 11 files changed, 880 insertions(+), 217 deletions(-) create mode 100644 include/nbl/asset/ICPUMeshPipeline.h create mode 100644 include/nbl/asset/IMeshPipeline.h create mode 100644 include/nbl/video/IGPUMeshPipeline.h create mode 100644 src/nbl/video/CVulkanMeshPipeline.cpp create mode 100644 src/nbl/video/CVulkanMeshPipeline.h diff --git a/include/nbl/asset/IAsset.h b/include/nbl/asset/IAsset.h index a691fa6af6..6c3935d302 100644 --- a/include/nbl/asset/IAsset.h +++ b/include/nbl/asset/IAsset.h @@ -95,6 +95,7 @@ class IAsset : virtual public core::IReferenceCounted ET_PIPELINE_CACHE = 1ull<<21, //!< asset::ICPUPipelineCache ET_SCENE = 1ull<<22, //!< reserved, to implement later ET_RAYTRACING_PIPELINE = 1ull << 23, //!< asset::ICPURayTracingPipeline + ET_MESH_PIPELINE = 1ull << 24, ET_IMPLEMENTATION_SPECIFIC_METADATA = 1ull<<31u, //!< lights, etc. //! 
Reserved special value used for things like terminating lists of this enum diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h new file mode 100644 index 0000000000..7a0aa07e8d --- /dev/null +++ b/include/nbl/asset/ICPUMeshPipeline.h @@ -0,0 +1,141 @@ +#ifndef _NBL_I_CPU_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_I_CPU_MESH_PIPELINE_H_INCLUDED_ + + +#include "nbl/asset/IMeshPipeline.h" +#include "nbl/asset/ICPURenderpass.h" +#include "nbl/asset/ICPUPipeline.h" + + +namespace nbl::asset +{ + +class ICPUMeshPipeline final : public ICPUPipeline> +{ + using pipeline_base_t = IMeshPipeline; + using base_t = ICPUPipeline; + + public: + + static core::smart_refctd_ptr create(ICPUPipelineLayout* layout, ICPURenderpass* renderpass = nullptr) + { + auto retval = new ICPUMeshPipeline(layout, renderpass); + return core::smart_refctd_ptr(retval,core::dont_grab); + } + + constexpr static inline auto AssetType = ET_MESH_PIPELINE; + inline E_TYPE getAssetType() const override { return AssetType; } + + inline const SCachedCreationParams& getCachedCreationParams() const + { + return pipeline_base_t::getCachedCreationParams(); + } + + inline SCachedCreationParams& getCachedCreationParams() + { + assert(isMutable()); + return m_params; + } + + inline std::span getSpecInfos(const hlsl::ShaderStage stage) const override final + { + const auto stageIndex = stageToIndex(stage); + if (stageIndex != -1) + return { &m_specInfos[stageIndex], 1 }; + return {}; + } + + inline std::span getSpecInfos(const hlsl::ShaderStage stage) + { + return base_t::getSpecInfos(stage); + } + + SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage) + { + if (!isMutable()) return nullptr; + const auto stageIndex = stageToIndex(stage); + if (stageIndex != -1) + return &m_specInfos[stageIndex]; + return nullptr; + } + + const SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage) const + { + const auto stageIndex = stageToIndex(stage); + if (stageIndex != -1) + return 
&m_specInfos[stageIndex]; + return nullptr; + } + + inline bool valid() const override + { + if (!m_layout) return false; + if (!m_layout->valid())return false; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576 + if (!m_renderpass || m_params.subpassIx >= m_renderpass->getSubpassCount()) return false; + + core::bitflag stagePresence = {}; + for (auto shader_i = 0u; shader_i < m_specInfos.size(); shader_i++) + { + const auto& info = m_specInfos[shader_i]; + if (info.shader) + stagePresence |= indexToStage(shader_i); + } + return hasRequiredStages(stagePresence); + } + + protected: + using base_t::base_t; + virtual ~ICPUMeshPipeline() override = default; + + std::array m_specInfos; + + private: + explicit ICPUMeshPipeline(ICPUPipelineLayout* layout, ICPURenderpass* renderpass) + : base_t(layout, {}, renderpass) + {} + + static inline int8_t stageToIndex(const hlsl::ShaderStage stage) + { + const auto stageIx = hlsl::findLSB(stage); + if (stageIx < 0 || stageIx >= MESH_SHADER_STAGE_COUNT || hlsl::bitCount(stage)!=1) + return -1; + return stageIx; + } + + static inline hlsl::ShaderStage indexToStage(const int8_t index) + { + switch (index) { + case 0: return hlsl::ShaderStage::ESS_TASK; + case 1: return hlsl::ShaderStage::ESS_MESH; + case 2: return hlsl::ShaderStage::ESS_FRAGMENT; + } + return hlsl::ShaderStage::ESS_UNKNOWN; + } + + inline core::smart_refctd_ptr clone_impl(core::smart_refctd_ptr&& layout, uint32_t depth) const override final + { + auto* newPipeline = new ICPUMeshPipeline(layout.get(), m_renderpass.get()); + newPipeline->m_params = m_params; + + for (auto specInfo_i = 0u; specInfo_i < m_specInfos.size(); specInfo_i++) + { + newPipeline->m_specInfos[specInfo_i] = m_specInfos[specInfo_i].clone(depth); + } + + return core::smart_refctd_ptr(newPipeline, core::dont_grab); + } + + inline void visitDependents_impl(std::function visit) const 
override + { + if (!visit(m_layout.get())) return; + if (!visit(m_renderpass.get())) return; + for (const auto& info : m_specInfos) + if (!visit(info.shader.get())) return; + } +}; + +} + +#endif \ No newline at end of file diff --git a/include/nbl/asset/IMeshPipeline.h b/include/nbl/asset/IMeshPipeline.h new file mode 100644 index 0000000000..a3a297bbc0 --- /dev/null +++ b/include/nbl/asset/IMeshPipeline.h @@ -0,0 +1,72 @@ +#ifndef _NBL_ASSET_I_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_ASSET_I_MESH_PIPELINE_H_INCLUDED_ + +#include "nbl/asset/IShader.h" +#include "nbl/asset/RasterizationStates.h" +#include "nbl/asset/IPipeline.h" + + +namespace nbl::asset { + class IMeshPipelineBase : public virtual core::IReferenceCounted { + public: + constexpr static inline uint8_t MESH_SHADER_STAGE_COUNT = 3u; //i dont know what this is going to be used for yet, might be redundant + struct SCachedCreationParams final { + SRasterizationParams rasterization = {}; + SBlendParams blend = {}; + uint32_t subpassIx = 0u; //this subpass stuff is eluding me rn. i might just need to crack open the vulkan documentation + uint8_t requireFullSubgroups = false; + }; + + }; + + template + class IMeshPipeline : public IPipeline, public IMeshPipelineBase { + protected: + using renderpass_t = RenderpassType; + //using base_creation_params_t = IPipeline;//compute uses this, idk if its necessary yet + public: + + static inline bool hasRequiredStages(const core::bitflag& stagePresence) + { + /* + VUID-VkPipelineShaderStageCreateInfo-stage-02091 + If the meshShaders feature is not enabled, stage must not be VK_SHADER_STAGE_MESH_BIT_EXT + + VUID-VkPipelineShaderStageCreateInfo-stage-02092 + If the taskShaders feature is not enabled, stage must not be VK_SHADER_STAGE_TASK_BIT_EXT + + need to check extentions here + + maybe assert vertex,geo, and tess arent used here? 
+ */ + + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-stage-02096 + if (!stagePresence.hasFlags(hlsl::ShaderStage::ESS_MESH)) { + return false; + } + //i dont quite understand why igraphicspipeline doesnt require a fragment shader. is it not required by vulkan? + if (!stagePresence.hasFlags(hlsl::ShaderStage::ESS_FRAGMENT)) { + return false; + } + + return true; + } + + inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } + + protected: + explicit IMeshPipeline(PipelineLayoutType* layout, const SCachedCreationParams& cachedParams, renderpass_t* renderpass) : + IPipeline(core::smart_refctd_ptr(layout)), + m_params(cachedParams), m_renderpass(core::smart_refctd_ptr(renderpass)) + { + } + + SCachedCreationParams m_params = {}; + core::smart_refctd_ptr m_renderpass = nullptr; + }; + +} + + +#endif diff --git a/include/nbl/video/IGPUMeshPipeline.h b/include/nbl/video/IGPUMeshPipeline.h new file mode 100644 index 0000000000..37bf409355 --- /dev/null +++ b/include/nbl/video/IGPUMeshPipeline.h @@ -0,0 +1,168 @@ +#ifndef _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_I_GPU_MESH_PIPELINE_H_INCLUDED_ + +#include "nbl/asset/IMeshPipeline.h" + +#include "nbl/video/IGPUPipelineLayout.h" +#include "nbl/video/IGPURenderpass.h" +#include "nbl/video/IGPUPipeline.h" + +//related spec + +//i feel like this MIGHT get stuffed into graphicspipeline but idk + +/* +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-PrimitiveId-06264 +** If the pipeline requires pre-rasterization shader state, it includes a mesh shader and the fragment shader code reads from an input variable that is decorated with PrimitiveId, then the mesh shader code must write to a matching output variable, decorated with PrimitiveId, in all execution paths + 
+https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07064 +* If renderPass is not VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, subpass viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-None-02322 +* If the pipeline requires pre-rasterization shader state, and there are any mesh shader stages in the pipeline there must not be any shader stage in the pipeline with a Xfb execution mode +*** whats a xfb + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-shaderMeshEnqueue-10187 +* If the shaderMeshEnqueue feature is not enabled, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-flags-10188 +* If flags does not include VK_PIPELINE_CREATE_LIBRARY_BIT_KHR, shaders specified by pStages must not declare the ShaderEnqueueAMDX capability +*** my understanding is nabla strictly controls it's extensions, so this shouldnt be an issue + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07065 +* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the +* pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY, or VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE +*** this one seems the most relevant + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07066 +* If the pipeline requires 
pre-rasterization shader state, and includes a mesh shader, there must be no element of the +* pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE, or VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-pDynamicStates-07067 +* If the pipeline requires pre-rasterization shader state, and includes a mesh shader, there must be no element of the pDynamicStates member of pDynamicState set to VK_DYNAMIC_STATE_VERTEX_INPUT_EXT + +https://registry.khronos.org/vulkan/specs/latest/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-07720 +* If renderPass is VK_NULL_HANDLE, the pipeline is being created with pre-rasterization shader state, and +* VkPipelineRenderingCreateInfo::viewMask is not 0, and multiviewMeshShader is not enabled, then pStages must not include a mesh shader + + +* theres 1 or 2 more about pipeline libraries, but im not going to worry about that +*/ + +namespace nbl::video +{ + + class IGPUMeshPipeline : public IGPUPipeline> + { + using pipeline_t = asset::IMeshPipeline; + + public: + struct SCreationParams final : public SPipelineCreationParams + { + public: +#define base_flag(F) static_cast(pipeline_t::FLAGS::F) + enum class FLAGS : uint64_t + { + NONE = base_flag(NONE), + DISABLE_OPTIMIZATIONS = base_flag(DISABLE_OPTIMIZATIONS), + ALLOW_DERIVATIVES = base_flag(ALLOW_DERIVATIVES), + VIEW_INDEX_FROM_DEVICE_INDEX = 1 << 3, + FAIL_ON_PIPELINE_COMPILE_REQUIRED = base_flag(FAIL_ON_PIPELINE_COMPILE_REQUIRED), + EARLY_RETURN_ON_FAILURE = base_flag(EARLY_RETURN_ON_FAILURE), + }; +#undef base_flag + + inline SSpecializationValidationResult valid() const + { + if (!layout) + return {}; + SSpecializationValidationResult retval = { .count = 0,.dataSize = 0 }; + if (!layout) + return {}; + + // 
https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576 + if (!renderpass || cached.subpassIx >= renderpass->getSubpassCount()) + return {}; + + // TODO: check rasterization samples, etc. + //rp->getCreationParameters().subpasses[i] + + core::bitflag stagePresence = {}; + + auto processSpecInfo = [&](const SShaderSpecInfo& specInfo, hlsl::ShaderStage stage) + { + if (!specInfo.shader) return true; + if (!specInfo.accumulateSpecializationValidationResult(&retval)) return false; + stagePresence |= stage; + return true; + }; + if (!processSpecInfo(taskShader, hlsl::ShaderStage::ESS_TASK)) return {}; + if (!processSpecInfo(meshShader, hlsl::ShaderStage::ESS_MESH)) return {}; + if (!processSpecInfo(fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT)) return {}; + + if (!hasRequiredStages(stagePresence)) + return {}; + + //if (!vertexShader.shader) return {}; //i dont quite understand why this line was here. checking if the shader itself was made correctly? 
+ + return retval; + } + + inline core::bitflag getRequiredSubgroupStages() const + { + + core::bitflag stages = {}; + auto processSpecInfo = [&](const SShaderSpecInfo& spec, hlsl::ShaderStage stage) + { + if (spec.shader && spec.requiredSubgroupSize >= SUBGROUP_SIZE::REQUIRE_4) { + stages |= stage; + } + }; + processSpecInfo(taskShader, hlsl::ESS_TASK); + processSpecInfo(meshShader, hlsl::ESS_MESH); + processSpecInfo(fragmentShader, hlsl::ESS_FRAGMENT); + return stages; + } + + inline core::bitflag& getFlags() { return flags; } + + inline core::bitflag getFlags() const { return flags; } + + const IGPUPipelineLayout* layout = nullptr; + SShaderSpecInfo taskShader; + SShaderSpecInfo meshShader; + SShaderSpecInfo fragmentShader; + SCachedCreationParams cached = {}; + renderpass_t* renderpass = nullptr; + + // TODO: Could guess the required flags from SPIR-V introspection of declared caps + core::bitflag flags = FLAGS::NONE; + + inline uint32_t getShaderCount() const + { + uint32_t count = 0; //count = 2 and only check task shader?? + count += (taskShader.shader != nullptr); + count += (meshShader.shader != nullptr); + count += (fragmentShader.shader != nullptr); + return count; + } + }; + + inline core::bitflag getCreationFlags() const { return m_flags; } + + // Vulkan: const VkPipeline* + virtual const void* getNativeHandle() const = 0; + + protected: + // not explicit? 
+ IGPUMeshPipeline(const SCreationParams& params) : + IGPUPipeline(core::smart_refctd_ptr(params.layout->getOriginDevice()), params.layout, params.cached, params.renderpass), m_flags(params.flags) + { + } + virtual ~IGPUMeshPipeline() override = default; + + const core::bitflag m_flags; + }; + +} + +#endif \ No newline at end of file diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 180342e2d4..0a8c79053c 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -1020,7 +1020,11 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe return createPipelineCache(initialData,notThreadsafe); } - bool createComputePipelines(IGPUPipelineCache* const pipelineCache, const std::span params, core::smart_refctd_ptr* const output); + bool createComputePipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output + ); bool createGraphicsPipelines( IGPUPipelineCache* const pipelineCache, @@ -1028,9 +1032,17 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe core::smart_refctd_ptr* const output ); - bool createRayTracingPipelines(IGPUPipelineCache* const pipelineCache, - const std::span params, - core::smart_refctd_ptr* const output); + bool createRayTracingPipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output + ); + + bool createMeshPipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output + ); // queries inline core::smart_refctd_ptr createQueryPool(const IQueryPool::SCreationParams& params) @@ -1282,12 +1294,18 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) = 0; - virtual void createGraphicsPipelines_impl( + virtual void 
createGraphicsPipelines_impl ( IGPUPipelineCache* const pipelineCache, const std::span params, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) = 0; + virtual void createMeshPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation + ) = 0; virtual void createRayTracingPipelines_impl( IGPUPipelineCache* const pipelineCache, const std::span createInfos, diff --git a/include/nbl/video/asset_traits.h b/include/nbl/video/asset_traits.h index c4a6c25ca5..d98274778c 100644 --- a/include/nbl/video/asset_traits.h +++ b/include/nbl/video/asset_traits.h @@ -24,6 +24,9 @@ #include "nbl/asset/ICPURayTracingPipeline.h" #include "nbl/video/IGPURayTracingPipeline.h" +#include "nbl/asset/ICPUMeshPipeline.h" +#include "nbl/video/IGPUMeshPipeline.h" + namespace nbl::video { @@ -136,6 +139,19 @@ struct asset_traits using lookup_t = const video_t*; }; +template<> +struct asset_traits { + // the asset type + using asset_t = asset::ICPUMeshPipeline; + // we reference a pipeline layout and a renderpass + constexpr static inline bool HasChildren = true; + // the video type, must be the mesh pipeline and not the graphics one + using video_t = IGPUMeshPipeline; + // lookup type + using lookup_t = const video_t*; +}; + + template<> struct asset_traits diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 5390b4c3fa..928497ccaf 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1176,12 +1176,10 @@ void CVulkanLogicalDevice::createComputePipelines_impl( for (const auto& info : createInfos) { initPipelineCreateInfo(outCreateInfo,info); - const auto& spec = info.shader; - outCreateInfo->stage = getVkShaderStageCreateInfoFrom(spec, hlsl::ShaderStage::ESS_COMPUTE, info.cached.requireFullSubgroups, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, 
outSpecData); + outCreateInfo->stage = getVkShaderStageCreateInfoFrom(info.shader, hlsl::ShaderStage::ESS_COMPUTE, info.cached.requireFullSubgroups, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); outCreateInfo++; } auto vk_pipelines = reinterpret_cast(output); - std::stringstream debugNameBuilder; if (m_devf.vk.vkCreateComputePipelines(m_vkdev,vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) { for (size_t i=0ull; i( info,vk_pipeline ); + std::stringstream debugNameBuilder; debugNameBuilder.str(""); const auto& specInfo = createInfos[i].shader; debugNameBuilder << specInfo.shader->getFilepathHint() << "(" << specInfo.entryPoint << "," << hlsl::ShaderStage::ESS_COMPUTE << ")\n"; @@ -1202,26 +1201,136 @@ void CVulkanLogicalDevice::createComputePipelines_impl( std::fill_n(output,vk_createInfos.size(),nullptr); } -void CVulkanLogicalDevice::createGraphicsPipelines_impl( - IGPUPipelineCache* const pipelineCache, - const std::span createInfos, - core::smart_refctd_ptr* const output, - const SSpecializationValidationResult& validation -) -{ - auto getVkStencilOpStateFrom = [](const asset::SStencilOpParams& params)->VkStencilOpState - { - return { - .failOp = static_cast(params.failOp), - .passOp = static_cast(params.passOp), - .depthFailOp = static_cast(params.depthFailOp), - .compareOp = static_cast(params.compareOp) - }; +void PopulateViewport(VkPipelineViewportStateCreateInfo& outViewport, nbl::asset::SRasterizationParams const& raster){ + outViewport.viewportCount = raster.viewportCount; + // must be identical to viewport count unless VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT or VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT are used + outViewport.scissorCount = raster.viewportCount; +} + + +void PopulateRaster(VkPipelineRasterizationStateCreateInfo& outRaster, nbl::asset::SRasterizationParams const& raster){ + outRaster.depthClampEnable = raster.depthClampEnable; + 
outRaster.rasterizerDiscardEnable = raster.rasterizerDiscard; + outRaster.polygonMode = static_cast(raster.polygonMode); + outRaster.cullMode = static_cast(raster.faceCullingMode); + outRaster.frontFace = raster.frontFaceIsCCW ? VK_FRONT_FACE_COUNTER_CLOCKWISE:VK_FRONT_FACE_CLOCKWISE; + outRaster.depthBiasEnable = raster.depthBiasEnable; +} + +void PopulateMultisample(VkPipelineMultisampleStateCreateInfo& outMultisample, nbl::asset::SRasterizationParams const& raster){ + outMultisample.rasterizationSamples = static_cast(0x1<0) { + outMultisample.sampleShadingEnable = true; + outMultisample.minSampleShading = float(raster.minSampleShadingUnorm)/255.f; + } + else { + outMultisample.sampleShadingEnable = false; + outMultisample.minSampleShading = 0.f; + } + outMultisample.pSampleMask = raster.sampleMask; + outMultisample.alphaToCoverageEnable = raster.alphaToCoverageEnable; + outMultisample.alphaToOneEnable = raster.alphaToOneEnable; +} +VkStencilOpState getVkStencilOpStateFrom(const asset::SStencilOpParams& params){ + return { + .failOp = static_cast(params.failOp), + .passOp = static_cast(params.passOp), + .depthFailOp = static_cast(params.depthFailOp), + .compareOp = static_cast(params.compareOp) }; +} - const auto& features = getEnabledFeatures(); +void PopulateDepthStencil(VkPipelineDepthStencilStateCreateInfo& outDepthStencil, nbl::asset::SRasterizationParams const& raster){ + outDepthStencil.depthTestEnable = raster.depthTestEnable(); + outDepthStencil.depthWriteEnable = raster.depthWriteEnable; + outDepthStencil.depthCompareOp = static_cast(raster.depthCompareOp); + outDepthStencil.depthBoundsTestEnable = raster.depthBoundsTestEnable; + outDepthStencil.stencilTestEnable = raster.stencilTestEnable(); + outDepthStencil.front = getVkStencilOpStateFrom(raster.frontStencilOps); + outDepthStencil.back = getVkStencilOpStateFrom(raster.backStencilOps); +} - core::vector vk_dynamicStates = { +void PopulateColorBlend( + VkPipelineColorBlendStateCreateInfo& 
outColorBlend, + VkPipelineColorBlendAttachmentState*& outColorBlendAttachmentState, + nbl::asset::SBlendParams const& blend, + nbl::asset::IRenderpass::SCreationParams::SSubpassDescription const& subpass +) { + //outColorBlend->flags no attachment order access yet + outColorBlend.logicOpEnable = blend.logicOp!=asset::ELO_NO_OP; + outColorBlend.logicOp = getVkLogicOpFromLogicOp(blend.logicOp); + outColorBlend.pAttachments = outColorBlendAttachmentState; + for (auto i=0; iblendEnable = params.blendEnabled(); + outColorBlendAttachmentState->srcColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcColorFactor)); + outColorBlendAttachmentState->dstColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstColorFactor)); + outColorBlendAttachmentState->colorBlendOp = getVkBlendOpFromBlendOp(static_cast(params.colorBlendOp)); + outColorBlendAttachmentState->srcAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcAlphaFactor)); + outColorBlendAttachmentState->dstAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstAlphaFactor)); + outColorBlendAttachmentState->alphaBlendOp = getVkBlendOpFromBlendOp(static_cast(params.alphaBlendOp)); + outColorBlendAttachmentState->colorWriteMask = getVkColorComponentFlagsFromColorWriteMask(params.colorWriteMask); + outColorBlendAttachmentState++; + //^that pointer iterator is how we ensure the attachments or consecutive + } + } + outColorBlend.attachmentCount = std::distance(outColorBlend.pAttachments,outColorBlendAttachmentState); +} + +template +void PopulateMeshGraphicsCommonData( + const std::span createInfos, + core::vector& vk_createInfos, + + core::vector& vk_viewportStates, + core::vector& vk_rasterizationStates, + core::vector& vk_multisampleStates, + core::vector& vk_depthStencilStates, + core::vector& vk_colorBlendStates, + core::vector& vk_colorBlendAttachmentStates, + + core::vector& vk_dynamicStates, + const VkPipelineDynamicStateCreateInfo& 
vk_dynamicStateCreateInfo +){ + //the main concern is lifetime, so don't want to construct, move, or copy anything in here + + auto outColorBlendAttachmentState = vk_colorBlendAttachmentStates.data(); //the pointer iterator is used + + + for (uint32_t i = 0; i < createInfos.size(); i++){ //whats the maximum number of pipelines that can be created at once? uint32_t to be safe + auto& info = createInfos[i]; + const auto& blend = info.cached.blend; + const auto& raster = info.cached.rasterization; + const auto& subpass = info.renderpass->getCreationParameters().subpasses[info.cached.subpassIx]; + + initPipelineCreateInfo(&vk_createInfos[i], info); + + PopulateViewport(vk_viewportStates[i], raster); + PopulateRaster(vk_rasterizationStates[i], raster); + PopulateMultisample(vk_multisampleStates[i], raster); + PopulateDepthStencil(vk_depthStencilStates[i], raster); + PopulateColorBlend(vk_colorBlendStates[i], outColorBlendAttachmentState, blend, subpass); + //PopulateDynamicState(dynState, ?) + + + vk_createInfos[i].pViewportState = &vk_viewportStates[i]; + vk_createInfos[i].pRasterizationState = &vk_rasterizationStates[i]; + vk_createInfos[i].pMultisampleState = &vk_multisampleStates[i]; + vk_createInfos[i].pDepthStencilState = &vk_depthStencilStates[i]; + vk_createInfos[i].pColorBlendState = &vk_colorBlendStates[i]; + vk_createInfos[i].pDynamicState = &vk_dynamicStateCreateInfo; + vk_createInfos[i].renderPass = static_cast(info.renderpass)->getInternalObject(); + vk_createInfos[i].subpass = info.cached.subpassIx; + //handle + //index + //layout? 
+ // ^ handled in initPipelineCreateInfo + } +} + +core::vector getDefaultDynamicStates(SPhysicalDeviceFeatures const& features){ + core::vector ret = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_LINE_WIDTH, @@ -1231,17 +1340,43 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE }; - if (features.depthBounds) - vk_dynamicStates.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); + if (features.depthBounds){ + ret.push_back(VK_DYNAMIC_STATE_DEPTH_BOUNDS); + } // TODO: VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT, VK_DYNAMIC_STATE_DISCARD_RECTANGLE_ENABLE_EXT, VK_DYNAMIC_STATE_DISCARD_RECTANGLE_MODE_EXT - - const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0u, - .dynamicStateCount = static_cast(vk_dynamicStates.size()), - .pDynamicStates = vk_dynamicStates.data() - }; + + return ret; +} + +//maximum cleanliness,i tried it and im not a big fan +//struct CommonPipelineStruct { +// VkPipelineRasterizationStateCreateInfo vk_rasterizationStates{ VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }; +// VkPipelineMultisampleStateCreateInfo vk_multisampleStates{ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }; +// VkPipelineDepthStencilStateCreateInfo vk_depthStencilStates{ VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }; +// VkPipelineColorBlendStateCreateInfo vk_colorBlendStates{ VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }; +// core::vector vk_colorBlendAttachmentStates{ IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments }; +//}; + + +void CVulkanLogicalDevice::createMeshPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation +) { + const 
auto& features = getEnabledFeatures(); + const VkPipelineCache vk_pipelineCache = pipelineCache ? static_cast(pipelineCache)->getInternalObject() : VK_NULL_HANDLE; + +} + +void CVulkanLogicalDevice::createGraphicsPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span createInfos, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation +) +{ + const auto& features = getEnabledFeatures(); const VkPipelineCache vk_pipelineCache = pipelineCache ? static_cast(pipelineCache)->getInternalObject():VK_NULL_HANDLE; // Interesting things to put in pNext: @@ -1252,8 +1387,55 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( // - Discard Rectangle State // - Fragment Shading Rate State Creation Info // - Piepline Robustness + + //maximum cleanliness, I create a struct that holds this for mesh and graphics? core::vector vk_createInfos(createInfos.size(),{VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr}); + core::vector vk_rasterizationStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_multisampleStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_depthStencilStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendAttachmentStates(createInfos.size() * IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); + + core::vector vk_dynamicStates = getDefaultDynamicStates(features); + + const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0u, + .dynamicStateCount = static_cast(vk_dynamicStates.size()), + .pDynamicStates = vk_dynamicStates.data() + }; + 
core::vector vk_viewportStates(createInfos.size(), { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway + .flags = 0, // must be 0 + .viewportCount = 0, + .pViewports = nullptr, + .scissorCount = 0, + .pScissors = nullptr, + }); + + PopulateMeshGraphicsCommonData( + createInfos, vk_createInfos, + + vk_viewportStates, + vk_rasterizationStates, + vk_multisampleStates, + vk_depthStencilStates, + vk_colorBlendStates, + vk_colorBlendAttachmentStates, + + vk_dynamicStates, vk_dynamicStateCreateInfo + ); + + + core::vector vk_inputBinding(createInfos.size() * asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT); + core::vector vk_inputAttribute(createInfos.size() * asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT); + core::vector vk_inputAssembly(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_tessellation(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_vertexInput(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,nullptr,0 }); + const auto maxShaderStages = createInfos.size()*IGPUGraphicsPipeline::GRAPHICS_SHADER_STAGE_COUNT; core::vector vk_shaderStage(maxShaderStages,{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr}); core::vector vk_shaderModule(maxShaderStages,{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); @@ -1264,25 +1446,6 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( core::vector vk_specializationInfos(maxShaderStages,{0,nullptr,0,nullptr}); core::vector vk_specializationMapEntry(validation.count); core::vector specializationData(validation.dataSize); - core::vector vk_vertexInput(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,nullptr,0}); - core::vector 
vk_inputBinding(createInfos.size()*asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT); - core::vector vk_inputAttribute(createInfos.size()*asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT); - core::vector vk_inputAssembly(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_tessellation(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_viewportStates(createInfos.size(),{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway - .flags = 0, // must be 0 - .viewportCount = 0, - .pViewports = nullptr, - .scissorCount = 0, - .pScissors = nullptr, - }); - core::vector vk_rasterizationStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_multisampleStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_depthStencilStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_colorBlendStates(createInfos.size(),{VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0}); - core::vector vk_colorBlendAttachmentStates(createInfos.size()*IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); auto outCreateInfo = vk_createInfos.data(); auto outShaderStage = vk_shaderStage.data(); @@ -1295,25 +1458,20 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( auto outVertexInput = vk_vertexInput.data(); auto outInputBinding = vk_inputBinding.data(); auto outInputAttribute = vk_inputAttribute.data(); - auto outInputAssembly = vk_inputAssembly.data(); auto outTessellation = vk_tessellation.data(); - auto outViewport = vk_viewportStates.data(); - auto outRaster = vk_rasterizationStates.data(); - auto outMultisample = vk_multisampleStates.data(); - auto 
outDepthStencil = vk_depthStencilStates.data(); - auto outColorBlend = vk_colorBlendStates.data(); - auto outColorBlendAttachmentState = vk_colorBlendAttachmentStates.data(); + auto outInputAssembly = vk_inputAssembly.data(); + + //graphics only stuff for (const auto& info : createInfos) { - initPipelineCreateInfo(outCreateInfo,info); outCreateInfo->pStages = outShaderStage; auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) { if (spec.shader) { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, shaderStage, false, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); - outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, shaderStage, false, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); } }; processSpecShader(info.vertexShader, hlsl::ShaderStage::ESS_VERTEX); @@ -1355,87 +1513,19 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; } outCreateInfo->pInputAssemblyState = outInputAssembly++; - } - - if (info.tesselationControlShader.shader || info.tesselationEvaluationShader.shader) - { - outTessellation->patchControlPoints = info.cached.primitiveAssembly.tessPatchVertCount; - outCreateInfo->pTessellationState = outTessellation++; - } - - const auto& raster = info.cached.rasterization; - { - outViewport->viewportCount = raster.viewportCount; - // must be identical to viewport count unless VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT or VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT are used - outViewport->scissorCount = raster.viewportCount; - outCreateInfo->pViewportState = outViewport++; - } - { - outRaster->depthClampEnable = 
raster.depthClampEnable; - outRaster->rasterizerDiscardEnable = raster.rasterizerDiscard; - outRaster->polygonMode = static_cast(raster.polygonMode); - outRaster->cullMode = static_cast(raster.faceCullingMode); - outRaster->frontFace = raster.frontFaceIsCCW ? VK_FRONT_FACE_COUNTER_CLOCKWISE:VK_FRONT_FACE_CLOCKWISE; - outRaster->depthBiasEnable = raster.depthBiasEnable; - outCreateInfo->pRasterizationState = outRaster++; - } - { - outMultisample->rasterizationSamples = static_cast(0x1<0) - { - outMultisample->sampleShadingEnable = true; - outMultisample->minSampleShading = float(raster.minSampleShadingUnorm)/255.f; - } - else - { - outMultisample->sampleShadingEnable = false; - outMultisample->minSampleShading = 0.f; - } - outMultisample->pSampleMask = raster.sampleMask; - outMultisample->alphaToCoverageEnable = raster.alphaToCoverageEnable; - outMultisample->alphaToOneEnable = raster.alphaToOneEnable; - outCreateInfo->pMultisampleState = outMultisample++; - } - { - //outDepthStencil->flags no attachment order access yet - outDepthStencil->depthTestEnable = raster.depthTestEnable(); - outDepthStencil->depthWriteEnable = raster.depthWriteEnable; - outDepthStencil->depthCompareOp = static_cast(raster.depthCompareOp); - outDepthStencil->depthBoundsTestEnable = raster.depthBoundsTestEnable; - outDepthStencil->stencilTestEnable = raster.stencilTestEnable(); - outDepthStencil->front = getVkStencilOpStateFrom(raster.frontStencilOps); - outDepthStencil->back = getVkStencilOpStateFrom(raster.backStencilOps); - outCreateInfo->pDepthStencilState = outDepthStencil++; - } - { - const auto& blend = info.cached.blend; - const auto& subpass = info.renderpass->getCreationParameters().subpasses[info.cached.subpassIx]; - //outColorBlend->flags no attachment order access yet - outColorBlend->logicOpEnable = blend.logicOp!=asset::ELO_NO_OP; - outColorBlend->logicOp = getVkLogicOpFromLogicOp(blend.logicOp); - outColorBlend->pAttachments = outColorBlendAttachmentState; - for (auto i=0; 
iblendEnable = params.blendEnabled(); - outColorBlendAttachmentState->srcColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcColorFactor)); - outColorBlendAttachmentState->dstColorBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstColorFactor)); - outColorBlendAttachmentState->colorBlendOp = getVkBlendOpFromBlendOp(static_cast(params.colorBlendOp)); - outColorBlendAttachmentState->srcAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.srcAlphaFactor)); - outColorBlendAttachmentState->dstAlphaBlendFactor = getVkBlendFactorFromBlendFactor(static_cast(params.dstAlphaFactor)); - outColorBlendAttachmentState->alphaBlendOp = getVkBlendOpFromBlendOp(static_cast(params.alphaBlendOp)); - outColorBlendAttachmentState->colorWriteMask = getVkColorComponentFlagsFromColorWriteMask(params.colorWriteMask); - outColorBlendAttachmentState++; + outTessellation->patchControlPoints = info.cached.primitiveAssembly.tessPatchVertCount; + outCreateInfo->pTessellationState = outTessellation++; } - outColorBlend->attachmentCount = std::distance(outColorBlend->pAttachments,outColorBlendAttachmentState); - outCreateInfo->pColorBlendState = outColorBlend++; } - outCreateInfo->pDynamicState = &vk_dynamicStateCreateInfo; - outCreateInfo->renderPass = static_cast(info.renderpass)->getInternalObject(); - outCreateInfo->subpass = info.cached.subpassIx; + outCreateInfo++; } + + + auto vk_pipelines = reinterpret_cast(output); std::stringstream debugNameBuilder; if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev,vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 4cc633ec55..f5e099cf92 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -20,7 +20,6 @@ #include "nbl/video/CVulkanSampler.h" #include "nbl/video/CVulkanPipelineLayout.h" #include 
"nbl/video/CVulkanPipelineCache.h" -#include "nbl/video/CVulkanComputePipeline.h" #include "nbl/video/CVulkanDescriptorPool.h" #include "nbl/video/CVulkanDescriptorSet.h" #include "nbl/video/CVulkanMemoryAllocation.h" @@ -29,7 +28,10 @@ #include "nbl/video/CVulkanImage.h" #include "nbl/video/CVulkanDeferredOperation.h" #include "nbl/video/CVulkanAccelerationStructure.h" + +#include "nbl/video/CVulkanComputePipeline.h" #include "nbl/video/CVulkanGraphicsPipeline.h" +#include "nbl/video/CVulkanMeshPipeline.h" #include "nbl/video/CVulkanRayTracingPipeline.h" namespace nbl::video @@ -293,6 +295,12 @@ class CVulkanLogicalDevice final : public ILogicalDevice core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) override; + void createMeshPipelines_impl( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output, + const SSpecializationValidationResult& validation + ) override; //final? void createRayTracingPipelines_impl( IGPUPipelineCache* const pipelineCache, diff --git a/src/nbl/video/CVulkanMeshPipeline.cpp b/src/nbl/video/CVulkanMeshPipeline.cpp new file mode 100644 index 0000000000..5801fb075c --- /dev/null +++ b/src/nbl/video/CVulkanMeshPipeline.cpp @@ -0,0 +1,27 @@ +#include "nbl/video/CVulkanMeshPipeline.h" + +#include "nbl/video/CVulkanLogicalDevice.h" + +namespace nbl::video +{ + + CVulkanMeshPipeline::~CVulkanMeshPipeline() + { + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + auto* vk = vulkanDevice->getFunctionTable(); + vk->vk.vkDestroyPipeline(vulkanDevice->getInternalObject(), m_vkPipeline, nullptr); + } + void CVulkanMeshPipeline::setObjectDebugName(const char* label) const + { + IBackendObject::setObjectDebugName(label); + + if (vkSetDebugUtilsObjectNameEXT == 0) return; + + const CVulkanLogicalDevice* vulkanDevice = static_cast(getOriginDevice()); + VkDebugUtilsObjectNameInfoEXT nameInfo = { 
VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, nullptr }; + nameInfo.objectType = VK_OBJECT_TYPE_PIPELINE; + nameInfo.objectHandle = reinterpret_cast(getInternalObject()); + nameInfo.pObjectName = getObjectDebugName(); + vkSetDebugUtilsObjectNameEXT(vulkanDevice->getInternalObject(), &nameInfo); + } +} \ No newline at end of file diff --git a/src/nbl/video/CVulkanMeshPipeline.h b/src/nbl/video/CVulkanMeshPipeline.h new file mode 100644 index 0000000000..3bf68d33a0 --- /dev/null +++ b/src/nbl/video/CVulkanMeshPipeline.h @@ -0,0 +1,31 @@ +#ifndef _NBL_C_VULKAN_MESH_PIPELINE_H_INCLUDED_ +#define _NBL_C_VULKAN_MESH_PIPELINE_H_INCLUDED_ + + +#include "nbl/video/IGPUMeshPipeline.h" + +#include + +namespace nbl::video +{ + +class CVulkanMeshPipeline final : public IGPUMeshPipeline +{ + public: + CVulkanMeshPipeline(const SCreationParams& params, const VkPipeline vk_pipeline) : + IGPUMeshPipeline(params), m_vkPipeline(vk_pipeline) {} + + inline const void* getNativeHandle() const override {return &m_vkPipeline;} + + inline VkPipeline getInternalObject() const {return m_vkPipeline;} + + void setObjectDebugName(const char* label) const override; //exists in compute but not in graphics + private: + ~CVulkanMeshPipeline(); + + const VkPipeline m_vkPipeline; +}; + +} + +#endif diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 7c3f5dbb81..561574b83d 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -833,6 +833,172 @@ bool ILogicalDevice::createComputePipelines(IGPUPipelineCache* const pipelineCac return retval; } +bool MeshGraphicsCommonValidation( + const IGPURenderpass* renderpass, uint8_t subpassIndex, + SPhysicalDeviceLimits const& limits, SPhysicalDeviceFeatures const& features, + nbl::asset::SRasterizationParams const& rasterParams, nbl::asset::SBlendParams const& blendParams, + const system::logger_opt_ptr m_logger, + const IPhysicalDevice::SFormatImageUsages& formatUsages +) { + if 
(rasterParams.alphaToOneEnable && !features.alphaToOne) + { + NBL_LOG_ERROR("Feature `alpha to one` is not enabled"); + return false; + } + if (rasterParams.depthBoundsTestEnable && !features.depthBounds) + { + NBL_LOG_ERROR("Feature `depth bounds` is not enabled"); + return false; + } + const auto samples = 0x1u << rasterParams.samplesLog2; + + const auto& passParams = renderpass->getCreationParameters(); + const auto& subpass = passParams.subpasses[subpassIndex]; + if (subpass.viewMask) + { + /* + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06047 + if (!limits.multiviewTessellationShader && .test(tesS_contrOL)) + return false; + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06048 + if (!limits.multiviewGeometryShader && .test(GEOMETRY)) + return false; + */ + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06578 + //NOTE: the index of the MSB must be strictly less than maxMultiviewViewCount, so an index equal to the limit is rejected as well + if (hlsl::findMSB(subpass.viewMask) >= limits.maxMultiviewViewCount) + { + NBL_LOG_ERROR("Invalid viewMask (params[%u])", subpassIndex); + return false; + } + } + if (subpass.depthStencilAttachment.render.used()) + { + const auto& attachment = passParams.depthStencilAttachments[subpass.depthStencilAttachment.render.attachmentIndex]; + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 + bool sampleCountNeedsToMatch = !features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/; + // 
https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01411 + if (/*detect NV version && */(rasterParams.depthTestEnable() || rasterParams.stencilTestEnable() || rasterParams.depthBoundsTestEnable)) + sampleCountNeedsToMatch = true; + if (sampleCountNeedsToMatch && attachment.samples != samples) + { + NBL_LOG_ERROR("Depth stencil and rasterization samples need to match (params[%u])", subpassIndex); + return false; + } + } + for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) + { + const auto& render = subpass.colorAttachments[i].render; + if (render.used()) + { + const auto& attachment = passParams.colorAttachments[render.attachmentIndex]; + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06041 + if (blendParams.blendParams[i].blendEnabled() && !formatUsages[attachment.format].attachmentBlend) + { + NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", subpassIndex, i); + return false; + } + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 + if (!features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/ && attachment.samples != samples) + { + NBL_LOG_ERROR("Color attachment and rasterization samples need to match (params[%u].colorAttachments[%u])", subpassIndex, i); + return false; + } + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01412 + if (/*detect NV version && */(attachment.samples > samples)) + { + NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", subpassIndex, i); + return false; + } + } + } + + return true; +} + +//this is a 
COPY of graphics pipeline, with MINOR adjustments. +//no changes should be made DIRECTLY here +//UNLESS it's DIRECTLY for mesh/task +//there SHOULD be a function duplicates functionality between graphics and mesh pipeline that can be adjusted first +bool ILogicalDevice::createMeshPipelines( + IGPUPipelineCache* const pipelineCache, + const std::span params, + core::smart_refctd_ptr* const output +) { + std::fill_n(output, params.size(), nullptr); + SSpecializationValidationResult specConstantValidation = commonCreatePipelines(pipelineCache, params); + if (!specConstantValidation) { + NBL_LOG_ERROR("Invalid parameters were given"); + return false; + } + + const auto& features = getEnabledFeatures(); + const auto& limits = getPhysicalDeviceLimits(); + + core::vector newParams(params.begin(), params.end()); + const auto shaderCount = std::accumulate(params.begin(), params.end(), 0, [](uint32_t sum, auto& param) + {return sum + param.getShaderCount();} + ); + core::vector> trimmedShaders; // vector to hold all the trimmed shaders, so the pointer from the new ShaderSpecInfo is not dangling + trimmedShaders.reserve(shaderCount); + + for (auto ix = 0u; ix < params.size(); ix++) + { + const auto& ci = params[ix]; + + if (params[ix].taskShader.shader != nullptr) { + if (!features.taskShader) { + NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); + return false; + } + } + + //check extensions here + //it SEEMS like createGraphicsPipeline does, but it does it in a weird way I don't understand? + //geo and tess are just flat disabled?? 
+ if (!features.meshShader) { + NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); + return false; + } + + auto renderpass = ci.renderpass; + if (!renderpass->wasCreatedBy(this)) { + NBL_LOG_ERROR("Invalid renderpass was given (params[%u])", ix); + return false; + } + + + if (!MeshGraphicsCommonValidation(renderpass, ci.cached.subpassIx, limits, features, ci.cached.rasterization, ci.cached.blend, m_logger, getPhysicalDevice()->getImageFormatUsagesOptimalTiling())) return false; /* abort instead of creating a pipeline from parameters that failed the shared mesh/graphics validation */ + + SpirvTrimTask trimTask(m_spirvTrimmer.get(), m_logger); + trimTask.insertEntryPoint(ci.taskShader, hlsl::ShaderStage::ESS_TASK); + trimTask.insertEntryPoint(ci.meshShader, hlsl::ShaderStage::ESS_MESH); + trimTask.insertEntryPoint(ci.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + + newParams[ix].taskShader = trimTask.trim(ci.taskShader, trimmedShaders); + newParams[ix].meshShader = trimTask.trim(ci.meshShader, trimmedShaders); + newParams[ix].fragmentShader = trimTask.trim(ci.fragmentShader, trimmedShaders); + } + createMeshPipelines_impl(pipelineCache, newParams, output, specConstantValidation); + + for (auto i = 0u; i < params.size(); i++) + { + if (!output[i]) + { + NBL_LOG_ERROR("MeshPipeline was not created (params[%u])", i); + return false; + } + else + { + m_logger.log("shader[%d] mesh debug name - %s\n", nbl::system::ILogger::ELL_DEBUG, i, params[i].meshShader.shader->getDebugName()); + // TODO: set pipeline debug name thats a concatenation of all active stages' shader file path hints + } + } + return true; +} + bool ILogicalDevice::createGraphicsPipelines( IGPUPipelineCache* const pipelineCache, const std::span params, @@ -888,88 +1054,13 @@ bool ILogicalDevice::createGraphicsPipelines( return false; } - const auto& rasterParams = ci.cached.rasterization; - if (rasterParams.alphaToOneEnable && !features.alphaToOne) - { - NBL_LOG_ERROR("Feature `alpha to one` is not enabled"); - return false; - } - if (rasterParams.depthBoundsTestEnable && !features.depthBounds) - { - NBL_LOG_ERROR("Feature 
`depth bounds` is not enabled"); - return false; - } - - const auto samples = 0x1u << rasterParams.samplesLog2; - // TODO: loads more validation on extra parameters here! // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-lineRasterizationMode-02766 // TODO: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01505 // baiscally the AMD version must have the rasterization samples equal to the maximum of all attachment samples counts - const auto& passParams = renderpass->getCreationParameters(); - const auto& subpass = passParams.subpasses[ci.cached.subpassIx]; - if (subpass.viewMask) - { - /* - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06047 - if (!limits.multiviewTessellationShader && .test(tesS_contrOL)) - return false; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06048 - if (!limits.multiviewGeomtryShader && .test(GEOMETRY)) - return false; - */ - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06578 - //NOTE: index of MSB must be less than maxMultiviewViewCount; wrong negation here, should be >= - if (hlsl::findMSB(subpass.viewMask) > limits.maxMultiviewViewCount) - { - NBL_LOG_ERROR("Invalid viewMask (params[%u])", ix); - return false; - } - } - if (subpass.depthStencilAttachment.render.used()) - { - const auto& attachment = passParams.depthStencilAttachments[subpass.depthStencilAttachment.render.attachmentIndex]; - - // 
https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 - bool sampleCountNeedsToMatch = !features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01411 - if (/*detect NV version && */(rasterParams.depthTestEnable() || rasterParams.stencilTestEnable() || rasterParams.depthBoundsTestEnable)) - sampleCountNeedsToMatch = true; - if (sampleCountNeedsToMatch && attachment.samples != samples) - { - NBL_LOG_ERROR("Invalid depth stencil attachment (params[%u])", ix); - return false; - } - } - for (auto i = 0; i < IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments; i++) - { - const auto& render = subpass.colorAttachments[i].render; - if (render.used()) - { - const auto& attachment = passParams.colorAttachments[render.attachmentIndex]; - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-renderPass-06041 - if (ci.cached.blend.blendParams[i].blendEnabled() && !getPhysicalDevice()->getImageFormatUsagesOptimalTiling()[attachment.format].attachmentBlend) - { - NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", ix, i); - return false; - } - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-multisampledRenderToSingleSampled-06853 - if (!features.mixedAttachmentSamples /*&& !features.multisampledRenderToSingleSampled*/ && attachment.samples != samples) - { - NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", ix, i); - return false; - } - // 
https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-subpass-01412 - if (/*detect NV version && */(attachment.samples > samples)) - { - NBL_LOG_ERROR("Invalid color attachment (params[%u].colorAttachments[%u])", ix, i); - return false; - } - } - } + if (!MeshGraphicsCommonValidation(renderpass, ci.cached.subpassIx, limits, features, ci.cached.rasterization, ci.cached.blend, m_logger, getPhysicalDevice()->getImageFormatUsagesOptimalTiling())) return false; /* keep the early-out the removed inline checks had: a failed validation must abort creation */ SpirvTrimTask trimTask(m_spirvTrimmer.get(), m_logger); trimTask.insertEntryPoint(ci.vertexShader, hlsl::ShaderStage::ESS_VERTEX); From bf48d8ea1a6e86b51a9efc19002d7aff22437017 Mon Sep 17 00:00:00 2001 From: Corey Date: Fri, 3 Oct 2025 17:04:12 -0500 Subject: [PATCH 131/140] graphics are stable - mesh prototype complete i PROBABLY messed up something in mesh. committing so I can keep track of changes while I test in the example Signed-off-by: Corey --- include/nbl/asset/ICPUMeshPipeline.h | 16 +- include/nbl/asset/IMeshPipeline.h | 13 -- include/nbl/video/IGPUCommandBuffer.h | 16 +- include/nbl/video/IGPUCommandPool.h | 36 ++-- include/nbl/video/IGPUMeshPipeline.h | 3 +- include/nbl/video/asset_traits.h | 13 +- src/nbl/CMakeLists.txt | 3 +- src/nbl/video/CVulkanCommandBuffer.cpp | 11 +- src/nbl/video/CVulkanCommandBuffer.h | 3 +- src/nbl/video/CVulkanLogicalDevice.cpp | 241 ++++++++++++++++++------ src/nbl/video/CVulkanLogicalDevice.h | 6 +- src/nbl/video/CVulkanPhysicalDevice.cpp | 18 ++ src/nbl/video/IGPUCommandBuffer.cpp | 52 ++++- 13 files changed, 316 insertions(+), 115 deletions(-) diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h index 7a0aa07e8d..ec679a48be 100644 --- a/include/nbl/asset/ICPUMeshPipeline.h +++ b/include/nbl/asset/ICPUMeshPipeline.h @@ -39,9 +39,11 @@ class ICPUMeshPipeline final : public ICPUPipeline getSpecInfos(const hlsl::ShaderStage stage) const override final { - const auto stageIndex 
= stageToIndex(stage); - if (stageIndex != -1) - return { &m_specInfos[stageIndex], 1 }; + switch (stage) { + case hlsl::ShaderStage::ESS_TASK: return { &m_specInfos[0], 1 }; + case hlsl::ShaderStage::ESS_MESH: return { &m_specInfos[1], 1 }; + case hlsl::ShaderStage::ESS_FRAGMENT: return { &m_specInfos[2], 1 }; /* one spec info per stage, same as task and mesh */ + } return {}; } @@ -53,9 +55,11 @@ class ICPUMeshPipeline final : public ICPUPipeline& stagePresence) { - /* - VUID-VkPipelineShaderStageCreateInfo-stage-02091 - If the meshShaders feature is not enabled, stage must not be VK_SHADER_STAGE_MESH_BIT_EXT - - VUID-VkPipelineShaderStageCreateInfo-stage-02092 - If the taskShaders feature is not enabled, stage must not be VK_SHADER_STAGE_TASK_BIT_EXT - - need to check extentions here - - maybe assert vertex,geo, and tess arent used here? - */ - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-stage-02096 if (!stagePresence.hasFlags(hlsl::ShaderStage::ESS_MESH)) { return false; diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index bb6460754a..26e9b77b12 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -328,8 +328,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject bool copyAccelerationStructureFromMemory(const AccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo); //! 
state setup - bool bindComputePipeline(const IGPUComputePipeline* const pipeline); bool bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline); + bool bindComputePipeline(const IGPUComputePipeline* const pipeline); + bool bindMeshPipeline(const IGPUMeshPipeline* const pipeline); bool bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline); bool bindDescriptorSets( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, @@ -587,6 +588,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject inline const core::unordered_map& getBoundDescriptorSetsRecord() const { return m_boundDescriptorSetsRecord; } const IGPUGraphicsPipeline* getBoundGraphicsPipeline() const { return m_boundGraphicsPipeline; } const IGPUComputePipeline* getBoundComputePipeline() const { return m_boundComputePipeline; } + const IGPUMeshPipeline* getBoundMeshPipeline() const { return m_boundMeshPipeline; } const IGPURayTracingPipeline* getBoundRayTracingPipeline() const { return m_boundRayTracingPipeline; } protected: @@ -670,8 +672,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure* src, const asset::SBufferBinding& dst) = 0; virtual bool copyAccelerationStructureFromMemory_impl(const asset::SBufferBinding& src, IGPUAccelerationStructure* dst) = 0; - virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0; virtual bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) = 0; + virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0; + virtual bool bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) = 0; virtual bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) = 0; virtual bool bindDescriptorSets_impl( const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, @@ -750,9 +753,10 @@ class 
NBL_API2 IGPUCommandBuffer : public IBackendObject m_boundDescriptorSetsRecord.clear(); m_TLASTrackingOps.clear(); - m_boundGraphicsPipeline= nullptr; - m_boundComputePipeline= nullptr; - m_boundRayTracingPipeline= nullptr; + m_boundGraphicsPipeline = nullptr; + m_boundComputePipeline = nullptr; + m_boundMeshPipeline = nullptr; + m_boundRayTracingPipeline = nullptr; m_haveRtPipelineStackSize = false; m_commandList.head = nullptr; @@ -770,6 +774,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_TLASTrackingOps.clear(); m_boundGraphicsPipeline= nullptr; m_boundComputePipeline= nullptr; + m_boundMeshPipeline = nullptr; m_boundRayTracingPipeline= nullptr; m_haveRtPipelineStackSize = false; releaseResourcesBackToPool_impl(); @@ -931,6 +936,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject const IGPUGraphicsPipeline* m_boundGraphicsPipeline; const IGPUComputePipeline* m_boundComputePipeline; + const IGPUMeshPipeline* m_boundMeshPipeline; const IGPURayTracingPipeline* m_boundRayTracingPipeline; IGPUCommandPool::CCommandSegmentListPool::SCommandSegmentList m_commandList = {}; diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index 0424ad83bd..4a89bc0872 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -8,8 +8,9 @@ #include "nbl/video/IEvent.h" #include "nbl/video/IGPUDescriptorSet.h" -#include "nbl/video/IGPUComputePipeline.h" #include "nbl/video/IGPUGraphicsPipeline.h" +#include "nbl/video/IGPUComputePipeline.h" +#include "nbl/video/IGPUMeshPipeline.h" #include "nbl/video/IGPURayTracingPipeline.h" #include "nbl/video/IGPUFramebuffer.h" #include "nbl/video/IQueryPool.h" @@ -125,7 +126,6 @@ class IGPUCommandPool : public IBackendObject class CBeginRenderPassCmd; class CPipelineBarrierCmd; class CBindDescriptorSetsCmd; - class CBindComputePipelineCmd; class CUpdateBufferCmd; class CResetQueryPoolCmd; class CWriteTimestampCmd; @@ -133,6 +133,9 @@ class IGPUCommandPool : 
public IBackendObject class CEndQueryCmd; class CCopyQueryPoolResultsCmd; class CBindGraphicsPipelineCmd; + class CBindComputePipelineCmd; + class CBindMeshPipelineCmd; + class CBindRayTracingPipelineCmd; class CPushConstantsCmd; class CBindVertexBuffersCmd; class CCopyBufferCmd; @@ -155,7 +158,6 @@ class IGPUCommandPool : public IBackendObject class CCopyAccelerationStructureToOrFromMemoryCmd; // for both vkCmdCopyAccelerationStructureToMemoryKHR and vkCmdCopyMemoryToAccelerationStructureKHR class CTraceRaysCmd; class CTraceRaysIndirectCmd; - class CBindRayTracingPipelineCmd; protected: IGPUCommandPool(core::smart_refctd_ptr&& dev, const core::bitflag _flags, const uint8_t _familyIx) @@ -529,15 +531,6 @@ class IGPUCommandPool::CBindDescriptorSetsCmd final : public IFixedSizeCommand m_sets[IGPUPipelineLayout::DESCRIPTOR_SET_COUNT]; }; -class IGPUCommandPool::CBindComputePipelineCmd final : public IFixedSizeCommand -{ - public: - CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} - - private: - core::smart_refctd_ptr m_pipeline; -}; - class IGPUCommandPool::CUpdateBufferCmd final : public IFixedSizeCommand { public: @@ -595,6 +588,7 @@ class IGPUCommandPool::CCopyQueryPoolResultsCmd final : public IFixedSizeCommand core::smart_refctd_ptr m_dstBuffer; }; +//i dont really understand how to mirror this with mesh pipeline yet class IGPUCommandPool::CBindGraphicsPipelineCmd final : public IFixedSizeCommand { public: @@ -604,6 +598,24 @@ class IGPUCommandPool::CBindGraphicsPipelineCmd final : public IFixedSizeCommand core::smart_refctd_ptr m_pipeline; }; +class IGPUCommandPool::CBindComputePipelineCmd final : public IFixedSizeCommand +{ +public: + CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + +private: + core::smart_refctd_ptr m_pipeline; +}; + +class IGPUCommandPool::CBindMeshPipelineCmd final : public IFixedSizeCommand +{ +public: + 
CBindMeshPipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + +private: + core::smart_refctd_ptr m_pipeline; +}; + class IGPUCommandPool::CPushConstantsCmd final : public IFixedSizeCommand { public: diff --git a/include/nbl/video/IGPUMeshPipeline.h b/include/nbl/video/IGPUMeshPipeline.h index 37bf409355..4daddab69f 100644 --- a/include/nbl/video/IGPUMeshPipeline.h +++ b/include/nbl/video/IGPUMeshPipeline.h @@ -73,6 +73,7 @@ namespace nbl::video inline SSpecializationValidationResult valid() const { + //this seems like the place to check if the mesh extension exists, but the raytracing pipeline doesnt do it here if (!layout) return {}; SSpecializationValidationResult retval = { .count = 0,.dataSize = 0 }; @@ -102,7 +103,7 @@ namespace nbl::video if (!hasRequiredStages(stagePresence)) return {}; - //if (!vertexShader.shader) return {}; //i dont quite understand why this line was here. checking if the shader itself was made correctly? + //if (!vertexShader.shader) return {}; //i dont quite understand why this line was in IGPUGraphics. checking if the shader itself was made correctly? 
return retval; } diff --git a/include/nbl/video/asset_traits.h b/include/nbl/video/asset_traits.h index d98274778c..c4279a5cad 100644 --- a/include/nbl/video/asset_traits.h +++ b/include/nbl/video/asset_traits.h @@ -9,10 +9,6 @@ #include "nbl/video/IGPUBufferView.h" #include "nbl/asset/ICPUDescriptorSet.h" #include "nbl/video/IGPUDescriptorSet.h" -#include "nbl/asset/ICPUComputePipeline.h" -#include "nbl/video/IGPUComputePipeline.h" -#include "nbl/asset/ICPUGraphicsPipeline.h" -#include "nbl/video/IGPUGraphicsPipeline.h" #include "nbl/asset/ICPUSampler.h" #include "nbl/video/IGPUSampler.h" #include "nbl/asset/ICPUImageView.h" @@ -21,11 +17,14 @@ #include "nbl/video/IGPUAccelerationStructure.h" #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/video/IGPUPolygonGeometry.h" -#include "nbl/asset/ICPURayTracingPipeline.h" -#include "nbl/video/IGPURayTracingPipeline.h" - +#include "nbl/asset/ICPUGraphicsPipeline.h" +#include "nbl/video/IGPUGraphicsPipeline.h" +#include "nbl/asset/ICPUComputePipeline.h" +#include "nbl/video/IGPUComputePipeline.h" #include "nbl/asset/ICPUMeshPipeline.h" #include "nbl/video/IGPUMeshPipeline.h" +#include "nbl/asset/ICPURayTracingPipeline.h" +#include "nbl/video/IGPURayTracingPipeline.h" namespace nbl::video diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 76e046848c..3bc379c08f 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -266,7 +266,6 @@ set(NBL_VIDEO_SOURCES video/CVulkanDescriptorSetLayout.cpp video/CVulkanPipelineLayout.cpp video/CVulkanPipelineCache.cpp - video/CVulkanComputePipeline.cpp video/CVulkanDescriptorPool.cpp video/CVulkanDescriptorSet.cpp video/CVulkanMemoryAllocation.cpp @@ -279,6 +278,8 @@ set(NBL_VIDEO_SOURCES video/CVulkanConnection.cpp video/CVulkanPhysicalDevice.cpp video/CVulkanGraphicsPipeline.cpp + video/CVulkanComputePipeline.cpp + video/CVulkanMeshPipeline.cpp video/CVulkanRayTracingPipeline.cpp video/CVulkanEvent.cpp video/CSurfaceVulkan.cpp diff --git 
a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index a55c3a1e7b..c73557da02 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -406,15 +406,20 @@ bool CVulkanCommandBuffer::copyAccelerationStructureFromMemory_impl(const asset: return true; } +bool CVulkanCommandBuffer::bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) +{ + getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, static_cast(pipeline)->getInternalObject()); + return true; +} + bool CVulkanCommandBuffer::bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) { getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, static_cast(pipeline)->getInternalObject()); return true; } -bool CVulkanCommandBuffer::bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) -{ - getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, static_cast(pipeline)->getInternalObject()); +bool CVulkanCommandBuffer::bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) { + getFunctionTable().vkCmdBindPipeline(m_cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, static_cast(pipeline)->getInternalObject()); return true; } diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 9383585b23..41e6fa5e6f 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -181,8 +181,9 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure* src, const asset::SBufferBinding& dst); bool copyAccelerationStructureFromMemory_impl(const asset::SBufferBinding& src, IGPUAccelerationStructure* dst); - bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) override; bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) override; + bool 
bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) override; + bool bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) override; bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) override; bool bindDescriptorSets_impl(const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout, const uint32_t firstSet, const uint32_t descriptorSetCount, const IGPUDescriptorSet* const* const pDescriptorSets, const uint32_t dynamicOffsetCount = 0u, const uint32_t* const dynamicOffsets = nullptr) override; bool pushConstants_impl(const IGPUPipelineLayout* const layout, const core::bitflag stageFlags, const uint32_t offset, const uint32_t size, const void* const pValues) override; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 928497ccaf..cd24704df8 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1121,7 +1121,7 @@ VkPipelineShaderStageCreateInfo getVkShaderStageCreateInfoFrom( if (requireFullSubgroups) { - assert(stage==hlsl::ShaderStage::ESS_COMPUTE/*TODO: Or Mesh Or Task*/); + assert(stage==hlsl::ShaderStage::ESS_COMPUTE || stage == hlsl::ShaderStage::ESS_MESH || stage == hlsl::ShaderStage::ESS_TASK); retval.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT; } } @@ -1360,13 +1360,132 @@ core::vector getDefaultDynamicStates(SPhysicalDeviceFeatures con void CVulkanLogicalDevice::createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span params, + const std::span createInfos, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) { const auto& features = getEnabledFeatures(); + const VkPipelineCache vk_pipelineCache = pipelineCache ? 
static_cast(pipelineCache)->getInternalObject() : VK_NULL_HANDLE; + core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr }); + + core::vector vk_rasterizationStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_multisampleStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_depthStencilStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendAttachmentStates(createInfos.size() * IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); + + core::vector vk_dynamicStates = getDefaultDynamicStates(features); + + const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0u, + .dynamicStateCount = static_cast(vk_dynamicStates.size()), + .pDynamicStates = vk_dynamicStates.data() + }; + core::vector vk_viewportStates(createInfos.size(), { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway + .flags = 0, // must be 0 + .viewportCount = 0, + .pViewports = nullptr, + .scissorCount = 0, + .pScissors = nullptr, + }); + + PopulateMeshGraphicsCommonData( + createInfos, vk_createInfos, + + vk_viewportStates, + vk_rasterizationStates, + vk_multisampleStates, + vk_depthStencilStates, + vk_colorBlendStates, + vk_colorBlendAttachmentStates, + + vk_dynamicStates, vk_dynamicStateCreateInfo + ); + + //not used in mesh pipelines + for (auto& outCreateInfo : vk_createInfos) { + outCreateInfo.pVertexInputState = nullptr; + outCreateInfo.pInputAssemblyState = nullptr; + 
outCreateInfo.pTessellationState = nullptr; + } + auto outCreateInfo = vk_createInfos.data(); + + const auto maxShaderStages = createInfos.size() * IGPUMeshPipeline::MESH_SHADER_STAGE_COUNT; + core::vector vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr }); + core::vector vk_shaderModule(maxShaderStages, { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0 }); + core::vector entryPoints(maxShaderStages); + core::vector vk_requiredSubgroupSize(maxShaderStages, { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr}); + core::vector vk_specializationInfos(maxShaderStages, { 0,nullptr,0,nullptr }); + core::vector vk_specializationMapEntry(validation.count); + core::vector specializationData(validation.dataSize); + auto outShaderStage = vk_shaderStage.data(); + auto outEntryPoints = entryPoints.data(); + auto outShaderModule = vk_shaderModule.data(); + auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); + auto outSpecInfo = vk_specializationInfos.data(); + auto outSpecMapEntry = vk_specializationMapEntry.data(); + auto outSpecData = specializationData.data(); + + //shader + for (const auto& info : createInfos) + { + outCreateInfo->pStages = outShaderStage; + auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) + { + if (spec.shader) + { + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, + shaderStage, + false, + outShaderModule, + outEntryPoints, + outRequiredSubgroupSize, + outSpecInfo, + outSpecMapEntry, + outSpecData + ); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + } + }; + processSpecShader(info.taskShader, hlsl::ShaderStage::ESS_TASK); + processSpecShader(info.meshShader, hlsl::ShaderStage::ESS_MESH); + processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + + outCreateInfo++; + } + + auto vk_pipelines = reinterpret_cast(output); + 
std::stringstream debugNameBuilder; + if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev, vk_pipelineCache, vk_createInfos.size(), vk_createInfos.data(), nullptr, vk_pipelines) == VK_SUCCESS) + { + for (size_t i = 0ull; i < createInfos.size(); ++i) + { + const auto& createInfo = createInfos[i]; + const VkPipeline vk_pipeline = vk_pipelines[i]; + // break the lifetime cause of the aliasing + std::uninitialized_default_construct_n(output + i, 1); + output[i] = core::make_smart_refctd_ptr(createInfos[i], vk_pipeline); + debugNameBuilder.str(""); + auto buildDebugName = [&](const IGPUPipelineBase::SShaderSpecInfo& spec, hlsl::ShaderStage stage) + { + if (spec.shader != nullptr) + debugNameBuilder << spec.shader->getFilepathHint() << "(" << spec.entryPoint << "," << stage << ")\n"; + }; + buildDebugName(createInfo.taskShader, hlsl::ESS_TASK); + buildDebugName(createInfo.meshShader, hlsl::ESS_MESH); + buildDebugName(createInfo.fragmentShader, hlsl::ESS_FRAGMENT); + output[i]->setObjectDebugName(debugNameBuilder.str().c_str()); + } + } + else + std::fill_n(output, vk_createInfos.size(), nullptr); } void CVulkanLogicalDevice::createGraphicsPipelines_impl( @@ -1436,6 +1555,58 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( core::vector vk_tessellation(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,nullptr,0 }); core::vector vk_vertexInput(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,nullptr,0 }); + auto outCreateInfo = vk_createInfos.data(); + auto outVertexInput = vk_vertexInput.data(); + auto outInputBinding = vk_inputBinding.data(); + auto outInputAttribute = vk_inputAttribute.data(); + auto outTessellation = vk_tessellation.data(); + auto outInputAssembly = vk_inputAssembly.data(); + + //ill acknowledge this additional looping is a little ugly + //input and tess + for (const auto& info : createInfos) + { + { + const auto& vertexInputParams = info.cached.vertexInput; + 
outVertexInput->pVertexBindingDescriptions = outInputBinding; + for (auto b = 0u; b < asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; b++) + if (vertexInputParams.enabledBindingFlags & (1 << b)) + { + outInputBinding->binding = b; + outInputBinding->stride = vertexInputParams.bindings[b].stride; + outInputBinding->inputRate = static_cast(vertexInputParams.bindings[b].inputRate); + outInputBinding++; + } + outVertexInput->vertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions, outInputBinding); + outVertexInput->pVertexAttributeDescriptions = outInputAttribute; + for (auto l = 0u; l < asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT; l++) + if (vertexInputParams.enabledAttribFlags & (1 << l)) + { + outInputAttribute->location = l; + outInputAttribute->binding = vertexInputParams.attributes[l].binding; + outInputAttribute->format = getVkFormatFromFormat(static_cast(vertexInputParams.attributes[l].format)); + outInputAttribute->offset = vertexInputParams.attributes[l].relativeOffset; + outInputAttribute++; + } + outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions, outInputAttribute); + } + outCreateInfo->pVertexInputState = outVertexInput++; + { + const auto& primAssParams = info.cached.primitiveAssembly; + outInputAssembly->topology = static_cast(primAssParams.primitiveType); + outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; + } + outCreateInfo->pInputAssemblyState = outInputAssembly++; + + if (info.tesselationControlShader.shader || info.tesselationEvaluationShader.shader) + { + outTessellation->patchControlPoints = info.cached.primitiveAssembly.tessPatchVertCount; + outCreateInfo->pTessellationState = outTessellation++; + } + + outCreateInfo++; + } + const auto maxShaderStages = createInfos.size()*IGPUGraphicsPipeline::GRAPHICS_SHADER_STAGE_COUNT; core::vector 
vk_shaderStage(maxShaderStages,{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr}); core::vector vk_shaderModule(maxShaderStages,{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); @@ -1447,7 +1618,7 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( core::vector vk_specializationMapEntry(validation.count); core::vector specializationData(validation.dataSize); - auto outCreateInfo = vk_createInfos.data(); + outCreateInfo = vk_createInfos.data(); auto outShaderStage = vk_shaderStage.data(); auto outEntryPoints = entryPoints.data(); auto outShaderModule = vk_shaderModule.data(); @@ -1455,14 +1626,8 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( auto outSpecInfo = vk_specializationInfos.data(); auto outSpecMapEntry = vk_specializationMapEntry.data(); auto outSpecData = specializationData.data(); - auto outVertexInput = vk_vertexInput.data(); - auto outInputBinding = vk_inputBinding.data(); - auto outInputAttribute = vk_inputAttribute.data(); - auto outTessellation = vk_tessellation.data(); - auto outInputAssembly = vk_inputAssembly.data(); - - //graphics only stuff + //shader for (const auto& info : createInfos) { outCreateInfo->pStages = outShaderStage; @@ -1470,8 +1635,17 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( { if (spec.shader) { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, shaderStage, false, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); - outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, + shaderStage, + false, + outShaderModule, + outEntryPoints, + outRequiredSubgroupSize, + outSpecInfo, + outSpecMapEntry, + outSpecData + ); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); } }; processSpecShader(info.vertexShader, hlsl::ShaderStage::ESS_VERTEX); @@ -1480,52 +1654,9 @@ void 
CVulkanLogicalDevice::createGraphicsPipelines_impl( processSpecShader(info.geometryShader, hlsl::ShaderStage::ESS_GEOMETRY); processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); - // when dealing with mesh shaders, the vertex input and assembly state will be null - { - { - const auto& vertexInputParams = info.cached.vertexInput; - outVertexInput->pVertexBindingDescriptions = outInputBinding; - for (auto b=0u; bbinding = b; - outInputBinding->stride = vertexInputParams.bindings[b].stride; - outInputBinding->inputRate = static_cast(vertexInputParams.bindings[b].inputRate); - outInputBinding++; - } - outVertexInput->vertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions,outInputBinding); - outVertexInput->pVertexAttributeDescriptions = outInputAttribute; - for (auto l=0u; llocation = l; - outInputAttribute->binding = vertexInputParams.attributes[l].binding; - outInputAttribute->format = getVkFormatFromFormat(static_cast(vertexInputParams.attributes[l].format)); - outInputAttribute->offset = vertexInputParams.attributes[l].relativeOffset; - outInputAttribute++; - } - outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions,outInputAttribute); - } - outCreateInfo->pVertexInputState = outVertexInput++; - { - const auto& primAssParams = info.cached.primitiveAssembly; - outInputAssembly->topology = static_cast(primAssParams.primitiveType); - outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; - } - outCreateInfo->pInputAssemblyState = outInputAssembly++; - - if (info.tesselationControlShader.shader || info.tesselationEvaluationShader.shader) - { - outTessellation->patchControlPoints = info.cached.primitiveAssembly.tessPatchVertCount; - outCreateInfo->pTessellationState = outTessellation++; - } - } - outCreateInfo++; } - - auto vk_pipelines = reinterpret_cast(output); std::stringstream debugNameBuilder; if 
(m_devf.vk.vkCreateGraphicsPipelines(m_vkdev,vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index f5e099cf92..0d1f75918d 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -29,8 +29,8 @@ #include "nbl/video/CVulkanDeferredOperation.h" #include "nbl/video/CVulkanAccelerationStructure.h" -#include "nbl/video/CVulkanComputePipeline.h" #include "nbl/video/CVulkanGraphicsPipeline.h" +#include "nbl/video/CVulkanComputePipeline.h" #include "nbl/video/CVulkanMeshPipeline.h" #include "nbl/video/CVulkanRayTracingPipeline.h" @@ -291,13 +291,13 @@ class CVulkanLogicalDevice final : public ILogicalDevice ) override; void createGraphicsPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span params, + const std::span createInfos, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) override; void createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span params, + const std::span createInfos, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) override; //final? 
diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index da86d7c9d9..9f0d00ff85 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -732,6 +732,12 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperativeMatrixFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR }; VkPhysicalDeviceMaintenance5FeaturesKHR maintenance5Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_FEATURES_KHR }; VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT graphicsPipelineLibraryFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT }; + VkPhysicalDeviceMeshShaderFeaturesEXT meshShaderFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT }; + + //do we hate macros? +#define AddExtensionToPNextIfSupported(name, feat) if (isExtensionSupported(name)) addToPNextChain(&feat); + + AddExtensionToPNextIfSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME, meshShaderFeatures); if (isExtensionSupported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME)) addToPNextChain(&conditionalRenderingFeatures); @@ -817,6 +823,18 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart features.geometryShader = deviceFeatures.features.geometryShader; features.tessellationShader = deviceFeatures.features.tessellationShader; + + //check if features are existant first + //potentially put a copy of VkPhysicalDeviceMeshShaderFeaturesEXT directly into features + //depends on the less obvious properties + if (isExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { + features.meshShader = meshShaderFeatures.meshShader; + features.taskShader = meshShaderFeatures.taskShader; + //TODO + //meshShaderFeatures.primitiveFragmentShadingRateMeshShader; + //meshShaderFeatures.meshShaderQueries; + //meshShaderFeatures.multiviewMeshShader; + } if (!deviceFeatures.features.sampleRateShading || 
!deviceFeatures.features.dualSrcBlend) RETURN_NULL_PHYSICAL_DEVICE; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 1f619666ab..f080202649 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -963,6 +963,32 @@ template NBL_API2 bool IGPUCommandBuffer::copyAccelerationStructureFromMemory(const IGPUTopLevelAccelerationStructure::DeviceCopyFromMemoryInfo&); +bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline) +{ + // Because binding of the Gfx pipeline can happen outside of a Renderpass Scope, + // we cannot check renderpass-pipeline compatibility here. + // And checking before every drawcall would be performance suicide. + if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT)) + return false; + + if (!pipeline || !this->isCompatibleDevicewise(pipeline)) + { + NBL_LOG_ERROR("incompatible pipeline device!"); + return false; + } + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_boundGraphicsPipeline = pipeline; + + m_noCommands = false; + return bindGraphicsPipeline_impl(pipeline); +} + bool IGPUCommandBuffer::bindComputePipeline(const IGPUComputePipeline* const pipeline) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT)) @@ -988,7 +1014,7 @@ bool IGPUCommandBuffer::bindComputePipeline(const IGPUComputePipeline* const pip return true; } -bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline) +bool IGPUCommandBuffer::bindMeshPipeline(const IGPUMeshPipeline* const pipeline) { // Because binding of the Gfx pipeline can happen outside of a Renderpass Scope, // we cannot check renderpass-pipeline compatibility here. 
@@ -1002,18 +1028,19 @@ bool IGPUCommandBuffer::bindGraphicsPipeline(const IGPUGraphicsPipeline* const p return false; } - if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(pipeline))) { NBL_LOG_ERROR("out of host memory!"); return false; } - m_boundGraphicsPipeline = pipeline; + m_boundMeshPipeline = pipeline; m_noCommands = false; - return bindGraphicsPipeline_impl(pipeline); + return bindMeshPipeline_impl(pipeline); } + bool IGPUCommandBuffer::bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT)) @@ -1421,10 +1448,19 @@ bool IGPUCommandBuffer::copyQueryPoolResults( return copyQueryPoolResults_impl(queryPool, firstQuery, queryCount, dstBuffer, stride, flags); } - bool IGPUCommandBuffer::dispatch(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) { - if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) + /* + * potentially do something like this here. 
+ const bool whollyInsideRenderpass = m_recordingFlags.hasFlags(USAGE::RENDER_PASS_CONTINUE_BIT); + auto allowedQueueCaps = queue_flags_t::GRAPHICS_BIT; + auto allowedRenderpassScope = inside; + if (!whollyInsideRenderpass) + allowedQueueCaps = queue_flags_t::COMPUTE_BIT; + allowedRenderpassScope = outside; + */ + + if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT | queue_flags_t::GRAPHICS_BIT,RENDERPASS_SCOPE::BOTH)) return false; if (groupCountX==0 || groupCountY==0 || groupCountZ==0) @@ -1446,9 +1482,9 @@ bool IGPUCommandBuffer::dispatch(const uint32_t groupCountX, const uint32_t grou bool IGPUCommandBuffer::dispatchIndirect(const asset::SBufferBinding& binding) { - if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) + if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT | queue_flags_t::GRAPHICS_BIT, RENDERPASS_SCOPE::BOTH)) return false; - + //side mission - find out what 4 is. may impact mesh in a way im not expecting if (invalidBufferBinding(binding,4u/*TODO: is it really 4?*/,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)) return false; From 49b2814588b5d2937ef360da12d79b8d7849f9a4 Mon Sep 17 00:00:00 2001 From: Corey Date: Thu, 9 Oct 2025 04:49:06 -0500 Subject: [PATCH 132/140] still bug catching in example Signed-off-by: Corey --- include/nbl/asset/ICPUMeshPipeline.h | 2 +- include/nbl/video/IGPUMeshPipeline.h | 2 -- src/nbl/video/CVulkanPhysicalDevice.cpp | 35 ++++++++++++++++--- .../device_capabilities/device_features.json | 10 ++++++ 4 files changed, 42 insertions(+), 7 deletions(-) diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h index ec679a48be..b21a44b82c 100644 --- a/include/nbl/asset/ICPUMeshPipeline.h +++ b/include/nbl/asset/ICPUMeshPipeline.h @@ -42,7 +42,7 @@ class ICPUMeshPipeline final : public ICPUPipeline= renderpass->getSubpassCount()) diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 9f0d00ff85..03025b178d 
100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -828,12 +828,39 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart //potentially put a copy of VkPhysicalDeviceMeshShaderFeaturesEXT directly into features //depends on the less obvious properties if (isExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { - features.meshShader = meshShaderFeatures.meshShader; features.taskShader = meshShaderFeatures.taskShader; + features.meshShader = meshShaderFeatures.meshShader; //TODO - //meshShaderFeatures.primitiveFragmentShadingRateMeshShader; - //meshShaderFeatures.meshShaderQueries; - //meshShaderFeatures.multiviewMeshShader; + //VkBool32 multiviewMeshShader; + //VkBool32 primitiveFragmentShadingRateMeshShader; + //VkBool32 meshShaderQueries; + + //VkPhysicalDeviceMeshShaderPropertiesEXT + //#define LIMIT_INIT_MESH(limitMemberName) properties.limits.limitMemberName = meshShaderProperties.limitMemberName + //LIMIT_INIT_MESH(maxTaskWorkGroupTotalCount); + //LIMIT_INIT_MESH(maxTaskWorkGroupInvocations); + //LIMIT_INIT_MESH(maxTaskPayloadSize); + //LIMIT_INIT_MESH(maxTaskSharedMemorySize); + //LIMIT_INIT_MESH(maxTaskPayloadAndSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshWorkGroupInvocations); + //LIMIT_INIT_MESH(maxMeshSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshPayloadAndSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshOutputMemorySize); + //LIMIT_INIT_MESH(maxMeshOutputComponents); + //LIMIT_INIT_MESH(maxMeshOutputVertices); + //LIMIT_INIT_MESH(maxMeshOutputPrimitives); + //LIMIT_INIT_MESH(maxMeshOutputLayers); + //LIMIT_INIT_MESH(maxMeshMultiviewViewCount); + //LIMIT_INIT_MESH(maxMeshOutputPerVertexGranularity); + //LIMIT_INIT_MESH(maxMeshOutputPerPrimitiveGranularity); + + //for(uint8_t i = 0; i < 3; i++){ + // LIMIT_INIT_MESH(maxTaskWorkGroupCount[i]); + // LIMIT_INIT_MESH(maxTaskWorkGroupSize[i]); + // LIMIT_INIT_MESH(maxMeshWorkGroupCount[i]); + // LIMIT_INIT_MESH(maxMeshWorkGroupSize[i]); + //} + //#undef 
LIMIT_INIT_MESH } if (!deviceFeatures.features.sampleRateShading || !deviceFeatures.features.dualSrcBlend) diff --git a/src/nbl/video/device_capabilities/device_features.json b/src/nbl/video/device_capabilities/device_features.json index 5e4775e9b4..3cdaee820d 100644 --- a/src/nbl/video/device_capabilities/device_features.json +++ b/src/nbl/video/device_capabilities/device_features.json @@ -50,6 +50,16 @@ "type": "bool", "name": "tessellationShader", "value": false + }, + { + "type": "bool", + "name": "meshShader", + "value": false + }, + { + "type": "bool", + "name": "taskShader", + "value": false } ] }, From 0842f40f2fe5151f0278206accb9190d7026b9cd Mon Sep 17 00:00:00 2001 From: Corey Date: Mon, 8 Dec 2025 11:38:24 -0600 Subject: [PATCH 133/140] lingering uncommitted changes Signed-off-by: Corey --- 3rdparty/CMakeLists.txt | 2 + .../MonoDeviceApplication.hpp | 2 + src/nbl/CMakeLists.txt | 2 + src/nbl/video/CVulkanPhysicalDevice.cpp | 9 ++ src/nbl/video/ILogicalDevice.cpp | 2 +- .../device_capabilities/device_limits.json | 117 ++++++++++++++++++ 6 files changed, 133 insertions(+), 1 deletion(-) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index a6228b01de..4ef2cd887f 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -95,7 +95,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") endif() # boost +set(CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE) #forcing boost to be in release add_subdirectory(boost boost EXCLUDE_FROM_ALL) +set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "" FORCE) #restoring config from boost set(SPIRV_HEADERS_SKIP_INSTALL ON CACHE INTERNAL "Skip SPIRV-Headers install") set(SPIRV_HEADERS_SKIP_EXAMPLES ON CACHE INTERNAL "Skip SPIRV-Headers examples") diff --git a/include/nbl/application_templates/MonoDeviceApplication.hpp b/include/nbl/application_templates/MonoDeviceApplication.hpp index a3a169d7b7..4e0e6c759a 100644 --- a/include/nbl/application_templates/MonoDeviceApplication.hpp +++ 
b/include/nbl/application_templates/MonoDeviceApplication.hpp @@ -74,6 +74,8 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication const auto supportedPreferredFormats = getPreferredDeviceFeatures().intersectWith(m_physicalDevice->getFeatures()); params.featuresToEnable = getRequiredDeviceFeatures().unionWith(supportedPreferredFormats); + params.featuresToEnable.meshShader = true; + params.featuresToEnable.taskShader = true; m_device = m_physicalDevice->createLogicalDevice(std::move(params)); if (!m_device) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 3bc379c08f..6f5a6aabb4 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -281,6 +281,8 @@ set(NBL_VIDEO_SOURCES video/CVulkanComputePipeline.cpp video/CVulkanMeshPipeline.cpp video/CVulkanRayTracingPipeline.cpp + video/CVulkanComputePipeline.cpp + video/CVulkanMeshPipeline.cpp video/CVulkanEvent.cpp video/CSurfaceVulkan.cpp diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 03025b178d..1bc03b8c48 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -1547,6 +1547,9 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic VkPhysicalDeviceRayQueryFeaturesKHR rayQueryFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR,nullptr }; REQUIRE_EXTENSION_IF(enabledFeatures.rayQuery,VK_KHR_RAY_QUERY_EXTENSION_NAME,&rayQueryFeatures); // feature dependency taken care of + VkPhysicalDeviceMeshShaderFeaturesEXT meshShaderFeatures = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT, nullptr}; + REQUIRE_EXTENSION_IF(enabledFeatures.meshShader, VK_EXT_MESH_SHADER_EXTENSION_NAME, &meshShaderFeatures); + VkPhysicalDeviceShaderSMBuiltinsFeaturesNV shaderSMBuiltinsFeaturesNV = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SM_BUILTINS_FEATURES_NV,nullptr }; 
enableExtensionIfAvailable(VK_NV_SHADER_SM_BUILTINS_EXTENSION_NAME,&shaderSMBuiltinsFeaturesNV); @@ -1863,6 +1866,12 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic rayTracingPositionFetchFeatures.rayTracingPositionFetch = limits.rayTracingPositionFetch; + meshShaderFeatures.taskShader = enabledFeatures.taskShader; + meshShaderFeatures.meshShader = enabledFeatures.meshShader; + meshShaderFeatures.primitiveFragmentShadingRateMeshShader = VK_FALSE;//needs to be explicitly set? + meshShaderFeatures.meshShaderQueries = VK_FALSE; + meshShaderFeatures.multiviewMeshShader = VK_FALSE; + //shaderSMBuiltinsFeaturesNV [LIMIT SO ENABLE EVERYTHING BY DEFAULT] representativeFragmentTestFeatures.representativeFragmentTest = enabledFeatures.representativeFragmentTest; diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 561574b83d..ff783eaa3d 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -950,7 +950,7 @@ bool ILogicalDevice::createMeshPipelines( if (params[ix].taskShader.shader != nullptr) { if (!features.taskShader) { - NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); + NBL_LOG_ERROR("Feature `task shader` is not enabled"); return false; } } diff --git a/src/nbl/video/device_capabilities/device_limits.json b/src/nbl/video/device_capabilities/device_limits.json index e8bc3a3af4..b1f8852f00 100644 --- a/src/nbl/video/device_capabilities/device_limits.json +++ b/src/nbl/video/device_capabilities/device_limits.json @@ -339,6 +339,123 @@ } ] }, + { + "comment": ["VkPhysicalDeviceMeshShaderPropertiesEXT - task"], + "entries":[ + { + "type": "uint32_t", + "name": "maxTaskWorkGroupTotalCount", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxTaskWorkGroupCount[3]", + "value": "{MinMaxWorkgroupCount,MinMaxWorkgroupCount,MinMaxWorkgroupCount}" + }, + { + "type": "uint32_t", + "name": "maxTaskWorkGroupInvocations", + "value": "MinMaxWorkgroupInvocations" + }, + { + "type": 
"uint32_t", + "name": "maxTaskWorkGroupSize[3]", + "value": "{MinMaxWorkgroupInvocations,MinMaxWorkgroupInvocations,64u}" + }, + { + "type": "uint32_t", + "name": "maxTaskPayloadSize", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxTaskSharedMemorySize", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxTaskPayloadAndSharedMemorySize", + "value": 0 + } + ] + }, + { + "comment": ["VkPhysicalDeviceMeshShaderPropertiesEXT - mesh"], + "entries":[ + { + "type": "uint32_t", + "name": "maxMeshWorkGroupCount[3]", + "value": "{MinMaxWorkgroupCount,MinMaxWorkgroupCount,MinMaxWorkgroupCount}" + }, + { + "type": "uint32_t", + "name": "maxMeshWorkGroupInvocations", + "value": "MinMaxWorkgroupInvocations" + }, + { + "type": "uint32_t", + "name": "maxMeshWorkGroupSize[3]", + "value": "{MinMaxWorkgroupInvocations,MinMaxWorkgroupInvocations,64u}" + }, + { + "type": "uint32_t", + "name": "maxMeshSharedMemorySize", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshPayloadAndSharedMemorySize", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshOutputMemorySize", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshPayloadAndOutputMemorySize", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshOutputComponents", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshOutputVertices", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshOutputPrimitives", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshOutputLayers", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshMultiviewViewCount", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshOutputPerVertexGranularity", + "value": 0 + }, + { + "type": "uint32_t", + "name": "maxMeshOutputPerPrimitiveGranularity", + "value": 0 + } + + + ] + }, { "comment": [], "entries": [ From ed1c52789c4bdda28e8f7613f7d61aee83b5389a Mon Sep 17 00:00:00 2001 From: Corey Date: Thu, 2 Oct 2025 21:36:17 -0500 Subject: [PATCH 134/140] i still need to 
reconcile mesh with compute. its based off graphics right now. graphics is stable tho Signed-off-by: Corey --- include/nbl/asset/ICPUMeshPipeline.h | 16 +- include/nbl/asset/IMeshPipeline.h | 13 ++ include/nbl/video/IGPUMeshPipeline.h | 5 +- include/nbl/video/asset_traits.h | 3 + src/nbl/video/CVulkanLogicalDevice.cpp | 237 ++++++------------------- src/nbl/video/CVulkanLogicalDevice.h | 4 +- src/nbl/video/ILogicalDevice.cpp | 2 +- 7 files changed, 81 insertions(+), 199 deletions(-) diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h index b21a44b82c..7a0aa07e8d 100644 --- a/include/nbl/asset/ICPUMeshPipeline.h +++ b/include/nbl/asset/ICPUMeshPipeline.h @@ -39,11 +39,9 @@ class ICPUMeshPipeline final : public ICPUPipeline getSpecInfos(const hlsl::ShaderStage stage) const override final { - switch (stage) { - case hlsl::ShaderStage::ESS_TASK: return { &m_specInfos[0], 1 }; - case hlsl::ShaderStage::ESS_MESH: return { &m_specInfos[1], 1 }; - case hlsl::ShaderStage::ESS_FRAGMENT: return { &m_specInfos[2], 1 }; - } + const auto stageIndex = stageToIndex(stage); + if (stageIndex != -1) + return { &m_specInfos[stageIndex], 1 }; return {}; } @@ -55,11 +53,9 @@ class ICPUMeshPipeline final : public ICPUPipeline& stagePresence) { + /* + VUID-VkPipelineShaderStageCreateInfo-stage-02091 + If the meshShaders feature is not enabled, stage must not be VK_SHADER_STAGE_MESH_BIT_EXT + + VUID-VkPipelineShaderStageCreateInfo-stage-02092 + If the taskShaders feature is not enabled, stage must not be VK_SHADER_STAGE_TASK_BIT_EXT + + need to check extentions here + + maybe assert vertex,geo, and tess arent used here? 
+ */ + + // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-stage-02096 if (!stagePresence.hasFlags(hlsl::ShaderStage::ESS_MESH)) { return false; diff --git a/include/nbl/video/IGPUMeshPipeline.h b/include/nbl/video/IGPUMeshPipeline.h index 794eb68ee1..37bf409355 100644 --- a/include/nbl/video/IGPUMeshPipeline.h +++ b/include/nbl/video/IGPUMeshPipeline.h @@ -73,10 +73,11 @@ namespace nbl::video inline SSpecializationValidationResult valid() const { - //this seems like the place to check if the mesh extension exists, but the raytracing pipeline doesnt do it here if (!layout) return {}; SSpecializationValidationResult retval = { .count = 0,.dataSize = 0 }; + if (!layout) + return {}; // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576 if (!renderpass || cached.subpassIx >= renderpass->getSubpassCount()) @@ -101,7 +102,7 @@ namespace nbl::video if (!hasRequiredStages(stagePresence)) return {}; - //if (!vertexShader.shader) return {}; //i dont quite understand why this line was in IGPUGraphics. checking if the shader itself was made correctly? + //if (!vertexShader.shader) return {}; //i dont quite understand why this line was here. checking if the shader itself was made correctly? 
return retval; } diff --git a/include/nbl/video/asset_traits.h b/include/nbl/video/asset_traits.h index c4279a5cad..b8e205aa4c 100644 --- a/include/nbl/video/asset_traits.h +++ b/include/nbl/video/asset_traits.h @@ -26,6 +26,9 @@ #include "nbl/asset/ICPURayTracingPipeline.h" #include "nbl/video/IGPURayTracingPipeline.h" +#include "nbl/asset/ICPUMeshPipeline.h" +#include "nbl/video/IGPUMeshPipeline.h" + namespace nbl::video { diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index cd24704df8..abb018e16c 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1360,132 +1360,13 @@ core::vector getDefaultDynamicStates(SPhysicalDeviceFeatures con void CVulkanLogicalDevice::createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span createInfos, + const std::span params, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) { const auto& features = getEnabledFeatures(); - const VkPipelineCache vk_pipelineCache = pipelineCache ? 
static_cast(pipelineCache)->getInternalObject() : VK_NULL_HANDLE; - core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr }); - - core::vector vk_rasterizationStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }); - core::vector vk_multisampleStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }); - core::vector vk_depthStencilStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }); - core::vector vk_colorBlendStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }); - core::vector vk_colorBlendAttachmentStates(createInfos.size() * IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); - - core::vector vk_dynamicStates = getDefaultDynamicStates(features); - - const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0u, - .dynamicStateCount = static_cast(vk_dynamicStates.size()), - .pDynamicStates = vk_dynamicStates.data() - }; - core::vector vk_viewportStates(createInfos.size(), { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway - .flags = 0, // must be 0 - .viewportCount = 0, - .pViewports = nullptr, - .scissorCount = 0, - .pScissors = nullptr, - }); - - PopulateMeshGraphicsCommonData( - createInfos, vk_createInfos, - - vk_viewportStates, - vk_rasterizationStates, - vk_multisampleStates, - vk_depthStencilStates, - vk_colorBlendStates, - vk_colorBlendAttachmentStates, - - vk_dynamicStates, vk_dynamicStateCreateInfo - ); - - //not used in mesh pipelines - for (auto& outCreateInfo : vk_createInfos) { - outCreateInfo.pVertexInputState = nullptr; - outCreateInfo.pInputAssemblyState = nullptr; - 
outCreateInfo.pTessellationState = nullptr; - } - auto outCreateInfo = vk_createInfos.data(); - - const auto maxShaderStages = createInfos.size() * IGPUMeshPipeline::MESH_SHADER_STAGE_COUNT; - core::vector vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr }); - core::vector vk_shaderModule(maxShaderStages, { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0 }); - core::vector entryPoints(maxShaderStages); - core::vector vk_requiredSubgroupSize(maxShaderStages, { - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr}); - core::vector vk_specializationInfos(maxShaderStages, { 0,nullptr,0,nullptr }); - core::vector vk_specializationMapEntry(validation.count); - core::vector specializationData(validation.dataSize); - auto outShaderStage = vk_shaderStage.data(); - auto outEntryPoints = entryPoints.data(); - auto outShaderModule = vk_shaderModule.data(); - auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); - auto outSpecInfo = vk_specializationInfos.data(); - auto outSpecMapEntry = vk_specializationMapEntry.data(); - auto outSpecData = specializationData.data(); - - //shader - for (const auto& info : createInfos) - { - outCreateInfo->pStages = outShaderStage; - auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) - { - if (spec.shader) - { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, - shaderStage, - false, - outShaderModule, - outEntryPoints, - outRequiredSubgroupSize, - outSpecInfo, - outSpecMapEntry, - outSpecData - ); - outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); - } - }; - processSpecShader(info.taskShader, hlsl::ShaderStage::ESS_TASK); - processSpecShader(info.meshShader, hlsl::ShaderStage::ESS_MESH); - processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); - - outCreateInfo++; - } - - auto vk_pipelines = reinterpret_cast(output); - 
std::stringstream debugNameBuilder; - if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev, vk_pipelineCache, vk_createInfos.size(), vk_createInfos.data(), nullptr, vk_pipelines) == VK_SUCCESS) - { - for (size_t i = 0ull; i < createInfos.size(); ++i) - { - const auto& createInfo = createInfos[i]; - const VkPipeline vk_pipeline = vk_pipelines[i]; - // break the lifetime cause of the aliasing - std::uninitialized_default_construct_n(output + i, 1); - output[i] = core::make_smart_refctd_ptr(createInfos[i], vk_pipeline); - debugNameBuilder.str(""); - auto buildDebugName = [&](const IGPUPipelineBase::SShaderSpecInfo& spec, hlsl::ShaderStage stage) - { - if (spec.shader != nullptr) - debugNameBuilder << spec.shader->getFilepathHint() << "(" << spec.entryPoint << "," << stage << ")\n"; - }; - buildDebugName(createInfo.taskShader, hlsl::ESS_TASK); - buildDebugName(createInfo.meshShader, hlsl::ESS_MESH); - buildDebugName(createInfo.fragmentShader, hlsl::ESS_FRAGMENT); - output[i]->setObjectDebugName(debugNameBuilder.str().c_str()); - } - } - else - std::fill_n(output, vk_createInfos.size(), nullptr); } void CVulkanLogicalDevice::createGraphicsPipelines_impl( @@ -1555,58 +1436,6 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( core::vector vk_tessellation(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,nullptr,0 }); core::vector vk_vertexInput(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,nullptr,0 }); - auto outCreateInfo = vk_createInfos.data(); - auto outVertexInput = vk_vertexInput.data(); - auto outInputBinding = vk_inputBinding.data(); - auto outInputAttribute = vk_inputAttribute.data(); - auto outTessellation = vk_tessellation.data(); - auto outInputAssembly = vk_inputAssembly.data(); - - //ill acknowledge this additional looping is a little ugly - //input and tess - for (const auto& info : createInfos) - { - { - const auto& vertexInputParams = info.cached.vertexInput; - 
outVertexInput->pVertexBindingDescriptions = outInputBinding; - for (auto b = 0u; b < asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; b++) - if (vertexInputParams.enabledBindingFlags & (1 << b)) - { - outInputBinding->binding = b; - outInputBinding->stride = vertexInputParams.bindings[b].stride; - outInputBinding->inputRate = static_cast(vertexInputParams.bindings[b].inputRate); - outInputBinding++; - } - outVertexInput->vertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions, outInputBinding); - outVertexInput->pVertexAttributeDescriptions = outInputAttribute; - for (auto l = 0u; l < asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT; l++) - if (vertexInputParams.enabledAttribFlags & (1 << l)) - { - outInputAttribute->location = l; - outInputAttribute->binding = vertexInputParams.attributes[l].binding; - outInputAttribute->format = getVkFormatFromFormat(static_cast(vertexInputParams.attributes[l].format)); - outInputAttribute->offset = vertexInputParams.attributes[l].relativeOffset; - outInputAttribute++; - } - outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions, outInputAttribute); - } - outCreateInfo->pVertexInputState = outVertexInput++; - { - const auto& primAssParams = info.cached.primitiveAssembly; - outInputAssembly->topology = static_cast(primAssParams.primitiveType); - outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; - } - outCreateInfo->pInputAssemblyState = outInputAssembly++; - - if (info.tesselationControlShader.shader || info.tesselationEvaluationShader.shader) - { - outTessellation->patchControlPoints = info.cached.primitiveAssembly.tessPatchVertCount; - outCreateInfo->pTessellationState = outTessellation++; - } - - outCreateInfo++; - } - const auto maxShaderStages = createInfos.size()*IGPUGraphicsPipeline::GRAPHICS_SHADER_STAGE_COUNT; core::vector 
vk_shaderStage(maxShaderStages,{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr}); core::vector vk_shaderModule(maxShaderStages,{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); @@ -1626,8 +1455,14 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( auto outSpecInfo = vk_specializationInfos.data(); auto outSpecMapEntry = vk_specializationMapEntry.data(); auto outSpecData = specializationData.data(); + auto outVertexInput = vk_vertexInput.data(); + auto outInputBinding = vk_inputBinding.data(); + auto outInputAttribute = vk_inputAttribute.data(); + auto outTessellation = vk_tessellation.data(); + auto outInputAssembly = vk_inputAssembly.data(); + - //shader + //graphics only stuff for (const auto& info : createInfos) { outCreateInfo->pStages = outShaderStage; @@ -1635,17 +1470,8 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( { if (spec.shader) { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, - shaderStage, - false, - outShaderModule, - outEntryPoints, - outRequiredSubgroupSize, - outSpecInfo, - outSpecMapEntry, - outSpecData - ); - outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, shaderStage, false, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); } }; processSpecShader(info.vertexShader, hlsl::ShaderStage::ESS_VERTEX); @@ -1654,9 +1480,52 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( processSpecShader(info.geometryShader, hlsl::ShaderStage::ESS_GEOMETRY); processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + // when dealing with mesh shaders, the vertex input and assembly state will be null + { + { + const auto& vertexInputParams = info.cached.vertexInput; + outVertexInput->pVertexBindingDescriptions = outInputBinding; + for 
(auto b=0u; bbinding = b; + outInputBinding->stride = vertexInputParams.bindings[b].stride; + outInputBinding->inputRate = static_cast(vertexInputParams.bindings[b].inputRate); + outInputBinding++; + } + outVertexInput->vertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions,outInputBinding); + outVertexInput->pVertexAttributeDescriptions = outInputAttribute; + for (auto l=0u; llocation = l; + outInputAttribute->binding = vertexInputParams.attributes[l].binding; + outInputAttribute->format = getVkFormatFromFormat(static_cast(vertexInputParams.attributes[l].format)); + outInputAttribute->offset = vertexInputParams.attributes[l].relativeOffset; + outInputAttribute++; + } + outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions,outInputAttribute); + } + outCreateInfo->pVertexInputState = outVertexInput++; + { + const auto& primAssParams = info.cached.primitiveAssembly; + outInputAssembly->topology = static_cast(primAssParams.primitiveType); + outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; + } + outCreateInfo->pInputAssemblyState = outInputAssembly++; + + if (info.tesselationControlShader.shader || info.tesselationEvaluationShader.shader) + { + outTessellation->patchControlPoints = info.cached.primitiveAssembly.tessPatchVertCount; + outCreateInfo->pTessellationState = outTessellation++; + } + } + outCreateInfo++; } + + auto vk_pipelines = reinterpret_cast(output); std::stringstream debugNameBuilder; if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev,vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 0d1f75918d..d1cacda54f 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -29,8 +29,8 @@ #include "nbl/video/CVulkanDeferredOperation.h" #include 
"nbl/video/CVulkanAccelerationStructure.h" -#include "nbl/video/CVulkanGraphicsPipeline.h" #include "nbl/video/CVulkanComputePipeline.h" +#include "nbl/video/CVulkanGraphicsPipeline.h" #include "nbl/video/CVulkanMeshPipeline.h" #include "nbl/video/CVulkanRayTracingPipeline.h" @@ -297,7 +297,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice ) override; void createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span createInfos, + const std::span params, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) override; //final? diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index ff783eaa3d..561574b83d 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -950,7 +950,7 @@ bool ILogicalDevice::createMeshPipelines( if (params[ix].taskShader.shader != nullptr) { if (!features.taskShader) { - NBL_LOG_ERROR("Feature `task shader` is not enabled"); + NBL_LOG_ERROR("Feature `mesh shader` is not enabled"); return false; } } From e4abc1f02259a34396eff35a004f2fa11ac4429f Mon Sep 17 00:00:00 2001 From: Corey Date: Fri, 3 Oct 2025 17:04:12 -0500 Subject: [PATCH 135/140] graphics are stable - mesh prototype complete i PROBABLY messed up something in mesh. 
committing so I can keep track of changes while I test in the example Signed-off-by: Corey --- include/nbl/asset/ICPUMeshPipeline.h | 16 +- include/nbl/asset/IMeshPipeline.h | 13 -- include/nbl/video/IGPUMeshPipeline.h | 3 +- include/nbl/video/asset_traits.h | 3 - src/nbl/video/CVulkanLogicalDevice.cpp | 237 ++++++++++++++++++------ src/nbl/video/CVulkanLogicalDevice.h | 4 +- src/nbl/video/CVulkanPhysicalDevice.cpp | 35 +--- 7 files changed, 202 insertions(+), 109 deletions(-) diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h index 7a0aa07e8d..ec679a48be 100644 --- a/include/nbl/asset/ICPUMeshPipeline.h +++ b/include/nbl/asset/ICPUMeshPipeline.h @@ -39,9 +39,11 @@ class ICPUMeshPipeline final : public ICPUPipeline getSpecInfos(const hlsl::ShaderStage stage) const override final { - const auto stageIndex = stageToIndex(stage); - if (stageIndex != -1) - return { &m_specInfos[stageIndex], 1 }; + switch (stage) { + case hlsl::ShaderStage::ESS_TASK: return { &m_specInfos[0], 1 }; + case hlsl::ShaderStage::ESS_MESH: return { &m_specInfos[1], 1 }; + case hlsl::ShaderStage::ESS_FRAGMENT: return { &m_specInfos[2], 2 }; + } return {}; } @@ -53,9 +55,11 @@ class ICPUMeshPipeline final : public ICPUPipeline& stagePresence) { - /* - VUID-VkPipelineShaderStageCreateInfo-stage-02091 - If the meshShaders feature is not enabled, stage must not be VK_SHADER_STAGE_MESH_BIT_EXT - - VUID-VkPipelineShaderStageCreateInfo-stage-02092 - If the taskShaders feature is not enabled, stage must not be VK_SHADER_STAGE_TASK_BIT_EXT - - need to check extentions here - - maybe assert vertex,geo, and tess arent used here? 
- */ - - // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-stage-02096 if (!stagePresence.hasFlags(hlsl::ShaderStage::ESS_MESH)) { return false; diff --git a/include/nbl/video/IGPUMeshPipeline.h b/include/nbl/video/IGPUMeshPipeline.h index 37bf409355..4daddab69f 100644 --- a/include/nbl/video/IGPUMeshPipeline.h +++ b/include/nbl/video/IGPUMeshPipeline.h @@ -73,6 +73,7 @@ namespace nbl::video inline SSpecializationValidationResult valid() const { + //this seems like the place to check if the mesh extension exists, but the raytracing pipeline doesnt do it here if (!layout) return {}; SSpecializationValidationResult retval = { .count = 0,.dataSize = 0 }; @@ -102,7 +103,7 @@ namespace nbl::video if (!hasRequiredStages(stagePresence)) return {}; - //if (!vertexShader.shader) return {}; //i dont quite understand why this line was here. checking if the shader itself was made correctly? + //if (!vertexShader.shader) return {}; //i dont quite understand why this line was in IGPUGraphics. checking if the shader itself was made correctly? 
return retval; } diff --git a/include/nbl/video/asset_traits.h b/include/nbl/video/asset_traits.h index b8e205aa4c..c4279a5cad 100644 --- a/include/nbl/video/asset_traits.h +++ b/include/nbl/video/asset_traits.h @@ -26,9 +26,6 @@ #include "nbl/asset/ICPURayTracingPipeline.h" #include "nbl/video/IGPURayTracingPipeline.h" -#include "nbl/asset/ICPUMeshPipeline.h" -#include "nbl/video/IGPUMeshPipeline.h" - namespace nbl::video { diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index abb018e16c..cd24704df8 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1360,13 +1360,132 @@ core::vector getDefaultDynamicStates(SPhysicalDeviceFeatures con void CVulkanLogicalDevice::createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span params, + const std::span createInfos, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) { const auto& features = getEnabledFeatures(); + const VkPipelineCache vk_pipelineCache = pipelineCache ? 
static_cast(pipelineCache)->getInternalObject() : VK_NULL_HANDLE; + core::vector vk_createInfos(createInfos.size(), { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,nullptr }); + + core::vector vk_rasterizationStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_multisampleStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_depthStencilStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendStates(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,nullptr,0 }); + core::vector vk_colorBlendAttachmentStates(createInfos.size() * IGPURenderpass::SCreationParams::SSubpassDescription::MaxColorAttachments); + + core::vector vk_dynamicStates = getDefaultDynamicStates(features); + + const VkPipelineDynamicStateCreateInfo vk_dynamicStateCreateInfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0u, + .dynamicStateCount = static_cast(vk_dynamicStates.size()), + .pDynamicStates = vk_dynamicStates.data() + }; + core::vector vk_viewportStates(createInfos.size(), { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, // the extensions that interest us have a dynamic state variant anyway + .flags = 0, // must be 0 + .viewportCount = 0, + .pViewports = nullptr, + .scissorCount = 0, + .pScissors = nullptr, + }); + + PopulateMeshGraphicsCommonData( + createInfos, vk_createInfos, + + vk_viewportStates, + vk_rasterizationStates, + vk_multisampleStates, + vk_depthStencilStates, + vk_colorBlendStates, + vk_colorBlendAttachmentStates, + + vk_dynamicStates, vk_dynamicStateCreateInfo + ); + + //not used in mesh pipelines + for (auto& outCreateInfo : vk_createInfos) { + outCreateInfo.pVertexInputState = nullptr; + outCreateInfo.pInputAssemblyState = nullptr; + 
outCreateInfo.pTessellationState = nullptr; + } + auto outCreateInfo = vk_createInfos.data(); + + const auto maxShaderStages = createInfos.size() * IGPUMeshPipeline::MESH_SHADER_STAGE_COUNT; + core::vector vk_shaderStage(maxShaderStages, { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr }); + core::vector vk_shaderModule(maxShaderStages, { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0 }); + core::vector entryPoints(maxShaderStages); + core::vector vk_requiredSubgroupSize(maxShaderStages, { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,nullptr}); + core::vector vk_specializationInfos(maxShaderStages, { 0,nullptr,0,nullptr }); + core::vector vk_specializationMapEntry(validation.count); + core::vector specializationData(validation.dataSize); + auto outShaderStage = vk_shaderStage.data(); + auto outEntryPoints = entryPoints.data(); + auto outShaderModule = vk_shaderModule.data(); + auto outRequiredSubgroupSize = vk_requiredSubgroupSize.data(); + auto outSpecInfo = vk_specializationInfos.data(); + auto outSpecMapEntry = vk_specializationMapEntry.data(); + auto outSpecData = specializationData.data(); + + //shader + for (const auto& info : createInfos) + { + outCreateInfo->pStages = outShaderStage; + auto processSpecShader = [&](IGPUPipelineBase::SShaderSpecInfo spec, hlsl::ShaderStage shaderStage) + { + if (spec.shader) + { + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, + shaderStage, + false, + outShaderModule, + outEntryPoints, + outRequiredSubgroupSize, + outSpecInfo, + outSpecMapEntry, + outSpecData + ); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + } + }; + processSpecShader(info.taskShader, hlsl::ShaderStage::ESS_TASK); + processSpecShader(info.meshShader, hlsl::ShaderStage::ESS_MESH); + processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); + + outCreateInfo++; + } + + auto vk_pipelines = reinterpret_cast(output); + 
std::stringstream debugNameBuilder; + if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev, vk_pipelineCache, vk_createInfos.size(), vk_createInfos.data(), nullptr, vk_pipelines) == VK_SUCCESS) + { + for (size_t i = 0ull; i < createInfos.size(); ++i) + { + const auto& createInfo = createInfos[i]; + const VkPipeline vk_pipeline = vk_pipelines[i]; + // break the lifetime cause of the aliasing + std::uninitialized_default_construct_n(output + i, 1); + output[i] = core::make_smart_refctd_ptr(createInfos[i], vk_pipeline); + debugNameBuilder.str(""); + auto buildDebugName = [&](const IGPUPipelineBase::SShaderSpecInfo& spec, hlsl::ShaderStage stage) + { + if (spec.shader != nullptr) + debugNameBuilder << spec.shader->getFilepathHint() << "(" << spec.entryPoint << "," << stage << ")\n"; + }; + buildDebugName(createInfo.taskShader, hlsl::ESS_TASK); + buildDebugName(createInfo.meshShader, hlsl::ESS_MESH); + buildDebugName(createInfo.fragmentShader, hlsl::ESS_FRAGMENT); + output[i]->setObjectDebugName(debugNameBuilder.str().c_str()); + } + } + else + std::fill_n(output, vk_createInfos.size(), nullptr); } void CVulkanLogicalDevice::createGraphicsPipelines_impl( @@ -1436,6 +1555,58 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( core::vector vk_tessellation(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,nullptr,0 }); core::vector vk_vertexInput(createInfos.size(), { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,nullptr,0 }); + auto outCreateInfo = vk_createInfos.data(); + auto outVertexInput = vk_vertexInput.data(); + auto outInputBinding = vk_inputBinding.data(); + auto outInputAttribute = vk_inputAttribute.data(); + auto outTessellation = vk_tessellation.data(); + auto outInputAssembly = vk_inputAssembly.data(); + + //ill acknowledge this additional looping is a little ugly + //input and tess + for (const auto& info : createInfos) + { + { + const auto& vertexInputParams = info.cached.vertexInput; + 
outVertexInput->pVertexBindingDescriptions = outInputBinding; + for (auto b = 0u; b < asset::SVertexInputParams::MAX_ATTR_BUF_BINDING_COUNT; b++) + if (vertexInputParams.enabledBindingFlags & (1 << b)) + { + outInputBinding->binding = b; + outInputBinding->stride = vertexInputParams.bindings[b].stride; + outInputBinding->inputRate = static_cast(vertexInputParams.bindings[b].inputRate); + outInputBinding++; + } + outVertexInput->vertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions, outInputBinding); + outVertexInput->pVertexAttributeDescriptions = outInputAttribute; + for (auto l = 0u; l < asset::SVertexInputParams::MAX_VERTEX_ATTRIB_COUNT; l++) + if (vertexInputParams.enabledAttribFlags & (1 << l)) + { + outInputAttribute->location = l; + outInputAttribute->binding = vertexInputParams.attributes[l].binding; + outInputAttribute->format = getVkFormatFromFormat(static_cast(vertexInputParams.attributes[l].format)); + outInputAttribute->offset = vertexInputParams.attributes[l].relativeOffset; + outInputAttribute++; + } + outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions, outInputAttribute); + } + outCreateInfo->pVertexInputState = outVertexInput++; + { + const auto& primAssParams = info.cached.primitiveAssembly; + outInputAssembly->topology = static_cast(primAssParams.primitiveType); + outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; + } + outCreateInfo->pInputAssemblyState = outInputAssembly++; + + if (info.tesselationControlShader.shader || info.tesselationEvaluationShader.shader) + { + outTessellation->patchControlPoints = info.cached.primitiveAssembly.tessPatchVertCount; + outCreateInfo->pTessellationState = outTessellation++; + } + + outCreateInfo++; + } + const auto maxShaderStages = createInfos.size()*IGPUGraphicsPipeline::GRAPHICS_SHADER_STAGE_COUNT; core::vector 
vk_shaderStage(maxShaderStages,{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,nullptr}); core::vector vk_shaderModule(maxShaderStages,{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,nullptr, 0}); @@ -1455,14 +1626,8 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( auto outSpecInfo = vk_specializationInfos.data(); auto outSpecMapEntry = vk_specializationMapEntry.data(); auto outSpecData = specializationData.data(); - auto outVertexInput = vk_vertexInput.data(); - auto outInputBinding = vk_inputBinding.data(); - auto outInputAttribute = vk_inputAttribute.data(); - auto outTessellation = vk_tessellation.data(); - auto outInputAssembly = vk_inputAssembly.data(); - - //graphics only stuff + //shader for (const auto& info : createInfos) { outCreateInfo->pStages = outShaderStage; @@ -1470,8 +1635,17 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( { if (spec.shader) { - *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, shaderStage, false, outShaderModule, outEntryPoints, outRequiredSubgroupSize, outSpecInfo, outSpecMapEntry, outSpecData); - outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); + *(outShaderStage++) = getVkShaderStageCreateInfoFrom(spec, + shaderStage, + false, + outShaderModule, + outEntryPoints, + outRequiredSubgroupSize, + outSpecInfo, + outSpecMapEntry, + outSpecData + ); + outCreateInfo->stageCount = std::distancepStages)>(outCreateInfo->pStages, outShaderStage); } }; processSpecShader(info.vertexShader, hlsl::ShaderStage::ESS_VERTEX); @@ -1480,52 +1654,9 @@ void CVulkanLogicalDevice::createGraphicsPipelines_impl( processSpecShader(info.geometryShader, hlsl::ShaderStage::ESS_GEOMETRY); processSpecShader(info.fragmentShader, hlsl::ShaderStage::ESS_FRAGMENT); - // when dealing with mesh shaders, the vertex input and assembly state will be null - { - { - const auto& vertexInputParams = info.cached.vertexInput; - outVertexInput->pVertexBindingDescriptions = outInputBinding; - for 
(auto b=0u; bbinding = b; - outInputBinding->stride = vertexInputParams.bindings[b].stride; - outInputBinding->inputRate = static_cast(vertexInputParams.bindings[b].inputRate); - outInputBinding++; - } - outVertexInput->vertexBindingDescriptionCount = std::distance(outVertexInput->pVertexBindingDescriptions,outInputBinding); - outVertexInput->pVertexAttributeDescriptions = outInputAttribute; - for (auto l=0u; llocation = l; - outInputAttribute->binding = vertexInputParams.attributes[l].binding; - outInputAttribute->format = getVkFormatFromFormat(static_cast(vertexInputParams.attributes[l].format)); - outInputAttribute->offset = vertexInputParams.attributes[l].relativeOffset; - outInputAttribute++; - } - outVertexInput->vertexAttributeDescriptionCount = std::distance(outVertexInput->pVertexAttributeDescriptions,outInputAttribute); - } - outCreateInfo->pVertexInputState = outVertexInput++; - { - const auto& primAssParams = info.cached.primitiveAssembly; - outInputAssembly->topology = static_cast(primAssParams.primitiveType); - outInputAssembly->primitiveRestartEnable = primAssParams.primitiveRestartEnable; - } - outCreateInfo->pInputAssemblyState = outInputAssembly++; - - if (info.tesselationControlShader.shader || info.tesselationEvaluationShader.shader) - { - outTessellation->patchControlPoints = info.cached.primitiveAssembly.tessPatchVertCount; - outCreateInfo->pTessellationState = outTessellation++; - } - } - outCreateInfo++; } - - auto vk_pipelines = reinterpret_cast(output); std::stringstream debugNameBuilder; if (m_devf.vk.vkCreateGraphicsPipelines(m_vkdev,vk_pipelineCache,vk_createInfos.size(),vk_createInfos.data(),nullptr,vk_pipelines)==VK_SUCCESS) diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index d1cacda54f..0d1f75918d 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -29,8 +29,8 @@ #include "nbl/video/CVulkanDeferredOperation.h" #include 
"nbl/video/CVulkanAccelerationStructure.h" -#include "nbl/video/CVulkanComputePipeline.h" #include "nbl/video/CVulkanGraphicsPipeline.h" +#include "nbl/video/CVulkanComputePipeline.h" #include "nbl/video/CVulkanMeshPipeline.h" #include "nbl/video/CVulkanRayTracingPipeline.h" @@ -297,7 +297,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice ) override; void createMeshPipelines_impl( IGPUPipelineCache* const pipelineCache, - const std::span params, + const std::span createInfos, core::smart_refctd_ptr* const output, const SSpecializationValidationResult& validation ) override; //final? diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 1bc03b8c48..be1005bc28 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -828,39 +828,12 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart //potentially put a copy of VkPhysicalDeviceMeshShaderFeaturesEXT directly into features //depends on the less obvious properties if (isExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { - features.taskShader = meshShaderFeatures.taskShader; features.meshShader = meshShaderFeatures.meshShader; + features.taskShader = meshShaderFeatures.taskShader; //TODO - //VkBool32 multiviewMeshShader; - //VkBool32 primitiveFragmentShadingRateMeshShader; - //VkBool32 meshShaderQueries; - - //VkPhysicalDeviceMeshShaderPropertiesEXT - //#define LIMIT_INIT_MESH(limitMemberName) properties.limits.limitMemberName = meshShaderProperties.limitMemberName - //LIMIT_INIT_MESH(maxTaskWorkGroupTotalCount); - //LIMIT_INIT_MESH(maxTaskWorkGroupInvocations); - //LIMIT_INIT_MESH(maxTaskPayloadSize); - //LIMIT_INIT_MESH(maxTaskSharedMemorySize); - //LIMIT_INIT_MESH(maxTaskPayloadAndSharedMemorySize); - //LIMIT_INIT_MESH(maxMeshWorkGroupInvocations); - //LIMIT_INIT_MESH(maxMeshSharedMemorySize); - //LIMIT_INIT_MESH(maxMeshPayloadAndSharedMemorySize); - //LIMIT_INIT_MESH(maxMeshOutputMemorySize); - 
//LIMIT_INIT_MESH(maxMeshOutputComponents); - //LIMIT_INIT_MESH(maxMeshOutputVertices); - //LIMIT_INIT_MESH(maxMeshOutputPrimitives); - //LIMIT_INIT_MESH(maxMeshOutputLayers); - //LIMIT_INIT_MESH(maxMeshMultiviewViewCount); - //LIMIT_INIT_MESH(maxMeshOutputPerVertexGranularity); - //LIMIT_INIT_MESH(maxMeshOutputPerPrimitiveGranularity); - - //for(uint8_t i = 0; i < 3; i++){ - // LIMIT_INIT_MESH(maxTaskWorkGroupCount[i]); - // LIMIT_INIT_MESH(maxTaskWorkGroupSize[i]); - // LIMIT_INIT_MESH(maxMeshWorkGroupCount[i]); - // LIMIT_INIT_MESH(maxMeshWorkGroupSize[i]); - //} - //#undef LIMIT_INIT_MESH + //meshShaderFeatures.primitiveFragmentShadingRateMeshShader; + //meshShaderFeatures.meshShaderQueries; + //meshShaderFeatures.multiviewMeshShader; } if (!deviceFeatures.features.sampleRateShading || !deviceFeatures.features.dualSrcBlend) From 7b15bf7bdbb4ff62bcca352835ca107eedc91a4e Mon Sep 17 00:00:00 2001 From: Corey Date: Thu, 9 Oct 2025 04:49:06 -0500 Subject: [PATCH 136/140] still bug catching in example Signed-off-by: Corey --- include/nbl/asset/ICPUMeshPipeline.h | 2 +- include/nbl/video/IGPUMeshPipeline.h | 2 -- src/nbl/video/CVulkanPhysicalDevice.cpp | 35 ++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h index ec679a48be..b21a44b82c 100644 --- a/include/nbl/asset/ICPUMeshPipeline.h +++ b/include/nbl/asset/ICPUMeshPipeline.h @@ -42,7 +42,7 @@ class ICPUMeshPipeline final : public ICPUPipeline= renderpass->getSubpassCount()) diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index be1005bc28..1bc03b8c48 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -828,12 +828,39 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart //potentially put a copy of VkPhysicalDeviceMeshShaderFeaturesEXT directly into features //depends on the less obvious properties if 
(isExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { - features.meshShader = meshShaderFeatures.meshShader; features.taskShader = meshShaderFeatures.taskShader; + features.meshShader = meshShaderFeatures.meshShader; //TODO - //meshShaderFeatures.primitiveFragmentShadingRateMeshShader; - //meshShaderFeatures.meshShaderQueries; - //meshShaderFeatures.multiviewMeshShader; + //VkBool32 multiviewMeshShader; + //VkBool32 primitiveFragmentShadingRateMeshShader; + //VkBool32 meshShaderQueries; + + //VkPhysicalDeviceMeshShaderPropertiesEXT + //#define LIMIT_INIT_MESH(limitMemberName) properties.limits.limitMemberName = meshShaderProperties.limitMemberName + //LIMIT_INIT_MESH(maxTaskWorkGroupTotalCount); + //LIMIT_INIT_MESH(maxTaskWorkGroupInvocations); + //LIMIT_INIT_MESH(maxTaskPayloadSize); + //LIMIT_INIT_MESH(maxTaskSharedMemorySize); + //LIMIT_INIT_MESH(maxTaskPayloadAndSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshWorkGroupInvocations); + //LIMIT_INIT_MESH(maxMeshSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshPayloadAndSharedMemorySize); + //LIMIT_INIT_MESH(maxMeshOutputMemorySize); + //LIMIT_INIT_MESH(maxMeshOutputComponents); + //LIMIT_INIT_MESH(maxMeshOutputVertices); + //LIMIT_INIT_MESH(maxMeshOutputPrimitives); + //LIMIT_INIT_MESH(maxMeshOutputLayers); + //LIMIT_INIT_MESH(maxMeshMultiviewViewCount); + //LIMIT_INIT_MESH(maxMeshOutputPerVertexGranularity); + //LIMIT_INIT_MESH(maxMeshOutputPerPrimitiveGranularity); + + //for(uint8_t i = 0; i < 3; i++){ + // LIMIT_INIT_MESH(maxTaskWorkGroupCount[i]); + // LIMIT_INIT_MESH(maxTaskWorkGroupSize[i]); + // LIMIT_INIT_MESH(maxMeshWorkGroupCount[i]); + // LIMIT_INIT_MESH(maxMeshWorkGroupSize[i]); + //} + //#undef LIMIT_INIT_MESH } if (!deviceFeatures.features.sampleRateShading || !deviceFeatures.features.dualSrcBlend) From d4dd56f98e4006c15d344fd376d8ccc276e371c6 Mon Sep 17 00:00:00 2001 From: Corey Date: Sat, 20 Dec 2025 14:33:25 -0600 Subject: [PATCH 137/140] i was using dispatch for mesh shaders for some dumb reason 
(fixed) Signed-off-by: Corey --- include/nbl/video/IGPUCommandBuffer.h | 11 ++++++ src/nbl/device/CMakeLists.txt | 2 ++ src/nbl/video/CVulkanCommandBuffer.cpp | 15 ++++++++- src/nbl/video/CVulkanCommandBuffer.h | 3 ++ src/nbl/video/IGPUCommandBuffer.cpp | 46 ++++++++++++++++++++++++-- 5 files changed, 74 insertions(+), 3 deletions(-) diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 26e9b77b12..3290bd916a 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -443,6 +443,14 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject } bool dispatchIndirect(const asset::SBufferBinding& binding); + bool drawMeshTasks(const uint32_t groupCountX, const uint32_t groupCountY = 1, const uint32_t groupCountZ = 1); + template requires std::is_integral_v + bool drawMeshTasks(const hlsl::vector groupCount) + { + return drawMeshTasks(groupCount.x, groupCount.y, groupCount.z); + } + bool drawMeshTasksIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride); + //! 
Begin/End RenderPasses struct SRenderpassBeginInfo { @@ -705,6 +713,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject virtual bool dispatch_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) = 0; virtual bool dispatchIndirect_impl(const asset::SBufferBinding& binding) = 0; + virtual bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) = 0; + virtual bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) = 0; + virtual bool beginRenderPass_impl(const SRenderpassBeginInfo& info, SUBPASS_CONTENTS contents) = 0; virtual bool nextSubpass_impl(const SUBPASS_CONTENTS contents) = 0; virtual bool endRenderPass_impl() = 0; diff --git a/src/nbl/device/CMakeLists.txt b/src/nbl/device/CMakeLists.txt index 5bcea53112..5ae5d75da7 100644 --- a/src/nbl/device/CMakeLists.txt +++ b/src/nbl/device/CMakeLists.txt @@ -53,6 +53,8 @@ set(NBL_COMMAND ${NBL_OUTPUT_GEN_ARGUMENTS} ) +file(MAKE_DIRECTORY "${DEVICEGEN_BUILTIN_RESOURCES_DIRECTORY_PATH}") + add_custom_command(OUTPUT ${NBL_OUTPUT_HEADERS} DEPENDS ${NBL_DEPENDS} COMMAND ${NBL_COMMAND} diff --git a/src/nbl/video/CVulkanCommandBuffer.cpp b/src/nbl/video/CVulkanCommandBuffer.cpp index c73557da02..59d056bfdf 100644 --- a/src/nbl/video/CVulkanCommandBuffer.cpp +++ b/src/nbl/video/CVulkanCommandBuffer.cpp @@ -640,10 +640,23 @@ bool CVulkanCommandBuffer::dispatch_impl(const uint32_t groupCountX, const uint3 bool CVulkanCommandBuffer::dispatchIndirect_impl(const asset::SBufferBinding& binding) { - getFunctionTable().vkCmdDispatchIndirect(m_cmdbuf,static_cast(binding.buffer.get())->getInternalObject(),binding.offset); + getFunctionTable().vkCmdDispatchIndirect(m_cmdbuf, static_cast(binding.buffer.get())->getInternalObject(), binding.offset); return true; } +bool CVulkanCommandBuffer::drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t 
groupCountZ) +{ + getFunctionTable().vkCmdDrawMeshTasksEXT(m_cmdbuf, groupCountX, groupCountY, groupCountZ); + return true; +} + +bool CVulkanCommandBuffer::drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) +{ + getFunctionTable().vkCmdDrawMeshTasksIndirectEXT(m_cmdbuf, static_cast(binding.buffer.get())->getInternalObject(), binding.offset, drawCount, stride); + return true; +} + + bool CVulkanCommandBuffer::beginRenderPass_impl(const SRenderpassBeginInfo& info, const SUBPASS_CONTENTS contents) { diff --git a/src/nbl/video/CVulkanCommandBuffer.h b/src/nbl/video/CVulkanCommandBuffer.h index 41e6fa5e6f..ba3925ffe2 100644 --- a/src/nbl/video/CVulkanCommandBuffer.h +++ b/src/nbl/video/CVulkanCommandBuffer.h @@ -210,6 +210,9 @@ class CVulkanCommandBuffer final : public IGPUCommandBuffer bool dispatch_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) override; bool dispatchIndirect_impl(const asset::SBufferBinding& binding) override; + bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) override; + bool drawMeshTasksIndirect_impl(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) override; + bool beginRenderPass_impl(const SRenderpassBeginInfo& info, SUBPASS_CONTENTS contents) override; bool nextSubpass_impl(const SUBPASS_CONTENTS contents) override; bool endRenderPass_impl() override; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index f080202649..b3bf6dba75 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1460,7 +1460,7 @@ bool IGPUCommandBuffer::dispatch(const uint32_t groupCountX, const uint32_t grou allowedRenderpassScope = outside; */ - if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT | queue_flags_t::GRAPHICS_BIT,RENDERPASS_SCOPE::BOTH)) + if 
(!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT,RENDERPASS_SCOPE::OUTSIDE)) return false; if (groupCountX==0 || groupCountY==0 || groupCountZ==0) @@ -1482,7 +1482,7 @@ bool IGPUCommandBuffer::dispatch(const uint32_t groupCountX, const uint32_t grou bool IGPUCommandBuffer::dispatchIndirect(const asset::SBufferBinding& binding) { - if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT | queue_flags_t::GRAPHICS_BIT, RENDERPASS_SCOPE::BOTH)) + if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT, RENDERPASS_SCOPE::OUTSIDE)) return false; //side mission - find out what 4 is. may impact mesh in a way im not expecting if (invalidBufferBinding(binding,4u/*TODO: is it really 4?*/,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)) @@ -1498,6 +1498,48 @@ bool IGPUCommandBuffer::dispatchIndirect(const asset::SBufferBindinggetPhysicalDevice()->getLimits(); + if (groupCountX > limits.maxMeshWorkGroupCount[0] || groupCountY > limits.maxMeshWorkGroupCount[1] || groupCountZ > limits.maxMeshWorkGroupCount[2]) + { + NBL_LOG_ERROR("group counts (%d, %d, %d) exceeds maximum counts (%d, %d, %d)!", groupCountX, groupCountY, groupCountZ, limits.maxMeshWorkGroupCount[0], limits.maxMeshWorkGroupCount[1], limits.maxMeshWorkGroupCount[2]); + return false; + } + + m_noCommands = false; + return drawMeshTasks_impl(groupCountX, groupCountY, groupCountZ); +} + +bool IGPUCommandBuffer::drawMeshTasksIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) +{ + assert(false && "still needs to be implemented - i just lazily copied dispatch indirect"); + if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT, RENDERPASS_SCOPE::INSIDE)) + return false; + //4 is alignment + if (invalidBufferBinding(binding, 4u/*TODO: is it really 4?*/, IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)) + return false; + //if (invalidDrawMeshTasksIndirect(binding, drawCount, stride)) + //return false; + + if (!m_cmdpool->m_commandListPool.emplace(m_commandList, 
core::smart_refctd_ptr(binding.buffer))) + { + NBL_LOG_ERROR("out of host memory!"); + return false; + } + + m_noCommands = false; + return drawMeshTasksIndirect_impl(binding, drawCount, stride); +} + bool IGPUCommandBuffer::beginRenderPass(SRenderpassBeginInfo info, const SUBPASS_CONTENTS contents) { From b591c5ef6a59785d20c38f94aba0a7fc5c02f2df Mon Sep 17 00:00:00 2001 From: Corey Date: Wed, 24 Dec 2025 08:02:28 -0600 Subject: [PATCH 138/140] minor cleanup Signed-off-by: Corey --- ...TFramework,Version=v4.0.AssemblyAttributes.cs | 4 ++++ .../LzmaAlone.csproj.AssemblyReference.cache | Bin 0 -> 4382 bytes ...TFramework,Version=v4.0.AssemblyAttributes.cs | 4 ++++ .../LzmaAlone.csproj.AssemblyReference.cache | Bin 0 -> 4538 bytes include/nbl/video/IGPUCommandPool.h | 1 - src/nbl/CMakeLists.txt | 2 -- src/nbl/device/CMakeLists.txt | 2 -- src/nbl/video/IGPUCommandBuffer.cpp | 1 - 8 files changed, 8 insertions(+), 6 deletions(-) create mode 100644 3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/.NETFramework,Version=v4.0.AssemblyAttributes.cs create mode 100644 3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/LzmaAlone.csproj.AssemblyReference.cache create mode 100644 3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/.NETFramework,Version=v4.0.AssemblyAttributes.cs create mode 100644 3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/LzmaAlone.csproj.AssemblyReference.cache diff --git a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/.NETFramework,Version=v4.0.AssemblyAttributes.cs b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/.NETFramework,Version=v4.0.AssemblyAttributes.cs new file mode 100644 index 0000000000..9e65edd817 --- /dev/null +++ b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/.NETFramework,Version=v4.0.AssemblyAttributes.cs @@ -0,0 +1,4 @@ +// +using System; +using System.Reflection; +[assembly: global::System.Runtime.Versioning.TargetFrameworkAttribute(".NETFramework,Version=v4.0", FrameworkDisplayName = ".NET Framework 4")] diff 
--git a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/LzmaAlone.csproj.AssemblyReference.cache b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/LzmaAlone.csproj.AssemblyReference.cache new file mode 100644 index 0000000000000000000000000000000000000000..5c0a26ee2282a6d6ce8b54cc6311c7fb199e9f2c GIT binary patch literal 4382 zcmd5=J8#oa6!wLJLRwT_LR4bN$Pn3WlQeChLJ&%+N=s>~QVC0u)buL9k!FY4)NpEKg69=O)O1-Yv1M6C zh*)4^r=;oCoftwWMki4pOn3$?+(({ho4oYhAs6HwU93nl&T$-<&v7mW&WC}kW8fMX zIO@fQaD*75gL*|V^3BUFet9VU?LL~N>ywFPtQ(T1PC`87kxb+**|c;#sbb621v!~m zuu8Hd&f?mVR>kUMDVr5CVrFb)B9+F&;}ec26mOtm&=t=P3WkA|l3e4bFXW4Jg_)~- zp=Ov^;YS^h#bUEXcR^!2AM@*fc6J~F;s$QZ3};8P47%xYb7aLGv`aA*gcKdL<~6k8U^Xv^w5_p9Sl^ID91g1A zEF{k_ngL~{Nz?vAR+ziv+_k!})!Q-#b<|N$6T*_SqPK37MRnO`KK{M^Wb0F_YzGXu z*?F*ty$IejpX1k;zkrT*BdFb!Kw5l_Nk|z5v{Xn5hJZRs(e{?nIkynYNnK1#SQ#7iGQ5s*K1z`Y4X`w+M#Dh3?@)!0i6 z>Lzngy)c6GGm)On)K~$FmM)pKP@L{{dgj^_qSIdx^yhqoY3@G|&JQpI868kHU6IPF dz^d|EKYUOtqUX8{*Y+Z{Qne@O2YZHL{sFYKy=nje literal 0 HcmV?d00001 diff --git a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/.NETFramework,Version=v4.0.AssemblyAttributes.cs b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/.NETFramework,Version=v4.0.AssemblyAttributes.cs new file mode 100644 index 0000000000..9e65edd817 --- /dev/null +++ b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/.NETFramework,Version=v4.0.AssemblyAttributes.cs @@ -0,0 +1,4 @@ +// +using System; +using System.Reflection; +[assembly: global::System.Runtime.Versioning.TargetFrameworkAttribute(".NETFramework,Version=v4.0", FrameworkDisplayName = ".NET Framework 4")] diff --git a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/LzmaAlone.csproj.AssemblyReference.cache b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/LzmaAlone.csproj.AssemblyReference.cache new file mode 100644 index 0000000000000000000000000000000000000000..56f21742fc9600040618ba44a395503b95c7db57 GIT binary patch literal 4538 zcmd5=J8#oa6!wLJLRwT_LR4bN$Pn3WlQeChLJ&%+N=s>~QVC0uwiUY+05O 
zA|9C7DQS9jCx#G;(M8k;3!V!M_mN+;Ozt%}vjQZ_4O#LU>pL@JGk$0wXfDBeKBpevpo6bu6^CAr2=U&t5d3Nu&v zLd`I-!jC!;i^XP-u0dlvAM@*fc6J~G5(Xa23};8P47%wFb7OHE9Wbt*u9L{BOs~jaCO z3&rz`Wie?k;*p0^h*nZC=n`KE-BWitjv0gH?k&56`W%@tJ2gbzj@ABLX~-roED#vbXro+9n(qlBA6yz~JS0r^u0+?zmj4uMCaV$cCljlIO6 zZZZem3s;bNCepK+IxAq&(j~JNiqpML&s=*#bovT{{yX1b+8+M~!YP?uH|VwjRnrxz etO~3uul2(lwIX`%%W!WSsg m_dstBuffer; }; -//i dont really understand how to mirror this with mesh pipeline yet class IGPUCommandPool::CBindGraphicsPipelineCmd final : public IFixedSizeCommand { public: diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 6f5a6aabb4..3bc379c08f 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -281,8 +281,6 @@ set(NBL_VIDEO_SOURCES video/CVulkanComputePipeline.cpp video/CVulkanMeshPipeline.cpp video/CVulkanRayTracingPipeline.cpp - video/CVulkanComputePipeline.cpp - video/CVulkanMeshPipeline.cpp video/CVulkanEvent.cpp video/CSurfaceVulkan.cpp diff --git a/src/nbl/device/CMakeLists.txt b/src/nbl/device/CMakeLists.txt index 5ae5d75da7..5bcea53112 100644 --- a/src/nbl/device/CMakeLists.txt +++ b/src/nbl/device/CMakeLists.txt @@ -53,8 +53,6 @@ set(NBL_COMMAND ${NBL_OUTPUT_GEN_ARGUMENTS} ) -file(MAKE_DIRECTORY "${DEVICEGEN_BUILTIN_RESOURCES_DIRECTORY_PATH}") - add_custom_command(OUTPUT ${NBL_OUTPUT_HEADERS} DEPENDS ${NBL_DEPENDS} COMMAND ${NBL_COMMAND} diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index b3bf6dba75..05063da5e3 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1484,7 +1484,6 @@ bool IGPUCommandBuffer::dispatchIndirect(const asset::SBufferBinding Date: Wed, 24 Dec 2025 08:11:49 -0600 Subject: [PATCH 139/140] cleaning Signed-off-by: Corey --- ...TFramework,Version=v4.0.AssemblyAttributes.cs | 4 ---- .../LzmaAlone.csproj.AssemblyReference.cache | Bin 4382 -> 0 bytes ...TFramework,Version=v4.0.AssemblyAttributes.cs | 4 ---- .../LzmaAlone.csproj.AssemblyReference.cache | Bin 4538 -> 0 
bytes 4 files changed, 8 deletions(-) delete mode 100644 3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/.NETFramework,Version=v4.0.AssemblyAttributes.cs delete mode 100644 3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/LzmaAlone.csproj.AssemblyReference.cache delete mode 100644 3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/.NETFramework,Version=v4.0.AssemblyAttributes.cs delete mode 100644 3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/LzmaAlone.csproj.AssemblyReference.cache diff --git a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/.NETFramework,Version=v4.0.AssemblyAttributes.cs b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/.NETFramework,Version=v4.0.AssemblyAttributes.cs deleted file mode 100644 index 9e65edd817..0000000000 --- a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/.NETFramework,Version=v4.0.AssemblyAttributes.cs +++ /dev/null @@ -1,4 +0,0 @@ -// -using System; -using System.Reflection; -[assembly: global::System.Runtime.Versioning.TargetFrameworkAttribute(".NETFramework,Version=v4.0", FrameworkDisplayName = ".NET Framework 4")] diff --git a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/LzmaAlone.csproj.AssemblyReference.cache b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Debug/LzmaAlone.csproj.AssemblyReference.cache deleted file mode 100644 index 5c0a26ee2282a6d6ce8b54cc6311c7fb199e9f2c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4382 zcmd5=J8#oa6!wLJLRwT_LR4bN$Pn3WlQeChLJ&%+N=s>~QVC0u)buL9k!FY4)NpEKg69=O)O1-Yv1M6C zh*)4^r=;oCoftwWMki4pOn3$?+(({ho4oYhAs6HwU93nl&T$-<&v7mW&WC}kW8fMX zIO@fQaD*75gL*|V^3BUFet9VU?LL~N>ywFPtQ(T1PC`87kxb+**|c;#sbb621v!~m zuu8Hd&f?mVR>kUMDVr5CVrFb)B9+F&;}ec26mOtm&=t=P3WkA|l3e4bFXW4Jg_)~- zp=Ov^;YS^h#bUEXcR^!2AM@*fc6J~F;s$QZ3};8P47%xYb7aLGv`aA*gcKdL<~6k8U^Xv^w5_p9Sl^ID91g1A zEF{k_ngL~{Nz?vAR+ziv+_k!})!Q-#b<|N$6T*_SqPK37MRnO`KK{M^Wb0F_YzGXu z*?F*ty$IejpX1k;zkrT*BdFb!Kw5l_Nk|z5v{Xn5hJZRs(e{?nIkynYNnK1#SQ#7iGQ5s*K1z`Y4X`w+M#Dh3?@)!0i6 
z>Lzngy)c6GGm)On)K~$FmM)pKP@L{{dgj^_qSIdx^yhqoY3@G|&JQpI868kHU6IPF dz^d|EKYUOtqUX8{*Y+Z{Qne@O2YZHL{sFYKy=nje diff --git a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/.NETFramework,Version=v4.0.AssemblyAttributes.cs b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/.NETFramework,Version=v4.0.AssemblyAttributes.cs deleted file mode 100644 index 9e65edd817..0000000000 --- a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/.NETFramework,Version=v4.0.AssemblyAttributes.cs +++ /dev/null @@ -1,4 +0,0 @@ -// -using System; -using System.Reflection; -[assembly: global::System.Runtime.Versioning.TargetFrameworkAttribute(".NETFramework,Version=v4.0", FrameworkDisplayName = ".NET Framework 4")] diff --git a/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/LzmaAlone.csproj.AssemblyReference.cache b/3rdparty/lzma/CS/7zip/Compress/LzmaAlone/obj/Release/LzmaAlone.csproj.AssemblyReference.cache deleted file mode 100644 index 56f21742fc9600040618ba44a395503b95c7db57..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4538 zcmd5=J8#oa6!wLJLRwT_LR4bN$Pn3WlQeChLJ&%+N=s>~QVC0uwiUY+05O zA|9C7DQS9jCx#G;(M8k;3!V!M_mN+;Ozt%}vjQZ_4O#LU>pL@JGk$0wXfDBeKBpevpo6bu6^CAr2=U&t5d3Nu&v zLd`I-!jC!;i^XP-u0dlvAM@*fc6J~G5(Xa23};8P47%wFb7OHE9Wbt*u9L{BOs~jaCO z3&rz`Wie?k;*p0^h*nZC=n`KE-BWitjv0gH?k&56`W%@tJ2gbzj@ABLX~-roED#vbXro+9n(qlBA6yz~JS0r^u0+?zmj4uMCaV$cCljlIO6 zZZZem3s;bNCepK+IxAq&(j~JNiqpML&s=*#bovT{{yX1b+8+M~!YP?uH|VwjRnrxz etO~3uul2(lwIX`%%W!WSsg Date: Wed, 24 Dec 2025 13:38:11 -0600 Subject: [PATCH 140/140] indirect commands are simpler than i expected Signed-off-by: Corey --- .../nbl/builtin/hlsl/indirect_commands.hlsl | 8 ++++++ src/nbl/video/IGPUCommandBuffer.cpp | 26 ++++++++++++++----- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/include/nbl/builtin/hlsl/indirect_commands.hlsl b/include/nbl/builtin/hlsl/indirect_commands.hlsl index ca8418bde7..79057fd4a2 100644 --- a/include/nbl/builtin/hlsl/indirect_commands.hlsl 
+++ b/include/nbl/builtin/hlsl/indirect_commands.hlsl @@ -37,6 +37,14 @@ struct DispatchIndirectCommand_t uint32_t num_groups_z; }; +// distinct struct, new name with the same data - https://docs.vulkan.org/refpages/latest/refpages/source/VkDrawMeshTasksIndirectCommandEXT.html +struct DrawMeshTasksIndirectCommand_t +{ + uint32_t num_groups_x; + uint32_t num_groups_y; + uint32_t num_groups_z; +}; + struct TraceRaysIndirectCommand_t { uint64_t raygenShaderRecordAddress; diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index 05063da5e3..d86a5a582d 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -1520,16 +1520,28 @@ bool IGPUCommandBuffer::drawMeshTasks(const uint32_t groupCountX, const uint32_t bool IGPUCommandBuffer::drawMeshTasksIndirect(const asset::SBufferBinding& binding, const uint32_t drawCount, const uint32_t stride) { - assert(false && "still needs to be implemented - i just lazily copied dispatch indirect"); - if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT, RENDERPASS_SCOPE::INSIDE)) + if (!checkStateBeforeRecording(queue_flags_t::GRAPHICS_BIT,RENDERPASS_SCOPE::INSIDE)) return false; - //4 is alignment - if (invalidBufferBinding(binding, 4u/*TODO: is it really 4?*/, IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)) + if (invalidBufferBinding(binding,4u/*TODO: is it really 4?*/,IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)){ return false; - //if (invalidDrawMeshTasksIndirect(binding, drawCount, stride)) - //return false; + } + + if (drawCount) { + if (drawCount==1u) + stride = sizeof(DrawMeshTasksIndirectCommand_t); + if (stride&0x3u || stride getOriginDevice()->getPhysicalDevice()->getLimits().maxDrawIndirectCount) { + NBL_LOG_ERROR("draw count (%d) exceeds maximum allowed amount (%d)!", drawCount, getOriginDevice()->getPhysicalDevice()->getLimits().maxDrawIndirectCount); + return false; + } + if (invalidBufferRange({ binding.offset,stride * (drawCount - 1u) + 
sizeof(IndirectCommand),binding.buffer }, alignof(uint32_t), IGPUBuffer::EUF_INDIRECT_BUFFER_BIT)) + return false; + } // i get the feeling the vk command shouldnt be called if drawCount is 0, but this is how drawindirect does it - if (!m_cmdpool->m_commandListPool.emplace(m_commandList, core::smart_refctd_ptr(binding.buffer))) + if (!m_cmdpool->m_commandListPool.emplace(m_commandList,core::smart_refctd_ptr(binding.buffer))) { NBL_LOG_ERROR("out of host memory!"); return false;