Devsh-Graphics-Programming · GDBobby · Sep 30, 2025 · Oct 3, 2025 · Oct 3, 2025 · Oct 9, 2025
diff --git a/.gitmodules b/.gitmodules
@@ -84,7 +84,7 @@
 	url = git@github.com:Devsh-Graphics-Programming/Nabla-Continous-Integration-Python-Framework.git
 [submodule "3rdparty/boost/superproject"]
 	path = 3rdparty/boost/superproject
-	url = ../boost.git
+	url = git@github.com:Devsh-Graphics-Programming/boost.git
 [submodule "3rdparty/argparse"]
 	path = 3rdparty/argparse
 	url = git@github.com:p-ranav/argparse.git
@@ -117,7 +117,7 @@
 	url = git@github.com:Devsh-Graphics-Programming/glm.git
 [submodule "docker/msvc-winsdk"]
 	path = docker/msvc-winsdk
-	url = ../docker-nanoserver-msvc-winsdk
+	url = git@github.com:Devsh-Graphics-Programming/docker-nanoserver-msvc-winsdk.git
 [submodule "3rdparty/gtl"]
 	path = 3rdparty/gtl
 	url = https://github.com/greg7mdp/gtl.git

diff --git a/include/nbl/application_templates/MonoDeviceApplication.hpp b/include/nbl/application_templates/MonoDeviceApplication.hpp
@@ -74,6 +74,8 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication
 
 				const auto supportedPreferredFormats = getPreferredDeviceFeatures().intersectWith(m_physicalDevice->getFeatures());
 				params.featuresToEnable = getRequiredDeviceFeatures().unionWith(supportedPreferredFormats);
+				params.featuresToEnable.meshShader = m_physicalDevice->getFeatures().meshShader; // opt in only when the device reports the feature; an unconditional `true` requests it even on hardware without mesh shading
+				params.featuresToEnable.taskShader = m_physicalDevice->getFeatures().taskShader; // same for the task stage
 
 				m_device = m_physicalDevice->createLogicalDevice(std::move(params));
 				if (!m_device)

diff --git a/include/nbl/asset/IAsset.h b/include/nbl/asset/IAsset.h
@@ -95,6 +95,7 @@ class IAsset : virtual public core::IReferenceCounted
 			ET_PIPELINE_CACHE = 1ull<<21,						//!< asset::ICPUPipelineCache
 			ET_SCENE = 1ull<<22,								//!< reserved, to implement later
 			ET_RAYTRACING_PIPELINE = 1ull << 23, //!< asset::ICPURayTracingPipeline
+			ET_MESH_PIPELINE = 1ull << 24, //!< asset::ICPUMeshPipeline
 			ET_IMPLEMENTATION_SPECIFIC_METADATA = 1ull<<31u,    //!< lights, etc.
 			//! Reserved special value used for things like terminating lists of this enum
 

diff --git a/include/nbl/asset/ICPUMeshPipeline.h b/include/nbl/asset/ICPUMeshPipeline.h
@@ -0,0 +1,160 @@
+#ifndef _NBL_I_CPU_MESH_PIPELINE_H_INCLUDED_
+#define _NBL_I_CPU_MESH_PIPELINE_H_INCLUDED_
+
+
+#include "nbl/asset/IMeshPipeline.h"
+#include "nbl/asset/ICPURenderpass.h"
+#include "nbl/asset/ICPUPipeline.h"
+
+
+namespace nbl::asset
+{
+
+//! CPU-side (asset) description of a mesh shading pipeline.
+/** Holds one `SShaderSpecInfo` slot per stage in `m_specInfos`, in the fixed order
+    0 = task, 1 = mesh, 2 = fragment. `stageToIndex()` and `indexToStage()` define that order
+    and every per-stage accessor goes through them, so the two directions cannot drift apart. */
+class ICPUMeshPipeline final : public ICPUPipeline<IMeshPipeline<ICPUPipelineLayout,ICPURenderpass>>
+{
+        using pipeline_base_t = IMeshPipeline<ICPUPipelineLayout, ICPURenderpass>;
+        using base_t = ICPUPipeline<pipeline_base_t>;
+
+    public:
+        //! Allocates a pipeline for `layout` and (optionally) `renderpass`.
+        //! NOTE: `valid()` reports false for as long as no renderpass is set.
+        static core::smart_refctd_ptr<ICPUMeshPipeline> create(ICPUPipelineLayout* layout, ICPURenderpass* renderpass = nullptr)
+        {
+            auto retval = new ICPUMeshPipeline(layout, renderpass);
+            return core::smart_refctd_ptr<ICPUMeshPipeline>(retval,core::dont_grab);
+        }
+
+        constexpr static inline auto AssetType = ET_MESH_PIPELINE;
+        inline E_TYPE getAssetType() const override { return AssetType; }
+
+        //! Read-only view of the cached creation parameters.
+        inline const SCachedCreationParams& getCachedCreationParams() const
+        {
+            return pipeline_base_t::getCachedCreationParams();
+        }
+
+        //! Writable view of the cached creation parameters; asserts that the asset is still mutable.
+        inline SCachedCreationParams& getCachedCreationParams()
+        {
+            assert(isMutable());
+            return m_params;
+        }
+
+        //! One-element span over the slot of `stage`; empty span when `stage` is not task, mesh or fragment.
+        inline std::span<const SShaderSpecInfo> getSpecInfos(const hlsl::ShaderStage stage) const override final
+        {
+            const auto stageIndex = stageToIndex(stage);
+            if (stageIndex < 0)
+                return {};
+            return { &m_specInfos[stageIndex], 1 };
+        }
+
+        //! Mutable overload, forwarded to the `ICPUPipeline` base implementation.
+        inline std::span<SShaderSpecInfo> getSpecInfos(const hlsl::ShaderStage stage)
+        {
+            return base_t::getSpecInfos(stage);
+        }
+
+        //! Writable slot of `stage`; nullptr when the asset is immutable or `stage` is not task, mesh or fragment.
+        SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage)
+        {
+            if (!isMutable()) return nullptr;
+            const auto stageIndex = stageToIndex(stage);
+            return stageIndex < 0 ? nullptr : &m_specInfos[stageIndex];
+        }
+
+        //! Read-only slot of `stage`; nullptr when `stage` is not task, mesh or fragment.
+        const SShaderSpecInfo* getSpecInfo(const hlsl::ShaderStage stage) const
+        {
+            const auto stageIndex = stageToIndex(stage);
+            return stageIndex < 0 ? nullptr : &m_specInfos[stageIndex];
+        }
+
+        //! True when the layout is set and valid, a renderpass containing `subpassIx` is set, and `hasRequiredStages` accepts the stages that have shaders.
+        inline bool valid() const override
+        {
+            if (!m_layout) return false;
+            if (!m_layout->valid())return false;
+
+            // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-dynamicRendering-06576
+            if (!m_renderpass || m_params.subpassIx >= m_renderpass->getSubpassCount()) return false;
+
+            // collect which slots actually have a shader assigned
+            core::bitflag<hlsl::ShaderStage> stagePresence = {};
+            for (auto shader_i = 0u; shader_i < m_specInfos.size(); shader_i++)
+            {
+                const auto& info = m_specInfos[shader_i];
+                if (info.shader)
+                    stagePresence |= indexToStage(shader_i);
+            }
+            return hasRequiredStages(stagePresence);
+        }
+
+    protected:
+        using base_t::base_t;
+        virtual ~ICPUMeshPipeline() override = default;
+
+        std::array<SShaderSpecInfo, MESH_SHADER_STAGE_COUNT> m_specInfos;
+
+    private:
+        explicit ICPUMeshPipeline(ICPUPipelineLayout* layout, ICPURenderpass* renderpass)
+            : base_t(layout, {}, renderpass)
+            {}
+
+        //! Slot index in `m_specInfos` for `stage`, or -1 when `stage` is not exactly one of task, mesh or fragment.
+        //! An explicit table (the inverse of `indexToStage`) is used rather than the flag's bit position,
+        //! because nothing guarantees that these three flags occupy bits 0..2 in slot order
+        //! (presumably they follow the Vulkan stage bits, where they do not - confirm against `hlsl::ShaderStage`).
+        static inline int8_t stageToIndex(const hlsl::ShaderStage stage)
+        {
+            switch (stage) {
+                case hlsl::ShaderStage::ESS_TASK:       return 0;
+                case hlsl::ShaderStage::ESS_MESH:       return 1;
+                case hlsl::ShaderStage::ESS_FRAGMENT:   return 2;
+                default:                                break;
+            }
+            return -1;
+        }
+
+        //! Stage stored in slot `index` of `m_specInfos`; `ESS_UNKNOWN` for any other index.
+        static inline hlsl::ShaderStage indexToStage(const int8_t index)
+        {
+            switch (index) {
+                case 0: return hlsl::ShaderStage::ESS_TASK;
+                case 1: return hlsl::ShaderStage::ESS_MESH;
+                case 2: return hlsl::ShaderStage::ESS_FRAGMENT;
+            }
+            return hlsl::ShaderStage::ESS_UNKNOWN;
+        }
+
+        //! Builds a copy on top of `layout`: same renderpass and cached params, each spec info cloned with `depth`.
+        inline core::smart_refctd_ptr<base_t> clone_impl(core::smart_refctd_ptr<ICPUPipelineLayout>&& layout, uint32_t depth) const override final
+        {
+            auto* newPipeline = new ICPUMeshPipeline(layout.get(), m_renderpass.get());
+            newPipeline->m_params = m_params;
+
+            for (auto specInfo_i = 0u; specInfo_i < m_specInfos.size(); specInfo_i++)
+            {
+                newPipeline->m_specInfos[specInfo_i] = m_specInfos[specInfo_i].clone(depth);
+            }
+
+            return core::smart_refctd_ptr<base_t>(newPipeline, core::dont_grab);
+        }
+
+        //! Visits the layout, the renderpass and then every slot's shader, stopping as soon as `visit` returns false.
+        inline void visitDependents_impl(std::function<bool(const IAsset*)> visit) const override
+        {
+            if (!visit(m_layout.get())) return;
+            if (!visit(m_renderpass.get())) return;
+            for (const auto& info : m_specInfos)
+              if (!visit(info.shader.get())) return;
+        }
+};
+
+}
+
+#endif
diff --git a/include/nbl/asset/IMeshPipeline.h b/include/nbl/asset/IMeshPipeline.h
@@ -0,0 +1,62 @@
+#ifndef _NBL_ASSET_I_MESH_PIPELINE_H_INCLUDED_
+#define _NBL_ASSET_I_MESH_PIPELINE_H_INCLUDED_
+
+#include "nbl/asset/IShader.h"
+#include "nbl/asset/RasterizationStates.h"
+#include "nbl/asset/IPipeline.h"
+
+
+namespace nbl::asset {
+    //! Non-templated part of the mesh pipeline interface: the stage count and the cached creation parameters.
+    class IMeshPipelineBase : public virtual core::IReferenceCounted {
+    public:
+        //! Number of per-stage slots; `ICPUMeshPipeline` sizes its spec-info array with it (task, mesh, fragment).
+        constexpr static inline uint8_t MESH_SHADER_STAGE_COUNT = 3u;
+
+        //! Creation state stored alongside the pipeline.
+        struct SCachedCreationParams final {
+            SRasterizationParams rasterization = {};
+            SBlendParams blend = {};
+            //! Subpass index inside the pipeline's renderpass; `ICPUMeshPipeline::valid()` range-checks it against the subpass count.
+            uint32_t subpassIx = 0u;
+            //! NOTE(review): stored as `uint8_t` but initialised from `false`; nothing in this header reads it - confirm the intended type.
+            uint8_t requireFullSubgroups = false;
+        };
+    };
+
+    //! Mesh pipeline interface, generic over the pipeline layout and renderpass types.
+    template<typename PipelineLayoutType, typename RenderpassType>
+    class IMeshPipeline : public IPipeline<PipelineLayoutType>, public IMeshPipelineBase {
+    protected:
+        using renderpass_t = RenderpassType;
+
+    public:
+        //! True only when both a mesh and a fragment stage are present in `stagePresence`.
+        //! NOTE(review): the linked VUID only demands the mesh (or vertex) stage; requiring a fragment
+        //! stage as well is stricter than Vulkan - confirm that this is intended.
+        static inline bool hasRequiredStages(const core::bitflag<hlsl::ShaderStage>& stagePresence)
+        {
+            // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkGraphicsPipelineCreateInfo.html#VUID-VkGraphicsPipelineCreateInfo-stage-02096
+            const bool hasMesh = stagePresence.hasFlags(hlsl::ShaderStage::ESS_MESH);
+            const bool hasFragment = stagePresence.hasFlags(hlsl::ShaderStage::ESS_FRAGMENT);
+            return hasMesh && hasFragment;
+        }
+
+        inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; }
+
+    protected:
+        //! Wraps `layout` and `renderpass` in smart pointers and copies `cachedParams`.
+        explicit IMeshPipeline(PipelineLayoutType* layout, const SCachedCreationParams& cachedParams, renderpass_t* renderpass)
+            : IPipeline<PipelineLayoutType>(core::smart_refctd_ptr<PipelineLayoutType>(layout))
+            , m_params(cachedParams)
+            , m_renderpass(core::smart_refctd_ptr<renderpass_t>(renderpass))
+        {}
+
+        SCachedCreationParams m_params = {};
+        core::smart_refctd_ptr<renderpass_t> m_renderpass = nullptr;
+    };
+
+}
+
+
+#endif
diff --git a/include/nbl/builtin/hlsl/indirect_commands.hlsl b/include/nbl/builtin/hlsl/indirect_commands.hlsl
@@ -37,6 +37,14 @@ struct DispatchIndirectCommand_t
 	uint32_t  num_groups_z;
 };
 
+// Indirect arguments for a mesh-tasks draw: three uint32_t group counts, the same data as DispatchIndirectCommand_t but kept as a distinct type - https://docs.vulkan.org/refpages/latest/refpages/source/VkDrawMeshTasksIndirectCommandEXT.html
+struct DrawMeshTasksIndirectCommand_t
+{
+    uint32_t num_groups_x;
+    uint32_t num_groups_y;
+    uint32_t num_groups_z;
+};
+
 struct TraceRaysIndirectCommand_t
 {
     uint64_t raygenShaderRecordAddress;

diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h
@@ -328,8 +328,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
         bool copyAccelerationStructureFromMemory(const AccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo);
 
         //! state setup
-        bool bindComputePipeline(const IGPUComputePipeline* const pipeline);
         bool bindGraphicsPipeline(const IGPUGraphicsPipeline* const pipeline);
+        bool bindComputePipeline(const IGPUComputePipeline* const pipeline);
+        bool bindMeshPipeline(const IGPUMeshPipeline* const pipeline);
         bool bindRayTracingPipeline(const IGPURayTracingPipeline* const pipeline);
         bool bindDescriptorSets(
             const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout,
@@ -442,6 +443,14 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
         }
         bool dispatchIndirect(const asset::SBufferBinding<const IGPUBuffer>& binding);
 
+        //! Scalar overload; the Y and Z group counts default to 1.
+        bool drawMeshTasks(const uint32_t groupCountX, const uint32_t groupCountY = 1, const uint32_t groupCountZ = 1);
+        //! Integral 3-vector overload: forwards x, y and z (converted to `uint32_t`) to the scalar overload.
+        template<typename T> requires std::is_integral_v<T>
+        bool drawMeshTasks(const hlsl::vector<T, 3> groupCount) { return drawMeshTasks(static_cast<uint32_t>(groupCount.x), static_cast<uint32_t>(groupCount.y), static_cast<uint32_t>(groupCount.z)); }
+        //! Indirect counterpart. NOTE(review): parameters presumably mirror vkCmdDrawMeshTasksIndirectEXT (argument buffer, draw count, stride) - confirm against the implementation.
+        bool drawMeshTasksIndirect(const asset::SBufferBinding<const IGPUBuffer>& binding, const uint32_t drawCount, const uint32_t stride);
+
         //! Begin/End RenderPasses
         struct SRenderpassBeginInfo
         {
@@ -587,6 +596,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
         inline const core::unordered_map<const IGPUDescriptorSet*, uint64_t>& getBoundDescriptorSetsRecord() const { return m_boundDescriptorSetsRecord; }
         const IGPUGraphicsPipeline* getBoundGraphicsPipeline() const { return m_boundGraphicsPipeline; }
         const IGPUComputePipeline* getBoundComputePipeline() const { return m_boundComputePipeline; }
+        const IGPUMeshPipeline* getBoundMeshPipeline() const { return m_boundMeshPipeline; }
         const IGPURayTracingPipeline* getBoundRayTracingPipeline() const { return m_boundRayTracingPipeline; }
 
     protected: 
@@ -670,8 +680,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
         virtual bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure* src, const asset::SBufferBinding<IGPUBuffer>& dst) = 0;
         virtual bool copyAccelerationStructureFromMemory_impl(const asset::SBufferBinding<const IGPUBuffer>& src, IGPUAccelerationStructure* dst) = 0;
 
-        virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0;
         virtual bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) = 0;
+        virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0;
+        virtual bool bindMeshPipeline_impl(const IGPUMeshPipeline* const pipeline) = 0;
         virtual bool bindRayTracingPipeline_impl(const IGPURayTracingPipeline* const pipeline) = 0;
         virtual bool bindDescriptorSets_impl(
             const asset::E_PIPELINE_BIND_POINT pipelineBindPoint, const IGPUPipelineLayout* const layout,
@@ -702,6 +713,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
         virtual bool dispatch_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) = 0;
         virtual bool dispatchIndirect_impl(const asset::SBufferBinding<const IGPUBuffer>& binding) = 0;
 
+        virtual bool drawMeshTasks_impl(const uint32_t groupCountX, const uint32_t groupCountY, const uint32_t groupCountZ) = 0;
+        virtual bool drawMeshTasksIndirect_impl(const asset::SBufferBinding<const IGPUBuffer>& binding, const uint32_t drawCount, const uint32_t stride) = 0;
+
         virtual bool beginRenderPass_impl(const SRenderpassBeginInfo& info, SUBPASS_CONTENTS contents) = 0;
         virtual bool nextSubpass_impl(const SUBPASS_CONTENTS contents) = 0;
         virtual bool endRenderPass_impl() = 0;
@@ -750,9 +764,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
 
             m_boundDescriptorSetsRecord.clear();
             m_TLASTrackingOps.clear();
-            m_boundGraphicsPipeline= nullptr;
-            m_boundComputePipeline= nullptr;
-            m_boundRayTracingPipeline= nullptr;
+            m_boundGraphicsPipeline = nullptr;
+            m_boundComputePipeline = nullptr;
+            m_boundMeshPipeline = nullptr;
+            m_boundRayTracingPipeline = nullptr;
             m_haveRtPipelineStackSize = false;
 
             m_commandList.head = nullptr;
@@ -770,6 +785,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
             m_TLASTrackingOps.clear();
             m_boundGraphicsPipeline= nullptr;
             m_boundComputePipeline= nullptr;
+            m_boundMeshPipeline = nullptr;
             m_boundRayTracingPipeline= nullptr;
             m_haveRtPipelineStackSize = false;
             releaseResourcesBackToPool_impl();
@@ -931,6 +947,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
 
         const IGPUGraphicsPipeline* m_boundGraphicsPipeline;
         const IGPUComputePipeline* m_boundComputePipeline;
+        const IGPUMeshPipeline* m_boundMeshPipeline = nullptr; // returned by getBoundMeshPipeline(); default-initialised so the getter never yields an indeterminate pointer
         const IGPURayTracingPipeline* m_boundRayTracingPipeline;
 
         IGPUCommandPool::CCommandSegmentListPool::SCommandSegmentList m_commandList = {};