diff --git a/h3d/impl/DX12Driver.hx b/h3d/impl/DX12Driver.hx index e87d40f5a..e30d80ccc 100644 --- a/h3d/impl/DX12Driver.hx +++ b/h3d/impl/DX12Driver.hx @@ -302,10 +302,10 @@ class CompiledShader { class BaseHeap { public var stride(default,null) : Int; - var size : Int; + public var size(default,null) : Int; + public var address(default,null) : Address; var type : DescriptorHeapType; var heap : DescriptorHeap; - var address : Address; var cpuToGpu : Int64; var shaderVisible : Bool; @@ -328,6 +328,10 @@ class BaseHeap { cpuToGpu = desc.flags == SHADER_VISIBLE ? ( heap.getHandle(true).value - address.value ) : 0; } + public dynamic function onFree( prev : DescriptorHeap, prevSize : Int ) { + throw "Too many buffers"; + } + public inline function toGPU( address : Address ) : Address { return new Address(address.value + cpuToGpu); } @@ -354,10 +358,6 @@ class ScratchHeap extends BaseHeap { super.allocHeap(size); } - public dynamic function onFree( prev : DescriptorHeap, prevSize : Int ) { - throw "Too many buffers"; - } - public function alloc( count : Int ) { if( cursor + count > size ) { var prevCursor = cursor; @@ -382,6 +382,7 @@ class ScratchHeap extends BaseHeap { class BlockHeap extends BaseHeap { var freeList : Array; + public var available(get,never) : Int; override public function new(type,size,shaderVisible) { if ( shaderVisible ) @@ -390,18 +391,20 @@ class BlockHeap extends BaseHeap { freeList = [for (i in 0...size) i]; } + function resize() { + var prevSize = size; + var prev = heap; + allocHeap(getNextHeapSize()); + for ( i in prevSize + 1...size) + freeList.push(i); + onFree(prev, prevSize); + } + public function allocIndex() : Int { var idx = freeList.pop(); if ( idx == null ) { - var prevAddress = address.value; - var prevSize = size; - var prev = heap; - allocHeap(getNextHeapSize()); - Driver.copyDescriptorsSimple(prevSize, address.value, prevAddress, type); - idx = prevSize; - for ( i in prevSize + 1...size) - freeList.push(i); - (prev : Resource).release(); + idx = size; + resize(); } return idx; } @@ -409,6 +412,14 @@ class BlockHeap extends BaseHeap { public function disposeIndex( index : Int ) { freeList.push(index); } + + inline function get_available() { + return freeList.length; + } + + public inline function isEmpty() { + return available == size; + } } class ResourceData { @@ -441,6 +452,7 @@ class TextureData extends ResourceData { public var uploadBuffer : TextureUploadBuffer; var clearColorChanges : Int; public var cpuViewsIndex : Array = [for (i in 0...16) -1]; + public var handles : Map; public function setClearColor( c : h3d.Vector4 ) { var color = color; @@ -478,6 +490,8 @@ class DX12Driver extends h3d.impl.Driver { var cpuSrvHeap : BlockHeap; var cpuSamplerHeap : ScratchHeap; var cpuSamplersIndex : Map; + var bindlessSrvHeap : BlockHeap; + var bindlessSamplerHeap : BlockHeap; var indirectCommand : CommandSignature; var currentFrame : Int; @@ -546,8 +560,8 @@ class DX12Driver extends h3d.impl.Driver { f.allocator = new CommandAllocator(DIRECT); f.commandList = new CommandList(DIRECT, f.allocator, null); f.commandList.close(); - f.srvHeapCache = new ScratchHeapArray(CBV_SRV_UAV, INITIAL_SRV_COUNT); - f.samplerHeapCache = new ScratchHeapArray(SAMPLER, INITIAL_SAMPLER_COUNT); + f.srvHeapCache = new ScratchHeapArray(CBV_SRV_UAV, INITIAL_SRV_COUNT * 2); + f.samplerHeapCache = new ScratchHeapArray(SAMPLER, INITIAL_SAMPLER_COUNT * 2); if ( f.bufferAllocator != null ) f.bufferAllocator.dispose(); f.bufferAllocator = new BufferAllocator(INITIAL_BUFFER_ALLOCATOR_SIZE); @@ -563,14 +577,32 @@ class DX12Driver extends h3d.impl.Driver { depthStenciViews.onFree = function(prev, prevSize) frame.toRelease.push(prev); cpuSrvHeap = new BlockHeap(CBV_SRV_UAV, INITIAL_SRV_COUNT, false); + cpuSrvHeap.onFree = function(prev, prevSize) @:privateAccess { + Driver.copyDescriptorsSimple(prevSize, cpuSrvHeap.address.value, prev.getHandle(false).value, CBV_SRV_UAV); + (prev : Resource).release(); + } + cpuSamplerHeap = new ScratchHeap(SAMPLER, INITIAL_SAMPLER_COUNT, false); cpuSamplerHeap.onFree = function(prev, prevSize) @:privateAccess { Driver.copyDescriptorsSimple(prevSize, cpuSamplerHeap.address.value, prev.getHandle(false).value, SAMPLER); - cpuSamplerHeap.alloc(prevSize); + cpuSamplerHeap.cursor = prevSize; (prev : Resource).release(); } cpuSamplersIndex = []; + bindlessSrvHeap = new BlockHeap(CBV_SRV_UAV, INITIAL_SRV_COUNT, false); + bindlessSrvHeap.onFree = function(prev, prevSize) @:privateAccess { + Driver.copyDescriptorsSimple(prevSize, bindlessSrvHeap.address.value, prev.getHandle(false).value, CBV_SRV_UAV); + (prev : Resource).release(); + flushHeaps(); + } + bindlessSamplerHeap = new BlockHeap(SAMPLER, INITIAL_SAMPLER_COUNT, false); + bindlessSamplerHeap.onFree = function(prev, prevSize) @:privateAccess { + Driver.copyDescriptorsSimple(prevSize, bindlessSamplerHeap.address.value, prev.getHandle(false).value, SAMPLER); + (prev : Resource).release(); + flushHeaps(); + } + if ( h3d.Engine.getCurrent() != null ) { defaultDepth = new h3d.mat.Texture(0,0, Depth24Stencil8); defaultDepth.t = new TextureData(); @@ -595,7 +627,6 @@ class DX12Driver extends h3d.impl.Driver { function beginFrame() { frameCount = hxd.Timer.frameCount; - heapCount++; currentFrame = Driver.getCurrentBackBufferIndex(); var prevFrame = frame; frame = frames[currentFrame]; @@ -648,13 +679,7 @@ class DX12Driver extends h3d.impl.Driver { frame.srvHeapCache.reset(); frame.samplerHeapCache.reset(); - frame.srvHeap = frame.srvHeapCache.next(); - frame.samplerHeap = frame.samplerHeapCache.next(); - - var arr = tmp.descriptors2; - arr[0] = @:privateAccess frame.srvHeap.heap; - arr[1] = @:privateAccess frame.samplerHeap.heap; - frame.commandList.setDescriptorHeaps(arr); + flushHeaps(); } override function clear(?color:Vector4, ?depth:Float, ?stencil:Int) { @@ -1511,6 +1536,10 @@ class DX12Driver extends h3d.impl.Driver { sign.flags.set(DENY_VERTEX_SHADER_ROOT_ACCESS); } else sign.flags.set(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT); + if ( shader.hasBindless() ) { + sign.flags.set(CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED); + sign.flags.set(SAMPLER_HEAP_DIRECTLY_INDEXED); + } sign.flags.set(DENY_HULL_SHADER_ROOT_ACCESS); sign.flags.set(DENY_DOMAIN_SHADER_ROOT_ACCESS); sign.flags.set(DENY_GEOMETRY_SHADER_ROOT_ACCESS); @@ -1531,9 +1560,9 @@ class DX12Driver extends h3d.impl.Driver { var c = new CompiledShader(); var rootStr = stringifyRootSignature(res.sign, "ROOT_SIGNATURE", res.params, res.paramsCount); - var vs = shader.mode == Compute ? null : compileSource(shader.vertex, "vs_6_0", rootStr); - var ps = shader.mode == Compute ? null : compileSource(shader.fragment, "ps_6_0", rootStr); - var cs = shader.mode == Compute ? compileSource(shader.compute, "cs_6_0", rootStr) : null; + var vs = shader.mode == Compute ? null : compileSource(shader.vertex, "vs_6_6", rootStr); + var ps = shader.mode == Compute ? null : compileSource(shader.fragment, "ps_6_6", rootStr); + var cs = shader.mode == Compute ? compileSource(shader.compute, "cs_6_6", rootStr) : null; var signSize = 0; var signBytes = Driver.serializeRootSignature(res.sign, 1, signSize); @@ -2187,6 +2216,10 @@ class DX12Driver extends h3d.impl.Driver { frame.commandList.setComputeRoot32BitConstants(regs.params, dataSize >> 2, data, 0); else frame.commandList.setGraphicsRoot32BitConstants(regs.params, dataSize >> 2, data, 0); + for ( i in 0...shader.paramsHandleCount ) { + var handle = buf.handles[i]; + transition(handle.texture.t, shader.kind == Fragment ? PIXEL_SHADER_RESOURCE : NON_PIXEL_SHADER_RESOURCE); + } } case Globals: var isFragment = shader.kind == Fragment; @@ -2211,6 +2244,12 @@ class DX12Driver extends h3d.impl.Driver { frame.commandList.setComputeRoot32BitConstants(regs.globals, dataSize >> 2, data, 0); else frame.commandList.setGraphicsRoot32BitConstants(regs.globals, dataSize >> 2, data, 0); + var startIdx = shader.paramsHandleCount; + var lastIdx = startIdx + shader.globalsHandleCount; + for ( i in startIdx...lastIdx ) { + var handle = buf.handles[i]; + transition(handle.texture.t, isFragment ? PIXEL_SHADER_RESOURCE : NON_PIXEL_SHADER_RESOURCE); + } } if ( isFragment ) lastFragmentGlobalBind = bind; @@ -2438,6 +2477,13 @@ class DX12Driver extends h3d.impl.Driver { } } + override function selectTextureHandles( handles : Array ) { + for( i in 0...handles.length ) { + var th = handles[i]; + transition(th.texture.t, PIXEL_SHADER_RESOURCE); + } + } + override function selectBuffer(buffer:Buffer) { var views = tmp.vertexViews; var bview = buffer.vbuf.view; @@ -2700,15 +2746,24 @@ class DX12Driver extends h3d.impl.Driver { } } - override function flushShaderBuffers() { - if( frame.srvHeap.available < 128 || frame.samplerHeap.available < 64 ) { - frame.srvHeap = frame.srvHeapCache.next(); - frame.samplerHeap = frame.samplerHeapCache.next(); - heapCount++; - var arr = tmp.descriptors2; - arr[0] = @:privateAccess frame.srvHeap.heap; - arr[1] = @:privateAccess frame.samplerHeap.heap; - frame.commandList.setDescriptorHeaps(arr); + function flushHeaps(rebind : Bool = false) { + frame.srvHeap = frame.srvHeapCache.next(); + frame.samplerHeap = frame.samplerHeapCache.next(); + heapCount++; + var arr = tmp.descriptors2; + arr[0] = @:privateAccess frame.srvHeap.heap; + arr[1] = @:privateAccess frame.samplerHeap.heap; + frame.commandList.setDescriptorHeaps(arr); + + if ( !bindlessSrvHeap.isEmpty() ) + Driver.copyDescriptorsSimple( bindlessSrvHeap.size, frame.srvHeap.address.value, bindlessSrvHeap.address.value, CBV_SRV_UAV ); + if ( !bindlessSamplerHeap.isEmpty() ) + Driver.copyDescriptorsSimple( bindlessSamplerHeap.size, frame.samplerHeap.address.value, bindlessSamplerHeap.address.value, SAMPLER ); + + @:privateAccess frame.srvHeap.cursor = bindlessSrvHeap.size; + @:privateAccess frame.samplerHeap.cursor = bindlessSamplerHeap.size; + + if ( rebind ) { inline function rebindGlobal(bindSlot, desc) { if ( bindSlot >= 0 ) { var srv = frame.srvHeap.alloc(1); @@ -2722,9 +2777,22 @@ class DX12Driver extends h3d.impl.Driver { rebindGlobal(lastVertexGlobalBind, tmp.vertexGlobalDesc); rebindGlobal(lastFragmentGlobalBind, tmp.fragmentGlobalDesc); + + if ( currentShader.shader.hasBindless() ) { + if ( currentShader.isCompute ) + frame.commandList.setComputeRootSignature(currentShader.rootSignature); + else + frame.commandList.setGraphicsRootSignature(currentShader.rootSignature); + frame.commandList.setPipelineState(currentPipelineState); + } } } + override function flushShaderBuffers() { + if( frame.srvHeap.available < 128 || frame.samplerHeap.available < 64 ) + flushHeaps(true); + } + function flushFrame( onResize : Bool = false ) { flushQueries(); frame.commandList.close(); @@ -2775,6 +2843,27 @@ class DX12Driver extends h3d.impl.Driver { needUAVBarrier = true; } + override function getTextureHandle( t : h3d.mat.Texture ) : h3d.mat.TextureHandle { + var handle : h3d.mat.TextureHandle = null; + if ( t.t.handles == null ) + t.t.handles = [] + else + handle = t.t.handles.get(t.bits); + if ( handle == null ) { + var sampler = getCpuSampler(t); + var srv = getCpuTexView(t); + var srvIndex = bindlessSrvHeap.allocIndex(); + var samplerIndex = bindlessSamplerHeap.allocIndex(); + Driver.copyDescriptorsSimple(1, bindlessSrvHeap.getCpuAddressAt(srvIndex), srv, CBV_SRV_UAV); + Driver.copyDescriptorsSimple(1, frame.srvHeap.getCpuAddressAt(srvIndex), srv, CBV_SRV_UAV); + Driver.copyDescriptorsSimple(1, bindlessSamplerHeap.getCpuAddressAt(samplerIndex), sampler, SAMPLER); + Driver.copyDescriptorsSimple(1, frame.samplerHeap.getCpuAddressAt(samplerIndex), sampler, SAMPLER); + handle = new h3d.mat.TextureHandle(t, haxe.Int64.make(samplerIndex, srvIndex)); + t.t.handles.set(t.bits, handle); + } + return handle; + } + } #end diff --git a/h3d/impl/DirectXDriver.hx b/h3d/impl/DirectXDriver.hx index 9b8b2af76..511d012c0 100644 --- a/h3d/impl/DirectXDriver.hx +++ b/h3d/impl/DirectXDriver.hx @@ -915,7 +915,7 @@ class DirectXDriver extends h3d.impl.Driver { override function hasFeature(f:Feature) { return switch(f) { - case Queries, BottomLeftCoords: + case Queries, BottomLeftCoords, Bindless: false; default: true; diff --git a/h3d/impl/Driver.hx b/h3d/impl/Driver.hx index 9b8ab1e24..cf91e3aa0 100644 --- a/h3d/impl/Driver.hx +++ b/h3d/impl/Driver.hx @@ -81,6 +81,10 @@ enum Feature { Supports instanced rendering */ InstancedRendering; + /* + Supports bindless + */ + Bindless; } enum QueryKind { @@ -183,6 +187,9 @@ class Driver { public function selectMaterial( pass : h3d.mat.Pass ) { } + public function selectTextureHandles( handles : Array ) { + } + public function uploadShaderBuffers( buffers : h3d.shader.Buffers, which : h3d.shader.Buffers.BufferKind ) { } @@ -320,4 +327,9 @@ class Driver { throw "Compute shaders are not implemented on this platform"; } + // --- Bindless + + public function getTextureHandle( t : h3d.mat.Texture ) : h3d.mat.TextureHandle { + throw "Bindless is not implemented on this platform"; + } } \ No newline at end of file diff --git a/h3d/impl/GlDriver.hx b/h3d/impl/GlDriver.hx index 45847e258..7f904106d 100644 --- a/h3d/impl/GlDriver.hx +++ b/h3d/impl/GlDriver.hx @@ -1958,11 +1958,16 @@ class GlDriver extends Driver { } override function hasFeature( f : Feature ) : Bool { - #if js - return features.get(f); - #else - return true; - #end + return switch(f) { + case Bindless: + false; + default: + #if js + features.get(f); + #else + true; + #end + }; } #if js diff --git a/h3d/impl/RenderContext.hx b/h3d/impl/RenderContext.hx index 5049cf364..f0d83c263 100644 --- a/h3d/impl/RenderContext.hx +++ b/h3d/impl/RenderContext.hx @@ -168,6 +168,10 @@ class RenderContext { for( vv in vl ) tot += fillRec(Reflect.field(v, vv.name), vv.type, out, pos + tot); return tot; + case TTextureHandle: + var v : h3d.mat.TextureHandle = v; + fillIntParam(v.handle.low, pos, out); + fillIntParam(v.handle.high, pos + 1, out); default: throw "assert " + type; } @@ -214,10 +218,13 @@ class RenderContext { inline function fill(buf:h3d.shader.Buffers.ShaderBuffers, s:hxsl.RuntimeShader.RuntimeShaderData) { var g = s.globals; var ptr = getPtr(buf.globals); + var hid = s.paramsHandleCount; while( g != null ) { - var v = globals.fastGet(g.gid); + var v : Dynamic = globals.fastGet(g.gid); if( v == null ) throw "Missing global value " + g.path; + if ( g.type.match(TTextureHandle) ) + buf.handles[hid++] = v; fillRec(v, g.type, ptr, g.pos); g = g.next; } @@ -255,6 +262,7 @@ class RenderContext { inline function fill(buf:h3d.shader.Buffers.ShaderBuffers, s:hxsl.RuntimeShader.RuntimeShaderData) { var p = s.params; var ptr = getPtr(buf.params); + var hid = 0; while( p != null ) { var v : Dynamic; if( p.perObjectGlobal == null ) { @@ -268,9 +276,12 @@ class RenderContext { fillIntParam(Std.int(i.getParamFloatValue(p.index)), p.pos, ptr); p = p.next; continue; + case TTextureHandle: + v = i.getParamValue(p.index); + buf.handles[hid++] = v; default: + v = i.getParamValue(p.index); } - v = i.getParamValue(p.index); if( v == null ) throw "Missing param value " + curInstanceValue + "." + p.name; } else v = getParamValue(p, shaders); diff --git a/h3d/mat/Texture.hx b/h3d/mat/Texture.hx index 74a0d8791..dccfa9ba2 100644 --- a/h3d/mat/Texture.hx +++ b/h3d/mat/Texture.hx @@ -372,6 +372,10 @@ class Texture { } } + public function getHandle() : h3d.mat.TextureHandle { + return mem.driver.getTextureHandle(this); + } + /** This will return the default depth buffer, which is automatically resized to the screen size. **/ diff --git a/h3d/mat/TextureHandle.hx b/h3d/mat/TextureHandle.hx new file mode 100644 index 000000000..157b88db3 --- /dev/null +++ b/h3d/mat/TextureHandle.hx @@ -0,0 +1,11 @@ +package h3d.mat; + +@:allow(h3d.impl.Driver) +class TextureHandle { + public var texture(default, null) : h3d.mat.Texture; + public var handle(default, null) : haxe.Int64; + function new(t : h3d.mat.Texture, handle : haxe.Int64) { + texture = t; + this.handle = handle; + } +} \ No newline at end of file diff --git a/h3d/scene/MeshBatch.hx b/h3d/scene/MeshBatch.hx index 73fa339bd..9f3d73b67 100644 --- a/h3d/scene/MeshBatch.hx +++ b/h3d/scene/MeshBatch.hx @@ -8,6 +8,7 @@ enum MeshBatchFlag { EnableCpuLod; ForceGpuUpdate; EnableSubMesh; + EnablePerInstanceTexture; } typedef CpuIndirectCallBuffer = { bytes : haxe.io.Bytes, count : Int }; @@ -117,6 +118,13 @@ class MeshBatch extends MultiMaterial { meshBatchFlags.set(EnableStorageBuffer); } + /** + * Enable per instance texture if bindless is supported. + */ + public function enablePerInstanceTexture() { + meshBatchFlags.set(EnablePerInstanceTexture); + } + public function enableCpuLod() { var prim = getPrimitive(); var lodCount = prim.lodCount(); @@ -184,11 +192,59 @@ class MeshBatch extends MultiMaterial { } } + inline function initPerInstanceTexture() { + var shaderVisited : Map = []; + + if ( instancedParams == null ) + instancedParams = new hxsl.Cache.BatchInstanceParams([]); + inline function findInstancedParams(shaderName : String) { + var result = null; + for ( p in @:privateAccess instancedParams.forcedPerInstance ) { + if ( p.shader == shaderName ) { + result = p.params; + break; + } + } + + if ( result == null ) { + result = []; + @:privateAccess instancedParams.forcedPerInstance.push( { shader: shaderName, params : result } ); + } + + return result; + } + + for ( m in materials ) { + for ( p in m.getPasses() ) { + for ( s in p.getShaders() ) { + var ss = @:privateAccess s.shader; + var name = ss.data.name; + if ( shaderVisited.exists(name) ) + continue; + shaderVisited.set(name, true); + var params = null; + for ( v in ss.data.vars ) { + if ( v.kind != Param || !v.type.match(TSampler(_)) ) + continue; + if ( params == null ) + params = findInstancedParams(name); + if ( params.indexOf(v.name) < 0 ) + params.push(v.name); + } + } + } + } + } + function initShadersMapping() { var scene = getScene(); if( scene == null ) return; cleanPasses(); updateHasPrimitiveOffset(); + + if ( meshBatchFlags.has(EnablePerInstanceTexture) && @:privateAccess scene.ctx.engine.driver.hasFeature(Bindless) ) + initPerInstanceTexture(); + for( index in 0...materials.length ) { var mat = materials[index]; if( mat == null ) continue; @@ -566,6 +622,21 @@ class MeshBatch extends MultiMaterial { case TMat4: var m : h3d.Matrix = curShader.getParamValue(p.index); bufLoader.loadMatrix(m); + case TTextureHandle: + if ( batch.textureHandles == null ) + batch.textureHandles = []; + var v : h3d.mat.TextureHandle = curShader.getParamValue(p.index); + batch.textureHandles.push(v); + bufLoader.loadInt(v.handle.low); + bufLoader.loadInt(v.handle.high); + case TSampler(_): + if ( batch.textureHandles == null ) + batch.textureHandles = []; + var v : h3d.mat.Texture = curShader.getParamValue(p.index); + var h = v.getHandle(); + batch.textureHandles.push(h); + bufLoader.loadInt(h.handle.low); + bufLoader.loadInt(h.handle.high); default: throw "Unsupported batch type "+p.type; } @@ -616,6 +687,8 @@ class MeshBatch extends MultiMaterial { p = p.next; } ctx.uploadParams(); + if ( p.textureHandles != null ) + ctx.selectTextureHandles(p.textureHandles); var prev = ctx.drawPass.index; ctx.drawPass.index >>= 16; super.draw(ctx); @@ -714,6 +787,7 @@ class BatchData { public var indirectCallBuffers : Array; public var buffers : Array = []; public var bufferFormat : hxd.BufferFormat; + public var textureHandles : Array; public var data : hxd.FloatBuffer; public var params : hxsl.RuntimeShader.AllocParam; public var shader : hxsl.BatchShader; diff --git a/h3d/scene/RenderContext.hx b/h3d/scene/RenderContext.hx index deb0852cd..e8dcc78fa 100644 --- a/h3d/scene/RenderContext.hx +++ b/h3d/scene/RenderContext.hx @@ -255,6 +255,10 @@ class RenderContext extends h3d.impl.RenderContext { return useReverseDepth ? 0 : 1; } + public function selectTextureHandles(handles : Array) { + engine.driver.selectTextureHandles(handles); + } + public function uploadParams() { fillParams(shaderBuffers, drawPass.shader, drawPass.shaders); engine.uploadInstanceShaderBuffers(shaderBuffers); diff --git a/h3d/shader/Base2d.hx b/h3d/shader/Base2d.hx index 6eb5cbf3d..22d0a1b41 100644 --- a/h3d/shader/Base2d.hx +++ b/h3d/shader/Base2d.hx @@ -42,6 +42,10 @@ class Base2d extends hxsl.Shader { var outputPosition : Vec4; + function __init__vertex() { + calculatedUV = hasUVPos ? input.uv * uvPos.zw + uvPos.xy : input.uv; + } + function __init__() { spritePosition = vec4(input.position, zValue, 1); if( isRelative ) { @@ -50,7 +54,6 @@ class Base2d extends hxsl.Shader { absolutePosition.zw = spritePosition.zw; } else absolutePosition = spritePosition; - calculatedUV = hasUVPos ? input.uv * uvPos.zw + uvPos.xy : input.uv; pixelColor = isRelative ? color * input.color : input.color; textureColor = texture.get(calculatedUV); pixelColor *= textureColor; diff --git a/h3d/shader/Buffers.hx b/h3d/shader/Buffers.hx index f315d2126..60101b9fc 100644 --- a/h3d/shader/Buffers.hx +++ b/h3d/shader/Buffers.hx @@ -15,6 +15,7 @@ class ShaderBuffers { public var params : ShaderBufferData; public var tex : haxe.ds.Vector; public var buffers : haxe.ds.Vector; + public var handles : haxe.ds.Vector; public function new() { globals = new ShaderBufferData(0); @@ -27,10 +28,12 @@ class ShaderBuffers { var np = s.paramsSize << 2; var nt = s.texturesCount; var nb = s.bufferCount; + var nh = s.globalsHandleCount + s.paramsHandleCount; if( globals.length < ng ) globals = new ShaderBufferData(ng); if( params.length < np ) params = new ShaderBufferData(np); if( tex.length < nt ) tex = new haxe.ds.Vector(nt); if( nb > 0 && (buffers == null || buffers.length < nb) ) buffers = new haxe.ds.Vector(nb); + if( nh > 0 && (handles == null || handles.length < nh) ) handles = new haxe.ds.Vector(nh); } } diff --git a/hxsl/Ast.hx b/hxsl/Ast.hx index fdf5f4073..db56c8f60 100644 --- a/hxsl/Ast.hx +++ b/hxsl/Ast.hx @@ -35,6 +35,7 @@ enum Type { TArray( t : Type, size : SizeDecl ); TBuffer( t : Type, size : SizeDecl, kind : BufferKind ); TChannel( size : Int ); + TTextureHandle; } enum VecType { @@ -103,6 +104,7 @@ enum VarQualifier { Sampler( name : String ); Final; Flat; + NoVar; } enum Prec { @@ -316,6 +318,7 @@ enum TGlobal { UnpackUnorm4x8; Transpose; TexelLod; + ResolveSampler; } enum SyntaxArgAccess { @@ -544,7 +547,7 @@ class Tools { return true; case TCall(e, pl): switch( e.e ) { - case TGlobal( ImageStore | AtomicAdd | GroupMemoryBarrier ): + case TGlobal( ImageStore | AtomicAdd | GroupMemoryBarrier | ResolveSampler ): return true; case TGlobal(g): default: @@ -643,6 +646,7 @@ class Tools { case TBool, TString, TSampler(_), TRWTexture(_), TFun(_): 0; case TArray(t, SConst(v)), TBuffer(t, SConst(v),_): size(t) * v; case TArray(_, SVar(_)), TBuffer(_): 0; + case TTextureHandle: 2; } } diff --git a/hxsl/Cache.hx b/hxsl/Cache.hx index 680c7fb5b..de886adf2 100644 --- a/hxsl/Cache.hx +++ b/hxsl/Cache.hx @@ -456,6 +456,8 @@ class Cache { var count = 0; for( a in alloc ) { if( a.v == null ) continue; // padding + if ( a.v.type.match(TTextureHandle) ) + c.paramsHandleCount++; var p = params.get(a.v.id); if( p == null ) { var ap = new AllocParam(a.v.name, a.pos, -1, -1, a.v.type); @@ -491,7 +493,14 @@ class Cache { default: throw "assert"; } case Global: - var out = [for( a in alloc ) if( a.v != null ) new AllocGlobal(a.pos, getPath(a.v), a.v.type)]; + var out = [ + for( a in alloc ) + if( a.v != null ) { + if ( a.v.type.match(TTextureHandle) ) + c.globalsHandleCount++; + new AllocGlobal(a.pos, getPath(a.v), a.v.type); + } + ]; for( i in 0...out.length - 1 ) out[i].next = out[i + 1]; switch( g.type ) { @@ -557,6 +566,17 @@ class Cache { if( c.params == null ) c.paramsSize = 0; c.data = data; + c.hasBindless = c.globalsHandleCount > 0 || c.paramsHandleCount > 0; + if ( !c.hasBindless ) { + for ( v in c.data.vars ) { + switch ( v.type ) { + case TTextureHandle: + c.hasBindless = true; + break; + default: + } + } + } return c; } @@ -705,6 +725,8 @@ class Cache { case TMat4: 4 * 4; case TVec(n,VFloat): n; case TFloat, TInt: 1; + case TTextureHandle: 2; + case TSampler(_): 2; default: throw "Unsupported batch var type "+p.type; } var index; @@ -760,21 +782,19 @@ class Cache { return false; } - var p = rt.vertex.params; - while( p != null ) { - var v = getVar(p); - if( isPerInstance(p, v) ) - addParam(p); - p = p.next; - } - var p = rt.fragment.params; - while( p != null ) { - var v = getVar(p); - if( isPerInstance(p, v) ) - addParam(p); - p = p.next; + inline function addPerInstance(p : RuntimeShader.AllocParam) { + while( p != null ) { + var v = getVar(p); + if( isPerInstance(p, v) ) + addParam(p); + p = p.next; + } } + addPerInstance(rt.vertex.params); + addPerInstance(rt.fragment.params); + addPerInstance(rt.vertex.textures); + addPerInstance(rt.fragment.textures); var parentVars = new Map(); var swiz = [[X],[Y],[Z],[W]]; @@ -813,6 +833,10 @@ class Cache { return { e : TCall({ e : TGlobal(FloatBitsToInt), t : TFun([]), p : e.p }, [e]), t : TInt, p : e.p }; } + inline function floatBitsToUint( e : TExpr ) { + return { e : TCall({ e : TGlobal(FloatBitsToUint), t : TFun([]), p : e.p }, [e]), t : TInt, p : e.p }; + } + function extractVar( vreal, ebuffer, v : AllocParam ) { var index = (v.pos>>2); var extract = switch( v.type ) { @@ -834,6 +858,25 @@ class Cache { default: [Z,W]; } { p : pos, t : v.type, e : TSwiz(readOffset(ebuffer, index),swiz) }; + case TTextureHandle: + var swiz = switch( v.pos & 3 ) { + case 0: [X,Y]; + case 1: [Y,Z]; + default: [Z,W]; + } + floatBitsToUint({ p : pos, t : v.type, e : TSwiz(readOffset(ebuffer, index),swiz) }); + case TSampler(_): + var vh = new AllocParam(v.name + "Handle", v.pos, v.instance, v.index, TTextureHandle); + var vhreal = declareLocalVar(vh); + var swiz = switch( vh.pos & 3 ) { + case 0: [X,Y]; + case 1: [Y,Z]; + default: [Z,W]; + } + var extract = floatBitsToUint({ p : pos, t : vh.type, e : TSwiz(readOffset(ebuffer, index),swiz) }); + var einitHandle = { p : pos, e : TBinop(OpAssign, { e : TVar(vhreal), p : pos, t : vh.type }, extract), t : TVoid }; + var eresolveTex = { p : pos, e : TCall({e : TGlobal(ResolveSampler), t : TFun([]), p : pos }, [{ e : TVar(vhreal), t : vh.type, p : pos }, { e : TVar(vreal), t : v.type, p : pos }]), t : TVoid }; + return { p : pos, e : TBlock([einitHandle, eresolveTex]), t : TVoid }; case TFloat: { p : pos, t : v.type, e : TSwiz(readOffset(ebuffer, index),swiz[v.pos&3]) }; case TInt: @@ -880,21 +923,7 @@ class Cache { e : TBinop(OpAssignOp(OpMult),eoffset,{ e : TConst(CInt(stride)), t : TInt, p : pos }), }); - inits.push({ - p : pos, - e : TIf({ e : TVar(useStorage), t : TBool, p : pos },{ - p : pos, - e : TBlock(exprsStorage), - t : TVoid, - }, { - p : pos, - e : TBlock(exprsUniform), - t : TVoid, - }), - t : TVoid, - }); - - var fv : TVar = declVar("init",TFun([]), Function); + var fv : TVar = declVar("__init__vertex",TFun([]), Function); var f : TFunction = { kind : Init, ref : fv, @@ -902,7 +931,26 @@ class Cache { ret : TVoid, expr : { e : TBlock(inits), p : pos, t : TVoid }, }; + + var pinits = []; + for ( i in 0...exprsStorage.length) { + pinits.push({ + p : pos, + e : TIf({ e : TVar(useStorage), t : TBool, p : pos }, exprsStorage[i], exprsUniform[i]), + t : TVoid, + }); + } + + var fpv : TVar = declVar("__init__",TFun([]), Function); + var fp : TFunction = { + kind : Init, + ref : fpv, + args : [], + ret : TVoid, + expr : { e : TBlock(pinits), p : pos, t : TVoid }, + }; s.data.funs.push(f); + s.data.funs.push(fp); s.consts = new SharedShader.ShaderConst(vcount,2,countBits+1); s.consts.globalId = 0; s.consts.next = new SharedShader.ShaderConst(hasOffset,0,1); diff --git a/hxsl/Checker.hx b/hxsl/Checker.hx index 674ed09c1..0d7ddd593 100644 --- a/hxsl/Checker.hx +++ b/hxsl/Checker.hx @@ -228,6 +228,8 @@ class Checker { [ { args : [ { name : "value", type : TInt } ], ret : vec4 } ]; case UnpackUnorm4x8: [ { args : [ { name : "value", type : TInt } ], ret : vec4 } ]; + case ResolveSampler: + [for( t in texDefs ) { args : [{ name : "handle", type : TTextureHandle }, { name : "tex", type : TSampler(t.dim,t.arr) }], ret : TVoid }]; default: throw "Unsupported global "+g; } @@ -966,6 +968,7 @@ class Checker { } case Ignore, Doc(_): case Flat: if( tv.kind != Local ) error("flat only allowed on local", pos); + case NoVar: if( tv.kind != Local ) error("noVar only allowed on local", pos); } } if( tv.type != null ) diff --git a/hxsl/Dce.hx b/hxsl/Dce.hx index bc19570ba..3d7979d91 100644 --- a/hxsl/Dce.hx +++ b/hxsl/Dce.hx @@ -279,6 +279,12 @@ class Dce { check(data, writeTo, isAffected); writeTo.pop(); isAffected.append(v, 15); + case TCall({ e : TGlobal(ResolveSampler)}, [handle, { e : TVar(v)}]): + var v = get(v); + writeTo.push(v, 15); + check(handle, writeTo, isAffected); + writeTo.pop(); + isAffected.append(v, 15); case TSyntax(_, _, args): for ( arg in args ) { if ( arg.access != Read ) { @@ -358,6 +364,10 @@ class Dce { case TCall({ e : TGlobal(ChannelTextureSize) }, [_, lod, { e : TConst(CInt(cid)) }]): var c = channelVars[cid]; return { e : TCall({ e : TGlobal(TextureSize), p : e.p, t : TVoid }, [{ e : TVar(c), t : c.type, p : e.p }, mapExpr(lod,true)]), t : TVoid, p : e.p }; + case TCall({ e : TGlobal(ResolveSampler)}, [handle, { e : TVar(v)}]): + if (get(v).used == 0) + return { e : TConst(CNull), t : e.t, p : e.p }; + return e.map(function(e) return mapExpr(e,true)); case TIf(e, econd, eelse): var e = mapExpr(e, true); var econd = mapExpr(econd, isVar); diff --git a/hxsl/Flatten.hx b/hxsl/Flatten.hx index 40a5c382f..9cbdf3436 100644 --- a/hxsl/Flatten.hx +++ b/hxsl/Flatten.hx @@ -319,6 +319,9 @@ class Flatten { case TVec(size,VInt): e.t = TVec(size,VFloat); e = { e : TCall({ e : TGlobal([IVec2,IVec3,IVec4][size-2]), t : TFun([]), p : pos }, [e]), t : t, p : pos }; + case TTextureHandle: + e.t = TVec(2, VFloat); + e = floatBitsToUint(e); default: } return e; @@ -526,6 +529,7 @@ class Flatten { for( v in vl ) size += varSize(v.type, t); size; + case TTextureHandle: 2; default: throw v.toString() + " size unknown for type " + t; } diff --git a/hxsl/GlslOut.hx b/hxsl/GlslOut.hx index 96c223659..095a35c54 100644 --- a/hxsl/GlslOut.hx +++ b/hxsl/GlslOut.hx @@ -207,6 +207,8 @@ class GlslOut { throw "assert"; case TChannel(n): add("channel" + n); + case TTextureHandle: + throw "assert"; } } diff --git a/hxsl/HlslOut.hx b/hxsl/HlslOut.hx index 5cae80740..362821c02 100644 --- a/hxsl/HlslOut.hx +++ b/hxsl/HlslOut.hx @@ -100,10 +100,15 @@ class HlslOut { var decls : Array; var kind : FunctionKind; var allNames : Map; + var bindlessSamplersCount : Int; + var bindlessSamplers : Map; var samplers : Map>; var computeLayout = [1,1,1]; public var varNames : Map; + var isAssigningTexture : Bool = false; + var assignedTexture : TVar = null; + var varAccess : Map; var isVertex(get,never) : Bool; var isCompute(get,never) : Bool; @@ -191,6 +196,8 @@ class HlslOut { addArraySize(size); case TChannel(n): add("channel" + n); + case TTextureHandle: + add("uint2"); } } @@ -254,6 +261,10 @@ class HlslOut { buf = tmp; add(name); add("()"); + if ( isAssigningTexture ) { + var v = switch( last.e ) { case TVar(v): v; default: throw "assert"; }; + assignedTexture = v; + } case TIf(econd, eif, eelse): add("( "); addValue(econd, tabs); @@ -415,10 +426,22 @@ class HlslOut { decl("int3 ivec3( int v ) { return int3(v,v,v); }"); case IVec4 if( args.length == 1 && args[0].t.match(TInt | TFloat)): decl("int4 ivec4( int v ) { return int4(v,v,v,v); }"); + case ResolveSampler: + var tt = args[1].t; + var tstr = getTexType(tt); + decl('void resolveSampler( uint2 id, $tstr tex, SamplerState sampler ) { tex = ResourceDescriptorHeap[id.x]; sampler = SamplerDescriptorHeap[id.y]; }'); default: } } + function transferSampler( from : Int, to : Int ) { + var sampler = bindlessSamplers.get(from); + if ( sampler != null ) + bindlessSamplers.set(to, sampler); + else + samplers.set(to, samplers.get(from)); + } + function addExpr( e : TExpr, tabs : String ) { switch( e.e ) { case TConst(c): @@ -437,6 +460,8 @@ class HlslOut { var acc = varAccess.get(v.id); if( acc != null ) add(acc); ident(v); + if ( isAssigningTexture ) + assignedTexture = v; case TCall({ e : TGlobal(SetLayout) },_): // ignore case TCall({ e : TGlobal(g = (Texture | TextureLod)) }, args): @@ -456,6 +481,15 @@ class HlslOut { default: args[0]; } switch( expr.e ) { + case TVar(v) if (v.kind == Local ): + var sampler = bindlessSamplers.get(v.id); + if( sampler != null ) + add('__BindlessSamplers[${sampler}]'); + else { + var samplers = samplers.get(v.id); + if( samplers == null ) throw "assert"; + add('__Samplers[${samplers[offset]}]'); + } case TVar(v): var samplers = samplers.get(v.id); if( samplers == null ) throw "assert"; @@ -501,6 +535,17 @@ class HlslOut { add(", "); addValue(args[2], tabs); add("))"); + case TCall({ e : TGlobal( g = ResolveSampler) }, args = [handle, tex = { e : TVar(v)}]): + declGlobal(g, args); + add("resolveSampler"); + add("("); + addValue(handle, tabs); + add(", "); + addValue(tex, tabs); + add(", "); + add('__BindlessSamplers[$bindlessSamplersCount]'); + add(")"); + bindlessSamplers.set(v.id, bindlessSamplersCount++); case TCall(e = { e : TGlobal(g) }, args): declGlobal(g, args); switch( [g,args] ) { @@ -609,6 +654,16 @@ class HlslOut { add(","); addValue(e2, tabs); add(")"); + case [OpAssign, TSampler(_), _]: + var v = switch( e1.e ) { case TVar(v) : v; default: throw "assert"; }; + var prevAssigningTexture = isAssigningTexture; + isAssigningTexture = true; + addValue(e1, tabs); + add(" = "); + addValue(e2, tabs); + transferSampler(assignedTexture.id, v.id); + isAssigningTexture = prevAssigningTexture; + assignedTexture = null; default: addValue(e1, tabs); add(" "); @@ -629,9 +684,16 @@ class HlslOut { case TVarDecl(v, init): locals.set(v.id, v); if( init != null ) { + var prevAssigningTexture = isAssigningTexture; + isAssigningTexture = v.type.match(TSampler(_)); ident(v); add(" = "); addValue(init, tabs); + if ( isAssigningTexture ) { + transferSampler(assignedTexture.id, v.id); + assignedTexture = null; + } + isAssigningTexture = prevAssigningTexture; } else { add("/*var*/"); } @@ -1026,7 +1088,11 @@ class HlslOut { addVar(v); add(";\n"); } - add("\n"); + + if ( bindlessSamplersCount > 0 ) { + add(STATIC); + add('SamplerState __BindlessSamplers[$bindlessSamplersCount];\n'); + } for( e in exprValues ) { add(e); @@ -1036,6 +1102,7 @@ class HlslOut { public function run( s : ShaderData ) { locals = new Map(); + bindlessSamplers = new Map(); decls = []; buf = new StringBuf(); exprValues = []; diff --git a/hxsl/Linker.hx b/hxsl/Linker.hx index d210bc88b..971e19683 100644 --- a/hxsl/Linker.hx +++ b/hxsl/Linker.hx @@ -15,6 +15,12 @@ private class AllocatedVar { } } +private enum ShaderStage { + Undefined; + Vertex; + Fragment; +} + private class ShaderInfos { static var UID = 0; public var uid : Int; @@ -28,16 +34,19 @@ private class ShaderInfos { public var writeMap : Map; public var writeVars : Array; public var processed : Map; - public var vertex : Null; + public var stage : ShaderStage; public var onStack : Bool; public var hasDiscard : Bool; public var isCompute : Bool; + public var isBatchInit : Bool; public var hasSyntax : Bool; - public var marked : Null; - public function new(n, v) { + public var marked : haxe.EnumFlags; + public var added : haxe.EnumFlags; + + public function new(n, s) { this.name = n; this.uid = UID++; - this.vertex = v; + this.stage = s; processed = new Map(); usedFunctions = []; readMap = new Map(); @@ -194,9 +203,9 @@ class Linker { curShader.readVars.push(v); } // if we read a varying, force into fragment - if( curShader.vertex == null && v.v.kind == Var ) { + if( curShader.stage == Undefined && v.v.kind == Var ) { debug("Force " + curShader.name+" into fragment (use varying)"); - curShader.vertex = false; + curShader.stage = Fragment; } } return { e : TVar(v.v), t : v.v.type, p : e.p }; @@ -229,7 +238,7 @@ class Linker { } case TDiscard: if( curShader != null ) { - curShader.vertex = false; + curShader.stage = Fragment; curShader.hasDiscard = true; } case TVarDecl(v, _): @@ -255,6 +264,15 @@ class Linker { } if ( curShader != null ) curShader.hasSyntax = true; return { e : TSyntax(target, code, mappedArgs), t : e.t, p : e.p }; + case TCall({ e : TGlobal(ResolveSampler)}, [handle, { e : TVar(v)}]): + var handle = mapExprVar(handle); + var v = allocVar(v, handle.p); + if( curShader != null && !curShader.writeMap.exists(v.id) ) { + debug(curShader.name + " write " + v.path); + curShader.writeMap.set(v.id, v); + curShader.writeVars.push(v); + } + return { e : TCall({ e : TGlobal(ResolveSampler), t : TFun([]), p : e.p }, [handle, { e : TVar(v.v), t : v.v.type, p : e.p }] ), t : e.t, p : e.p }; default: } return e.map(mapExprVar); @@ -275,11 +293,12 @@ class Linker { } } - function addShader( name : String, vertex : Null, e : TExpr, p : Int ) { - var s = new ShaderInfos(name, vertex); + function addShader( name : String, stage : ShaderStage, e : TExpr, p : Int, isBatchInit : Bool ) { + var s = new ShaderInfos(name, stage); curShader = s; s.priority = p; s.body = mapExprVar(e); + s.isBatchInit = isBatchInit; shaders.push(s); curShader = null; debug("Adding shader "+name+" with priority "+p); @@ -302,11 +321,11 @@ class Linker { continue; if( !parent.writeMap.exists(v.id) ) continue; - if( s.vertex ) { - if( parent.vertex == false ) + if( s.stage == Vertex ) { + if( parent.stage == Fragment ) continue; - if( parent.vertex == null ) - parent.vertex = true; + if( parent.stage == Undefined ) + parent.stage = Vertex; } debug(s.name + " => " + parent.name + " (" + v.path + ")"); s.deps.set(parent, true); @@ -328,24 +347,41 @@ class Linker { buildDependency(s, r, s.writeMap.exists(r.id)); } - function collect( cur : ShaderInfos, out : Array, vertex : Bool ) { + function collect( cur : ShaderInfos, vout : Array, fout : Array, stage : ShaderStage ) { if( cur.onStack ) error("Loop in shader dependencies ("+cur.name+")", null); - if( cur.marked == vertex ) + if( cur.marked.has(stage) ) return; - cur.marked = vertex; + cur.marked.set(stage); cur.onStack = true; + var deps = [for( d in cur.deps.keys() ) d]; deps.sort(sortByPriorityDesc); for( d in deps ) - collect(d, out, vertex); - if( cur.vertex == null ) { - debug("MARK " + cur.name+" " + (vertex?"vertex":"fragment")); - cur.vertex = vertex; - } - if( cur.vertex == vertex ) { - debug("COLLECT " + cur.name + " " + (vertex?"vertex":"fragment")); + collect(d, vout, fout, cur.stage == Vertex ? Vertex : stage); + + inline function add(stage : ShaderStage) { + cur.added.set(stage); + var isVertex = stage == Vertex; + var out = isVertex ? vout : fout; out.push(cur); + debug("COLLECT " + cur.name + " " + (isVertex?"vertex":"fragment")); + } + + if ( cur.isBatchInit ) { + // Batch init can be added multiple times, once per stage + if ( !cur.added.has(stage) ) + add(stage); + } else if ( cur.added.toInt() == 0 ) { + add(cur.stage == Undefined ? stage : cur.stage); + } else if ( !cur.added.has(Vertex) && stage == Vertex ) { + if ( cur.stage == Fragment ) + error("Shader " + cur.name + " cannot be added to vertex stage because it is marked as fragment", null); + // Init was first encountered as fragment dependency, but is also needed in vertex + debug("REMOVE " + cur.name + " from fragment"); + cur.added.unset(Fragment); + fout.remove(cur); + add(stage); } cur.onStack = false; } @@ -376,12 +412,16 @@ class Linker { isBatchShader = mode == Batch && StringTools.startsWith(s.name,"batchShader_"); for( v in s.vars ) { var v2 = allocVar(v, null, s.name); - if( isBatchShader && v2.v.kind == Param && !StringTools.startsWith(v2.path,"Batch_") ) { - v2.v.kind = Local; - if ( v2.v.qualifiers == null ) - v2.v.qualifiers = []; - if(!v2.v.hasQualifier(Flat)){ - v2.v.qualifiers.push(Flat); + if( isBatchShader ) { + var isBatchParam = StringTools.startsWith(v2.path,"Batch_"); + if ( v2.v.kind == Param && !isBatchParam ) + v2.v.kind = Local; + if ( v.kind == Local ) { + if ( v2.v.qualifiers == null ) + v2.v.qualifiers = []; + var qualifier = isBatchParam ? Flat : NoVar; + if ( !v2.v.hasQualifier(qualifier) ) + v2.v.qualifiers.push(qualifier); } } if( v.kind == Output ) outVars.push(v); @@ -406,6 +446,7 @@ class Linker { frag : -500, } for( s in shadersData ) { + isBatchShader = mode == Batch && StringTools.startsWith(s.name,"batchShader_"); for( f in s.funs ) { var v = allocVar(f.ref, f.expr.p); if( v.kind == null ) throw "assert"; @@ -414,26 +455,27 @@ class Linker { if( mode == Compute ) throw "Unexpected "+v.kind.getName().toLowerCase()+"() function in compute shader"; var offset = v.kind == Vertex ? shaderOffset.vert : shaderOffset.frag; - addShader(s.name + "." + (v.kind == Vertex ? "vertex" : "fragment"), v.kind == Vertex, f.expr, priority + offset); + addShader(s.name + "." + (v.kind == Vertex ? "vertex" : "fragment"), v.kind == Vertex ? Vertex : Fragment, f.expr, priority + offset, false); case Main: if( mode != Compute ) throw "Unexpected main() outside compute shader"; - addShader(s.name, true, f.expr, priority).isCompute = true; + addShader(s.name, Vertex, f.expr, priority, false).isCompute = true; case Init: var prio : Array; - var status : Null = switch( f.ref.name ) { - case "__init__vertex": prio = initPrio.vert; true; - case "__init__fragment": prio = initPrio.frag; false; - case "__init__main": prio = initPrio.main; false; - default: prio = initPrio.init; null; + var isBatchInit = false; + var status : ShaderStage = switch( f.ref.name ) { + case "__init__vertex": prio = initPrio.vert; Vertex; + case "__init__fragment": prio = initPrio.frag; Fragment; + case "__init__main": prio = initPrio.main; Fragment; + default: prio = initPrio.init; isBatchInit = isBatchShader; Undefined; } switch( f.expr.e ) { case TBlock(el): var index = 0; for( e in el ) - addShader(s.name+"."+f.ref.name+(index++),status,e, prio[0]++); + addShader(s.name+"."+f.ref.name+(index++),status,e, prio[0]++, isBatchInit); default: - addShader(s.name+"."+f.ref.name,status,f.expr, prio[0]++); + addShader(s.name+"."+f.ref.name,status,f.expr, prio[0]++, isBatchInit); } case Helper: throw "Unexpected helper function in linker "+v.v.name; @@ -453,22 +495,29 @@ class Linker { #end // build dependency tree - var entry = new ShaderInfos("", false); - entry.deps = new Map(); + var ventry = new ShaderInfos("", Vertex); + ventry.deps = new Map(); + if ( outVars.length > 0 ) + buildDependency(ventry, allocVar(outVars[0],null), false); + var fentry = new ShaderInfos("", Fragment); + fentry.deps = new Map(); for( v in outVars ) - buildDependency(entry, allocVar(v,null), false); + buildDependency(fentry, allocVar(v,null), false); // force shaders containing discard to be included for( s in shaders ) if( s.hasDiscard || s.isCompute || s.hasSyntax ) { initDependencies(s); - entry.deps.set(s, true); + if ( s.stage == Vertex ) + ventry.deps.set(s, true); + else + fentry.deps.set(s, true); } // force shaders reading only params into fragment shader // (pixelColor = color with no effect in BaseMesh) for( s in shaders ) { - if( s.vertex != null ) continue; + if( s.stage != Undefined ) continue; var onlyParams = true; for( r in s.readVars ) if( r.v.kind != Param ) { @@ -477,7 +526,7 @@ class Linker { } if( onlyParams ) { debug("Force " + s.name + " into fragment since it only reads params"); - s.vertex = false; + s.stage = Fragment; } } @@ -485,36 +534,37 @@ class Linker { if ( s.deps == null) continue; // propagate fragment flag - if( s.vertex == null ) + if( s.stage == Undefined ) for( d in s.deps.keys() ) - if( d.vertex == false ) { + if( d.stage == Fragment ) { debug(s.name + " marked as fragment because of " + d.name); - s.vertex = false; + s.stage = Fragment; break; } // propagate vertex flag - if( s.vertex ) + if( s.stage == Vertex ) for( d in s.deps.keys() ) - if( d.vertex == null ) { + if( d.stage == Undefined ) { debug(d.name + " marked as vertex because of " + s.name); - d.vertex = true; + d.stage = Vertex; } } // collect needed dependencies var v = [], f = []; - collect(entry, v, true); - collect(entry, f, false); - if( f.pop() != entry ) throw "assert"; + collect(ventry, v, f, Vertex); + if( v.pop() != ventry ) throw "assert"; + collect(fentry, v, f, Fragment); + if( f.pop() != fentry ) throw "assert"; // check that all dependencies are matched for( s in shaders ) - s.marked = null; + s.marked = haxe.EnumFlags.ofInt(0); for( s in v.concat(f) ) { for( d in s.deps.keys() ) - if( d.marked == null ) + if( d.marked.toInt() == 0 ) error(d.name + " needed by " + s.name + " is unreachable", null); - s.marked = true; + s.marked.set(Vertex); } // build resulting vars diff --git a/hxsl/MacroParser.hx b/hxsl/MacroParser.hx index f10376ced..cbea9f761 100644 --- a/hxsl/MacroParser.hx +++ b/hxsl/MacroParser.hx @@ -128,6 +128,7 @@ class MacroParser { case "Channel2": return TChannel(2); case "Channel3": return TChannel(3); case "Channel4": return TChannel(4); + case "TextureHandle": return TTextureHandle; case _ if( StringTools.startsWith(name,"Sampler") ): var t = getTexDim(name.substr(7), (d,arr) -> TSampler(d,arr)); if( t != null ) return t; diff --git a/hxsl/Macros.hx b/hxsl/Macros.hx index 4e40653ba..c7db8676e 100644 --- a/hxsl/Macros.hx +++ b/hxsl/Macros.hx @@ -49,6 +49,8 @@ class Macros { throw "assert"; case TBuffer(_): macro : hxsl.Types.Buffer; + case TTextureHandle: + macro : hxsl.Types.TextureHandle; } } diff --git a/hxsl/Printer.hx b/hxsl/Printer.hx index 58f30f7b5..3c166dad4 100644 --- a/hxsl/Printer.hx +++ b/hxsl/Printer.hx @@ -66,6 +66,7 @@ class Printer { case Sampler(s): "sampler("+ s + ")"; case Final: "final"; case Flat: "flat"; + case NoVar: "noVar"; }) + " "); } if( v.kind != defKind ) diff --git a/hxsl/RuntimeShader.hx b/hxsl/RuntimeShader.hx index 04cf54424..0acc6d4ca 100644 --- a/hxsl/RuntimeShader.hx +++ b/hxsl/RuntimeShader.hx @@ -61,6 +61,9 @@ class RuntimeShaderData { public var texturesCount : Int; public var buffers : AllocParam; public var bufferCount : Int; + public var globalsHandleCount : Int; + public var paramsHandleCount : Int; + public var hasBindless : Bool; public function new() { } } @@ -99,6 +102,10 @@ class RuntimeShader { id = UID++; } + public inline function hasBindless() : Bool { + return vertex.hasBindless || (fragment != null && fragment.hasBindless); + } + public inline function hasGlobal( gid : Int ) { return globals.exists(gid); } diff --git a/hxsl/Serializer.hx b/hxsl/Serializer.hx index 5ed2450e7..9c8c98835 100644 --- a/hxsl/Serializer.hx +++ b/hxsl/Serializer.hx @@ -106,7 +106,7 @@ class Serializer { out.addByte((dim.getIndex() << 1) | (arr ? 1 : 0)); case TRWTexture(dim, arr, chans): out.addByte((dim.getIndex() << 3) | (arr ? 1 : 0) | ((chans - 1) << 1)); - case TVoid, TInt, TBool, TFloat, TString, TMat2, TMat3, TMat4, TMat3x4: + case TVoid, TInt, TBool, TFloat, TString, TMat2, TMat3, TMat4, TMat3x4, TTextureHandle: case __TUnused: throw "assert"; } @@ -173,6 +173,8 @@ class Serializer { TBuffer(t, v == null ? SConst(readVarInt()) : SVar(v), kind); case 17: TChannel(input.readByte()); + case 18: + TTextureHandle; default: throw "assert"; } @@ -209,7 +211,7 @@ class Serializer { for( q in v.qualifiers ) { out.addByte(q.getIndex()); switch (q) { - case Private, Nullable, PerObject, Shared, Ignore, Final, Flat: + case Private, Nullable, PerObject, Shared, Ignore, Final, Flat, NoVar: case Const(max): out.addInt32(max == null ? 0 : max); case Name(n): writeString(n); case Precision(p): out.addByte(p.getIndex()); diff --git a/hxsl/Splitter.hx b/hxsl/Splitter.hx index c7b9abef7..de1234ac6 100644 --- a/hxsl/Splitter.hx +++ b/hxsl/Splitter.hx @@ -58,7 +58,7 @@ class Splitter { var v = inf.v; if( inf.local ) continue; switch( v.kind ) { - case Var, Local: + case Var, Local if ( !v.hasQualifier(NoVar) ): var fv = fvars.get(inf.origin.id); v.kind = fv != null && fv.read > 0 ? Var : Local; default: @@ -264,7 +264,7 @@ class Splitter { if( v.qualifiers != null ) { for ( q in v.qualifiers ) { switch (q) { - case Final, Flat: + case Final, Flat, NoVar: if ( nv.qualifiers == null ) nv.qualifiers = []; nv.qualifiers.push(q); @@ -353,6 +353,10 @@ class Splitter { checkSyntaxExpr(arg.e); checkExpr(arg.e); } + case TCall({ e : TGlobal(ResolveSampler)}, [handle, { e : TVar(v)}]): + var inf = get(v); + inf.write++; + checkExpr(handle); default: e.iter(checkExpr); } diff --git a/hxsl/Types.hx b/hxsl/Types.hx index f132f4081..1f5630a2c 100644 --- a/hxsl/Types.hx +++ b/hxsl/Types.hx @@ -8,6 +8,7 @@ typedef Matrix = h3d.Matrix; typedef Texture = h3d.mat.Texture; typedef TextureArray = h3d.mat.TextureArray; typedef TextureChannel = h3d.mat.Texture; +typedef TextureHandle = h3d.mat.TextureHandle; typedef Buffer = h3d.Buffer; class ChannelTools { diff --git a/samples/Bindless.hx b/samples/Bindless.hx new file mode 100644 index 000000000..03d86d38f --- /dev/null +++ b/samples/Bindless.hx @@ -0,0 +1,57 @@ +class Bindless extends SampleApp { + + var cache : h3d.prim.ModelCache; + + var rockBatch : h3d.scene.MeshBatch; + var treeBatch : h3d.scene.MeshBatch; + + override function init() { + cache = new h3d.prim.ModelCache(); + + var sun = new h3d.scene.pbr.DirLight(new h3d.Vector(0.3, -0.4, -0.9), s3d); + sun.shadows.mode = Dynamic; + + var rockTex1 = hxd.Res.rockTexture.toTexture(); + var treeTex1 = hxd.Res.treeTexture.toTexture(); + + var rock : h3d.scene.Mesh = cast cache.loadModel(hxd.Res.rock); + var tree : h3d.scene.Mesh = cast cache.loadModel(hxd.Res.tree); + + var rockTex2 = hxd.Res.rockTexture2.toTexture(); + var treeTex2 = hxd.Res.treeTexture2.toTexture(); + + rockBatch = new h3d.scene.MeshBatch(cast rock.primitive,s3d); + rockBatch.material.texture = rockTex1; + rockBatch.enableStorageBuffer(); + rockBatch.enablePerInstanceTexture(); + rockBatch.begin(40); + treeBatch = new h3d.scene.MeshBatch(cast tree.primitive,s3d); + treeBatch.material.texture = treeTex1; + treeBatch.enableStorageBuffer(); + treeBatch.enablePerInstanceTexture(); + treeBatch.begin(40); + + var rand = new hxd.Rand(2); + for ( i in 0...40 ) { + rockBatch.x = rand.rand() * 40 - 20; + rockBatch.y = rand.rand() * 40 - 20; + rockBatch.setRotation(0, 0, rand.rand() * Math.PI * 2); + treeBatch.x = rand.rand() * 40 - 20; + treeBatch.y = rand.rand() * 40 - 20; + treeBatch.setRotation(0, 0, rand.rand() * Math.PI * 2); + rockBatch.material.texture = (rand.rand() < 0.5) ? rockTex1 : rockTex2; + treeBatch.material.texture = (rand.rand() < 0.5) ? treeTex1 : treeTex2; + rockBatch.emitInstance(); + treeBatch.emitInstance(); + } + + new h3d.scene.CameraController(s3d).loadFromCamera(); + } + + static function main() { + h3d.mat.PbrMaterialSetup.set(); + hxd.Res.initEmbed(); + new Bindless(); + } + +} diff --git a/samples/bindless_res/rock.hmd b/samples/bindless_res/rock.hmd new file mode 100644 index 000000000..46e50eeed Binary files /dev/null and b/samples/bindless_res/rock.hmd differ diff --git a/samples/bindless_res/rockTexture.jpg b/samples/bindless_res/rockTexture.jpg new file mode 100644 index 000000000..c6d1fc011 Binary files /dev/null and b/samples/bindless_res/rockTexture.jpg differ diff --git a/samples/bindless_res/rockTexture2.jpg b/samples/bindless_res/rockTexture2.jpg new file mode 100644 index 000000000..24d5db9c7 Binary files /dev/null and b/samples/bindless_res/rockTexture2.jpg differ diff --git a/samples/bindless_res/tree.hmd b/samples/bindless_res/tree.hmd new file mode 100644 index 000000000..70e25a79b Binary files /dev/null and b/samples/bindless_res/tree.hmd differ diff --git a/samples/bindless_res/treeTexture.png b/samples/bindless_res/treeTexture.png new file mode 100644 index 000000000..2d689899a Binary files /dev/null and b/samples/bindless_res/treeTexture.png differ diff --git a/samples/bindless_res/treeTexture2.png b/samples/bindless_res/treeTexture2.png new file mode 100644 index 000000000..dec034242 Binary files /dev/null and b/samples/bindless_res/treeTexture2.png differ