GPUDevice: Use separate buffer for push constants

This commit is contained in:
Stenzek
2025-10-14 19:40:23 +10:00
parent 6848f07c57
commit 916b23f85c
13 changed files with 176 additions and 117 deletions

View File

@@ -1981,7 +1981,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b
if (IsVulkan())
ss << ", set = 0, binding = 0";
else if (IsMetal())
ss << ", set = 0, binding = 1";
ss << ", set = 1, binding = 0";
else if (m_use_glsl_binding_layout)
ss << ", binding = 0";

View File

@@ -5,4 +5,4 @@
#include "common/types.h"
inline constexpr u32 SHADER_CACHE_VERSION = 33;
inline constexpr u32 SHADER_CACHE_VERSION = 34;

View File

@@ -545,8 +545,21 @@ bool D3D11Device::CreateBuffers(Error* error)
return false;
}
const CD3D11_BUFFER_DESC pc_desc(PUSH_CONSTANT_BUFFER_SIZE, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC,
D3D11_CPU_ACCESS_WRITE);
if (const HRESULT hr = m_device->CreateBuffer(&pc_desc, nullptr, m_push_constant_buffer.GetAddressOf()); FAILED(hr))
{
Error::SetHResult(error, "Failed to create push constant buffer: ", hr);
return false;
}
// Index buffer never changes :)
m_context->IASetIndexBuffer(m_index_buffer.GetD3DBuffer(), DXGI_FORMAT_R16_UINT, 0);
m_context->VSSetConstantBuffers(1, 1, m_push_constant_buffer.GetAddressOf());
m_context->PSSetConstantBuffers(1, 1, m_push_constant_buffer.GetAddressOf());
if (m_features.compute_shaders)
m_context->CSSetConstantBuffers(1, 1, m_push_constant_buffer.GetAddressOf());
return true;
}
@@ -919,15 +932,19 @@ void D3D11Device::UnmapIndexBuffer(u32 used_index_count)
// Copies `data_size` bytes from `data` into a GPU constant buffer.
//
// NOTE(review): this span comes from a rendered diff and appears to contain both the
// streaming-buffer body and the dedicated-buffer body of the function back to back -
// confirm against the repository which statements belong to the current revision.
void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
{
// Streaming path: the alignment depends on whether m_uniform_buffer reports map-no-overwrite use.
const u32 req_align =
m_uniform_buffer.IsUsingMapNoOverwrite() ? UNIFORM_BUFFER_ALIGNMENT : UNIFORM_BUFFER_ALIGNMENT_DISCARD;
const u32 req_size = Common::AlignUpPow2(data_size, req_align);
const auto res = m_uniform_buffer.Map(m_context.Get(), req_align, req_size);
std::memcpy(res.pointer, data, data_size);
m_uniform_buffer.Unmap(m_context.Get(), req_size);
s_stats.buffer_streamed += data_size;
// Debug-only guard: the payload must not exceed PUSH_CONSTANT_BUFFER_SIZE.
DebugAssert(data_size <= PUSH_CONSTANT_BUFFER_SIZE);
BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size);
// Dedicated path: map m_push_constant_buffer with WRITE_DISCARD, copy the payload, unmap.
D3D11_MAPPED_SUBRESOURCE mapped;
if (const HRESULT hr = m_context->Map(m_push_constant_buffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
FAILED(hr))
{
// Map failure is logged and the upload is skipped; nothing is written to the buffer.
ERROR_LOG("Failed to map push constant buffer: {:08X}", static_cast<unsigned>(hr));
return;
}
std::memcpy(mapped.pData, data, data_size);
m_context->Unmap(m_push_constant_buffer.Get(), 0);
s_stats.buffer_streamed += data_size;
}
void* D3D11Device::MapUniformBuffer(u32 size)

View File

@@ -144,6 +144,7 @@ private:
static constexpr u32 MIN_UNIFORM_BUFFER_SIZE = 16;
static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256;
static constexpr u32 UNIFORM_BUFFER_ALIGNMENT_DISCARD = 16;
static constexpr u32 PUSH_CONSTANT_BUFFER_SIZE = 128;
static constexpr u8 NUM_TIMESTAMP_QUERIES = 3;
void SetFeatures(CreateFlags create_flags);
@@ -183,6 +184,7 @@ private:
D3D11StreamBuffer m_vertex_buffer;
D3D11StreamBuffer m_index_buffer;
D3D11StreamBuffer m_uniform_buffer;
ComPtr<ID3D11Buffer> m_push_constant_buffer;
D3D11Pipeline* m_current_pipeline = nullptr;
std::array<D3D11Texture*, MAX_RENDER_TARGETS> m_current_render_targets = {};

View File

@@ -1682,7 +1682,7 @@ bool D3D12Device::CreateRootSignatures(Error* error)
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
D3D12_SHADER_VISIBILITY_PIXEL);
}
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout");
@@ -1698,7 +1698,7 @@ bool D3D12Device::CreateRootSignatures(Error* error)
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
D3D12_SHADER_VISIBILITY_PIXEL);
}
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout");
@@ -1734,7 +1734,7 @@ bool D3D12Device::CreateRootSignatures(Error* error)
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
D3D12_SHADER_VISIBILITY_PIXEL);
}
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout");

View File

@@ -1704,14 +1704,16 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
}
// Need to know if there's UBOs for mapping.
const spvc_reflected_resource *ubos, *textures;
size_t ubos_count, textures_count, images_count;
const spvc_reflected_resource *ubos, *push_constants, *textures, *images;
size_t ubos_count, push_constants_count, textures_count, images_count;
if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos,
&ubos_count)) != SPVC_SUCCESS ||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(
resources, SPVC_RESOURCE_TYPE_PUSH_CONSTANT, &push_constants, &push_constants_count)) != SPVC_SUCCESS ||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
&textures, &textures_count)) != SPVC_SUCCESS ||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE,
&textures, &images_count)) != SPVC_SUCCESS)
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE, &images,
&images_count)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres));
return {};
@@ -1765,7 +1767,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
.sampler = {}};
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() for UBO failed: {}",
static_cast<int>(sres));
return {};
}
}
if (push_constants_count > 0)
{
const spvc_hlsl_resource_binding rb = {.stage = execmodel,
.desc_set = SPVC_HLSL_PUSH_CONSTANT_DESC_SET,
.binding = SPVC_HLSL_PUSH_CONSTANT_BINDING,
.cbv = {.register_space = 0, .register_binding = 1},
.uav = {},
.srv = {},
.sampler = {}};
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() for push constant failed: {}",
static_cast<int>(sres));
return {};
}
}
@@ -1783,7 +1803,8 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
.sampler = {.register_space = 0, .register_binding = i}};
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() for texture failed: {}",
static_cast<int>(sres));
return {};
}
}
@@ -1802,7 +1823,8 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
.sampler = {}};
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() for image failed: {}",
static_cast<int>(sres));
return {};
}
}
@@ -1875,63 +1897,52 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
return {};
}
const spvc_msl_resource_binding pc_rb = {.stage = execmodel,
.desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET,
.binding = SPVC_MSL_PUSH_CONSTANT_BINDING,
.msl_buffer = 0,
.msl_texture = 0,
.msl_sampler = 0};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}",
static_cast<int>(sres));
return {};
}
// Local helper: registers one resource binding with the SPIRV-Cross MSL compiler for the
// current execution model. `desc_set`/`binding` are stored in the binding's source-side fields,
// and `msl_buffer`/`msl_texture`/`msl_sampler` in its MSL-side index fields.
// Returns true on success; on failure stores a message in `error` and returns false.
const auto add_msl_resource_binding = [&scompiler, &execmodel, &error](unsigned desc_set, unsigned binding,
unsigned msl_buffer, unsigned msl_texture,
unsigned msl_sampler) {
const spvc_msl_resource_binding rb = {.stage = execmodel,
.desc_set = desc_set,
.binding = binding,
.msl_buffer = msl_buffer,
.msl_texture = msl_texture,
.msl_sampler = msl_sampler};
// Uses its own result variable rather than capturing one from the enclosing scope.
const spvc_result sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb);
if (sres != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
return false;
}
return true;
};
// push constant
if (!add_msl_resource_binding(SPVC_MSL_PUSH_CONSTANT_DESC_SET, SPVC_MSL_PUSH_CONSTANT_BINDING, 2, 0, 0))
return false;
if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
{
const spvc_msl_resource_binding rb = {.stage = execmodel,
.desc_set = TEXTURE_DESCRIPTOR_SET,
.binding = i,
.msl_buffer = i,
.msl_texture = i,
.msl_sampler = i};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
return {};
}
// Add +1 for the buffer binding since we use this for texture buffers.
if (!add_msl_resource_binding(TEXTURE_DESCRIPTOR_SET, i, i + 1, i, i))
return false;
}
}
if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch)
{
const spvc_msl_resource_binding rb = {
.stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
static_cast<int>(sres));
return {};
}
if (!add_msl_resource_binding(2, 0, 0, MAX_TEXTURE_SAMPLERS, 0))
return false;
}
if (stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
{
const spvc_msl_resource_binding rb = {
.stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
return {};
}
if (!add_msl_resource_binding(2, i, i, i, i))
return false;
}
}
}

View File

@@ -82,8 +82,8 @@ class MetalPipeline final : public GPUPipeline
public:
~MetalPipeline() override;
ALWAYS_INLINE bool IsRenderPipeline() const { return (m_depth != nil); }
ALWAYS_INLINE bool IsComputePipeline() const { return (m_depth == nil); }
ALWAYS_INLINE bool IsRenderPipeline() const { return !IsComputePipeline(); }
ALWAYS_INLINE bool IsComputePipeline() const { return GPUDevice::IsComputeLayout(m_layout); }
ALWAYS_INLINE id<MTLRenderPipelineState> GetRenderPipelineState() const
{
return (id<MTLRenderPipelineState>)m_pipeline;
@@ -93,20 +93,23 @@ public:
return (id<MTLComputePipelineState>)m_pipeline;
}
ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; }
ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; }
ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; }
ALWAYS_INLINE Layout GetLayout() const { return m_layout; }
ALWAYS_INLINE MTLCullMode GetCullMode() const { return static_cast<MTLCullMode>(m_cull_mode); }
ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return static_cast<MTLPrimitiveType>(m_primitive); }
#ifdef ENABLE_GPU_OBJECT_NAMES
void SetDebugName(std::string_view name) override;
#endif
private:
MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode, MTLPrimitiveType primitive);
MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, Layout layout, MTLCullMode cull_mode,
MTLPrimitiveType primitive);
id m_pipeline;
id<MTLDepthStencilState> m_depth;
MTLCullMode m_cull_mode;
MTLPrimitiveType m_primitive;
Layout m_layout;
u8 m_cull_mode;
u8 m_primitive;
};
class MetalTexture final : public GPUTexture
@@ -350,6 +353,12 @@ private:
static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256;
static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 64 * 1024 * 1024; // TODO reduce after separate allocations
static constexpr u8 NUM_TIMESTAMP_QUERIES = 3;
static constexpr u32 VERTEX_BINDING_UBO = 0;
static constexpr u32 VERTEX_BINDING_VBO = 1;
static constexpr u32 VERTEX_BINDING_PUSH_CONSTANTS = 2;
static constexpr u32 FRAGMENT_BINDING_UBO = 0;
static constexpr u32 FRAGMENT_BINDING_SSBO = 1;
static constexpr u32 FRAGMENT_BINDING_PUSH_CONSTANTS = 2;
using DepthStateMap = std::unordered_map<u8, id<MTLDepthStencilState>>;
@@ -393,7 +402,7 @@ private:
void PreDrawCheck();
void SetInitialEncoderState();
void PushUniformBuffer(const void* data, u32 data_size);
void PushRenderUniformBuffer(const void* data, u32 data_size);
void SubmitDrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type);
void SetViewportInRenderEncoder();
void SetScissorInRenderEncoder();

View File

@@ -667,9 +667,10 @@ std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromSource(GPUShaderStage st
return CreateShaderFromMSL(stage, source, entry_point, error);
}
MetalPipeline::MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
MetalPipeline::MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, Layout layout, MTLCullMode cull_mode,
MTLPrimitiveType primitive)
: m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive)
: m_pipeline(pipeline), m_depth(depth), m_layout(layout), m_cull_mode(static_cast<u8>(cull_mode)),
m_primitive(static_cast<u8>(primitive))
{
}
@@ -868,7 +869,7 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
return {};
}
return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, depth, cull_mode, primitive));
return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, depth, config.layout, cull_mode, primitive));
}
}
@@ -891,7 +892,8 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Comp
return {};
}
return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, nil, MTLCullModeNone, MTLPrimitiveTypePoint));
return std::unique_ptr<GPUPipeline>(
new MetalPipeline(pipeline, nil, config.layout, MTLCullModeNone, MTLPrimitiveTypePoint));
}
}
@@ -1593,11 +1595,13 @@ void MetalDevice::ClearDepth(GPUTexture* t, float d)
[m_render_encoder setCullMode:MTLCullModeNone];
if (depth != m_current_depth_state)
[m_render_encoder setDepthStencilState:depth];
[m_render_encoder setVertexBytes:&d length:sizeof(d) atIndex:0];
[m_render_encoder setVertexBytes:&d length:sizeof(d) atIndex:VERTEX_BINDING_UBO];
[m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:0 vertexCount:3];
s_stats.num_draws++;
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer()
offset:m_current_uniform_buffer_position
atIndex:VERTEX_BINDING_UBO];
if (m_current_pipeline)
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
if (m_current_cull_mode != MTLCullModeNone)
@@ -1826,14 +1830,6 @@ void MetalDevice::UnmapIndexBuffer(u32 used_index_count)
m_index_buffer.CommitMemory(size);
}
// Copies `data_size` bytes from `data` into the region returned by MapUniformBuffer(),
// then commits it with UnmapUniformBuffer(). Also adds `data_size` to the
// buffer_streamed statistic.
void MetalDevice::PushUniformBuffer(const void* data, u32 data_size)
{
s_stats.buffer_streamed += data_size;
// Map, copy, unmap: goes through the same map/unmap pair as regular uniform uploads.
void* map = MapUniformBuffer(data_size);
std::memcpy(map, data, data_size);
UnmapUniformBuffer(data_size);
}
void* MetalDevice::MapUniformBuffer(u32 size)
{
const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT);
@@ -1854,8 +1850,8 @@ void MetalDevice::UnmapUniformBuffer(u32 size)
m_uniform_buffer.CommitMemory(size);
if (InRenderPass())
{
[m_render_encoder setVertexBufferOffset:m_current_uniform_buffer_position atIndex:0];
[m_render_encoder setFragmentBufferOffset:m_current_uniform_buffer_position atIndex:0];
[m_render_encoder setVertexBufferOffset:m_current_uniform_buffer_position atIndex:VERTEX_BINDING_UBO];
[m_render_encoder setFragmentBufferOffset:m_current_uniform_buffer_position atIndex:FRAGMENT_BINDING_UBO];
}
}
@@ -1983,7 +1979,7 @@ void MetalDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
m_current_ssbo = B;
if (InRenderPass())
[m_render_encoder setFragmentBuffer:B offset:0 atIndex:1];
[m_render_encoder setFragmentBuffer:B offset:0 atIndex:FRAGMENT_BINDING_SSBO];
}
void MetalDevice::UnbindTexture(MetalTexture* tex)
@@ -2030,7 +2026,7 @@ void MetalDevice::UnbindTextureBuffer(MetalTextureBuffer* buf)
m_current_ssbo = nil;
if (InRenderPass())
[m_render_encoder setFragmentBuffer:nil offset:0 atIndex:1];
[m_render_encoder setFragmentBuffer:nil offset:0 atIndex:FRAGMENT_BINDING_SSBO];
}
void MetalDevice::SetViewport(const GSVector4i rc)
@@ -2241,9 +2237,15 @@ void MetalDevice::SetInitialEncoderState()
// Set initial state.
// TODO: avoid uniform set here? it's probably going to get changed...
// Might be better off just deferring all the init until the first draw...
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
[m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
[m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:1];
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer()
offset:m_current_uniform_buffer_position
atIndex:VERTEX_BINDING_UBO];
[m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:VERTEX_BINDING_VBO];
[m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer()
offset:m_current_uniform_buffer_position
atIndex:FRAGMENT_BINDING_UBO];
if (m_current_ssbo)
[m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:FRAGMENT_BINDING_SSBO];
[m_render_encoder setCullMode:m_current_cull_mode];
if (m_current_depth_state != nil)
[m_render_encoder setDepthStencilState:m_current_depth_state];
@@ -2251,8 +2253,6 @@ void MetalDevice::SetInitialEncoderState()
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
[m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
[m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
if (m_current_ssbo)
[m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1];
if (!m_features.framebuffer_fetch && (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))
{
@@ -2291,6 +2291,16 @@ void MetalDevice::PreDrawCheck()
}
}
// Hands `data_size` bytes of push-constant data to the active render encoder for both the
// vertex and fragment stages, using the *_BINDING_PUSH_CONSTANTS indices. The bytes are
// passed straight to the encoder; m_uniform_buffer is not touched.
// Debug-asserts that a render pass is active and a pipeline is bound.
void MetalDevice::PushRenderUniformBuffer(const void* data, u32 data_size)
{
DebugAssert(InRenderPass() && m_current_pipeline);
s_stats.buffer_streamed += data_size;
// Maybe we'd be better off with another buffer...
// NOTE(review): Apple documents setVertexBytes/setFragmentBytes for small (< 4 KB) payloads -
// confirm callers never exceed that.
[m_render_encoder setVertexBytes:data length:data_size atIndex:VERTEX_BINDING_PUSH_CONSTANTS];
[m_render_encoder setFragmentBytes:data length:data_size atIndex:FRAGMENT_BINDING_PUSH_CONSTANTS];
}
void MetalDevice::Draw(u32 vertex_count, u32 base_vertex)
{
PreDrawCheck();
@@ -2302,7 +2312,7 @@ void MetalDevice::DrawWithPushConstants(u32 vertex_count, u32 base_vertex, const
u32 push_constants_size)
{
PreDrawCheck();
PushUniformBuffer(push_constants, push_constants_size);
PushRenderUniformBuffer(push_constants, push_constants_size);
s_stats.num_draws++;
[m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:base_vertex vertexCount:vertex_count];
}
@@ -2329,7 +2339,7 @@ void MetalDevice::DrawIndexedWithPushConstants(u32 index_count, u32 base_index,
{
PreDrawCheck();
PushUniformBuffer(push_constants, push_constants_size);
PushRenderUniformBuffer(push_constants, push_constants_size);
s_stats.num_draws++;
@@ -2357,7 +2367,7 @@ void MetalDevice::DrawIndexedWithBarrierWithPushConstants(u32 index_count, u32 b
{
PreDrawCheck();
PushUniformBuffer(push_constants, push_constants_size);
PushRenderUniformBuffer(push_constants, push_constants_size);
SubmitDrawIndexedWithBarrier(index_count, base_index, base_vertex, type);
}
@@ -2479,7 +2489,7 @@ void MetalDevice::DispatchWithPushConstants(u32 threads_x, u32 threads_y, u32 th
}
DebugAssert(m_current_pipeline && m_current_pipeline->IsComputePipeline());
PushUniformBuffer(push_constants, push_constants_size);
[m_compute_encoder setBytes:push_constants length:push_constants_size atIndex:2];
// TODO: We could remap to the optimal group size..
[m_compute_encoder dispatchThreads:MTLSizeMake(threads_x, threads_y, threads_z)

View File

@@ -707,7 +707,8 @@ bool OpenGLDevice::CreateBuffers()
{
if (!(m_vertex_buffer = OpenGLStreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE)) ||
!(m_index_buffer = OpenGLStreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE)) ||
!(m_uniform_buffer = OpenGLStreamBuffer::Create(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE))) [[unlikely]]
!(m_uniform_buffer = OpenGLStreamBuffer::Create(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE)) ||
!(m_push_constant_buffer = OpenGLStreamBuffer::Create(GL_UNIFORM_BUFFER, PUSH_CONSTANT_BUFFER_SIZE))) [[unlikely]]
{
ERROR_LOG("Failed to create one or more device buffers.");
return false;
@@ -716,6 +717,7 @@ bool OpenGLDevice::CreateBuffers()
GL_OBJECT_NAME(m_vertex_buffer, "Device Vertex Buffer");
GL_OBJECT_NAME(m_index_buffer, "Device Index Buffer");
GL_OBJECT_NAME(m_uniform_buffer, "Device Uniform Buffer");
GL_OBJECT_NAME(m_push_constant_buffer, "Device Push Constant Buffer");
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&m_uniform_buffer_alignment));
m_uniform_buffer_alignment = std::max<GLuint>(m_uniform_buffer_alignment, 16);
@@ -756,6 +758,7 @@ void OpenGLDevice::DestroyBuffers()
if (m_read_fbo != 0)
glDeleteFramebuffers(1, &m_read_fbo);
m_texture_stream_buffer.reset();
m_push_constant_buffer.reset();
m_uniform_buffer.reset();
m_index_buffer.reset();
m_vertex_buffer.reset();
@@ -1109,11 +1112,11 @@ void OpenGLDevice::UnmapIndexBuffer(u32 used_index_count)
// Streams `data_size` bytes from `data` into a GL stream buffer and binds the written range
// as an indexed uniform buffer: map with m_uniform_buffer_alignment, copy, unmap, then
// glBindBufferRange() at the offset returned by Map().
//
// NOTE(review): this span comes from a rendered diff and appears to show each statement twice,
// once against m_uniform_buffer / binding index 0 and once against m_push_constant_buffer /
// binding index 1 - confirm against the repository which set is current.
void OpenGLDevice::PushUniformBuffer(const void* data, u32 data_size)
{
const auto res = m_uniform_buffer->Map(m_uniform_buffer_alignment, data_size);
const auto res = m_push_constant_buffer->Map(m_uniform_buffer_alignment, data_size);
std::memcpy(res.pointer, data, data_size);
m_uniform_buffer->Unmap(data_size);
m_push_constant_buffer->Unmap(data_size);
s_stats.buffer_streamed += data_size;
// Only the bytes just written (data_size at res.buffer_offset) are exposed to shaders.
glBindBufferRange(GL_UNIFORM_BUFFER, 0, m_uniform_buffer->GetGLBufferId(), res.buffer_offset, data_size);
glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_push_constant_buffer->GetGLBufferId(), res.buffer_offset, data_size);
}
void* OpenGLDevice::MapUniformBuffer(u32 size)

View File

@@ -165,6 +165,7 @@ private:
static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024;
static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024;
static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr u32 PUSH_CONSTANT_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;
bool CheckFeatures(CreateFlags create_flags);
@@ -201,6 +202,7 @@ private:
std::unique_ptr<OpenGLStreamBuffer> m_vertex_buffer;
std::unique_ptr<OpenGLStreamBuffer> m_index_buffer;
std::unique_ptr<OpenGLStreamBuffer> m_uniform_buffer;
std::unique_ptr<OpenGLStreamBuffer> m_push_constant_buffer;
std::unique_ptr<OpenGLStreamBuffer> m_texture_stream_buffer;
// TODO: pass in file instead of blob for pipeline cache

View File

@@ -472,9 +472,12 @@ void OpenGLDevice::PostLinkProgram(const GPUPipeline::GraphicsConfig& plconfig,
{
if (!ShaderGen::UseGLSLBindingLayout())
{
GLint location = glGetUniformBlockIndex(program_id, "UBOBlock");
if (location >= 0)
glUniformBlockBinding(program_id, location, 0);
const GLint ubo_location = glGetUniformBlockIndex(program_id, "UBOBlock");
if (ubo_location >= 0)
glUniformBlockBinding(program_id, ubo_location, 0);
const GLint push_constant_location = glGetUniformBlockIndex(program_id, "PushConstants");
if (push_constant_location >= 0)
glUniformBlockBinding(program_id, push_constant_location, 1);
glUseProgram(program_id);
@@ -482,9 +485,9 @@ void OpenGLDevice::PostLinkProgram(const GPUPipeline::GraphicsConfig& plconfig,
const u32 num_textures = std::max<u32>(GetActiveTexturesForLayout(plconfig.layout), 1);
for (u32 i = 0; i < num_textures; i++)
{
location = glGetUniformLocation(program_id, TinyString::from_format("samp{}", i));
if (location >= 0)
glUniform1i(location, i);
const GLint samp_location = glGetUniformLocation(program_id, TinyString::from_format("samp{}", i));
if (samp_location >= 0)
glUniform1i(samp_location, i);
}
glUseProgram(m_last_program);

View File

@@ -379,40 +379,42 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */
m_has_uniform_buffer = false;
}
// Writes the opening declaration line of a uniform block to `ss`, in the syntax of the active
// shader language (Vulkan-style GLSL, plain GLSL, or an HLSL cbuffer).
// With `push_constant` set, the block is named "PushConstants" and uses binding/register 1;
// otherwise it is named "UBOBlock" and uses binding/register 0.
//
// NOTE(review): this span comes from a rendered diff and appears to interleave two revisions
// (the signature and several stream statements occur twice) - confirm against the repository
// which lines are current.
void ShaderGen::WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan) const
void ShaderGen::WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant) const
{
const u32 binding = push_constant ? 1 : 0;
const char* const name = push_constant ? "PushConstants" : "UBOBlock";
if (m_shader_language == GPUShaderLanguage::GLSLVK)
{
// Vulkan-style GLSL: a push_constant layout is used when requested and the render API
// condition holds; otherwise a std140 block in set 0 is emitted.
if (m_render_api == RenderAPI::Vulkan && push_constant_on_vulkan)
if (push_constant && (m_render_api == RenderAPI::Vulkan || m_render_api == RenderAPI::Metal))
{
ss << "layout(push_constant, row_major) uniform PushConstants\n";
ss << "layout(push_constant, row_major) uniform " << name << "\n";
}
else
{
ss << "layout(std140, row_major, set = 0, binding = 0) uniform UBOBlock\n";
ss << "layout(std140, row_major, set = 0, binding = " << binding << ") uniform " << name << "\n";
m_has_uniform_buffer = true;
}
}
else if (m_glsl)
{
// Plain GLSL: an explicit binding is written only when m_use_glsl_binding_layout is set.
if (m_use_glsl_binding_layout)
ss << "layout(std140, row_major, binding = 0) uniform UBOBlock\n";
ss << "layout(std140, row_major, binding = " << binding << ") uniform " << name << "\n";
else
ss << "layout(std140, row_major) uniform UBOBlock\n";
ss << "layout(std140, row_major) uniform " << name << "\n";
m_has_uniform_buffer = true;
}
else
{
// Remaining languages get an HLSL-style cbuffer bound to a b-register.
ss << "cbuffer UBOBlock : register(b0)\n";
ss << "cbuffer " << name << " : register(b" << binding << ")\n";
m_has_uniform_buffer = true;
}
}
void ShaderGen::DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list<const char*>& members,
bool push_constant_on_vulkan) const
bool push_constant) const
{
WriteUniformBufferDeclaration(ss, push_constant_on_vulkan);
WriteUniformBufferDeclaration(ss, push_constant);
ss << "{\n";
for (const char* member : members)

View File

@@ -45,9 +45,9 @@ public:
void DefineMacro(std::stringstream& ss, const char* name, s32 value) const;
void WriteHeader(std::stringstream& ss, bool enable_rov = false, bool enable_framebuffer_fetch = false,
bool enable_dual_source_blend = false) const;
void WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan) const;
void WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant) const;
void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list<const char*>& members,
bool push_constant_on_vulkan) const;
bool push_constant) const;
void DeclareTexture(std::stringstream& ss, const char* name, u32 index, bool multisampled = false,
bool is_int = false, bool is_unsigned = false) const;
void DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned) const;