mirror of
https://github.com/stenzek/duckstation.git
synced 2026-02-13 18:04:32 +00:00
GPUDevice: Use separate buffer for push constants
This commit is contained in:
@@ -1981,7 +1981,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_buffer, b
|
||||
if (IsVulkan())
|
||||
ss << ", set = 0, binding = 0";
|
||||
else if (IsMetal())
|
||||
ss << ", set = 0, binding = 1";
|
||||
ss << ", set = 1, binding = 0";
|
||||
else if (m_use_glsl_binding_layout)
|
||||
ss << ", binding = 0";
|
||||
|
||||
|
||||
@@ -5,4 +5,4 @@
|
||||
|
||||
#include "common/types.h"
|
||||
|
||||
inline constexpr u32 SHADER_CACHE_VERSION = 33;
|
||||
inline constexpr u32 SHADER_CACHE_VERSION = 34;
|
||||
|
||||
@@ -545,8 +545,21 @@ bool D3D11Device::CreateBuffers(Error* error)
|
||||
return false;
|
||||
}
|
||||
|
||||
const CD3D11_BUFFER_DESC pc_desc(PUSH_CONSTANT_BUFFER_SIZE, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC,
|
||||
D3D11_CPU_ACCESS_WRITE);
|
||||
if (const HRESULT hr = m_device->CreateBuffer(&pc_desc, nullptr, m_push_constant_buffer.GetAddressOf()); FAILED(hr))
|
||||
{
|
||||
Error::SetHResult(error, "Failed to create push constant buffer: ", hr);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Index buffer never changes :)
|
||||
m_context->IASetIndexBuffer(m_index_buffer.GetD3DBuffer(), DXGI_FORMAT_R16_UINT, 0);
|
||||
m_context->VSSetConstantBuffers(1, 1, m_push_constant_buffer.GetAddressOf());
|
||||
m_context->PSSetConstantBuffers(1, 1, m_push_constant_buffer.GetAddressOf());
|
||||
if (m_features.compute_shaders)
|
||||
m_context->CSSetConstantBuffers(1, 1, m_push_constant_buffer.GetAddressOf());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -919,15 +932,19 @@ void D3D11Device::UnmapIndexBuffer(u32 used_index_count)
|
||||
|
||||
void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
|
||||
{
|
||||
const u32 req_align =
|
||||
m_uniform_buffer.IsUsingMapNoOverwrite() ? UNIFORM_BUFFER_ALIGNMENT : UNIFORM_BUFFER_ALIGNMENT_DISCARD;
|
||||
const u32 req_size = Common::AlignUpPow2(data_size, req_align);
|
||||
const auto res = m_uniform_buffer.Map(m_context.Get(), req_align, req_size);
|
||||
std::memcpy(res.pointer, data, data_size);
|
||||
m_uniform_buffer.Unmap(m_context.Get(), req_size);
|
||||
s_stats.buffer_streamed += data_size;
|
||||
DebugAssert(data_size <= PUSH_CONSTANT_BUFFER_SIZE);
|
||||
|
||||
BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size);
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
if (const HRESULT hr = m_context->Map(m_push_constant_buffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
|
||||
FAILED(hr))
|
||||
{
|
||||
ERROR_LOG("Failed to map push constant buffer: {:08X}", static_cast<unsigned>(hr));
|
||||
return;
|
||||
}
|
||||
|
||||
std::memcpy(mapped.pData, data, data_size);
|
||||
m_context->Unmap(m_push_constant_buffer.Get(), 0);
|
||||
s_stats.buffer_streamed += data_size;
|
||||
}
|
||||
|
||||
void* D3D11Device::MapUniformBuffer(u32 size)
|
||||
|
||||
@@ -144,6 +144,7 @@ private:
|
||||
static constexpr u32 MIN_UNIFORM_BUFFER_SIZE = 16;
|
||||
static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256;
|
||||
static constexpr u32 UNIFORM_BUFFER_ALIGNMENT_DISCARD = 16;
|
||||
static constexpr u32 PUSH_CONSTANT_BUFFER_SIZE = 128;
|
||||
static constexpr u8 NUM_TIMESTAMP_QUERIES = 3;
|
||||
|
||||
void SetFeatures(CreateFlags create_flags);
|
||||
@@ -183,6 +184,7 @@ private:
|
||||
D3D11StreamBuffer m_vertex_buffer;
|
||||
D3D11StreamBuffer m_index_buffer;
|
||||
D3D11StreamBuffer m_uniform_buffer;
|
||||
ComPtr<ID3D11Buffer> m_push_constant_buffer;
|
||||
|
||||
D3D11Pipeline* m_current_pipeline = nullptr;
|
||||
std::array<D3D11Texture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
|
||||
|
||||
@@ -1682,7 +1682,7 @@ bool D3D12Device::CreateRootSignatures(Error* error)
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
|
||||
D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
}
|
||||
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
||||
if (!(rs = rsb.Create(error, true)))
|
||||
return false;
|
||||
D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout");
|
||||
@@ -1698,7 +1698,7 @@ bool D3D12Device::CreateRootSignatures(Error* error)
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
|
||||
D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
}
|
||||
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
||||
if (!(rs = rsb.Create(error, true)))
|
||||
return false;
|
||||
D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout");
|
||||
@@ -1734,7 +1734,7 @@ bool D3D12Device::CreateRootSignatures(Error* error)
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
|
||||
D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
}
|
||||
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.Add32BitConstants(1, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
||||
if (!(rs = rsb.Create(error, true)))
|
||||
return false;
|
||||
D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout");
|
||||
|
||||
@@ -1704,14 +1704,16 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
}
|
||||
|
||||
// Need to know if there's UBOs for mapping.
|
||||
const spvc_reflected_resource *ubos, *textures;
|
||||
size_t ubos_count, textures_count, images_count;
|
||||
const spvc_reflected_resource *ubos, *push_constants, *textures, *images;
|
||||
size_t ubos_count, push_constants_count, textures_count, images_count;
|
||||
if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos,
|
||||
&ubos_count)) != SPVC_SUCCESS ||
|
||||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(
|
||||
resources, SPVC_RESOURCE_TYPE_PUSH_CONSTANT, &push_constants, &push_constants_count)) != SPVC_SUCCESS ||
|
||||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
|
||||
&textures, &textures_count)) != SPVC_SUCCESS ||
|
||||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE,
|
||||
&textures, &images_count)) != SPVC_SUCCESS)
|
||||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE, &images,
|
||||
&images_count)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres));
|
||||
return {};
|
||||
@@ -1765,7 +1767,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
.sampler = {}};
|
||||
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() for UBO failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
if (push_constants_count > 0)
|
||||
{
|
||||
const spvc_hlsl_resource_binding rb = {.stage = execmodel,
|
||||
.desc_set = SPVC_HLSL_PUSH_CONSTANT_DESC_SET,
|
||||
.binding = SPVC_HLSL_PUSH_CONSTANT_BINDING,
|
||||
.cbv = {.register_space = 0, .register_binding = 1},
|
||||
.uav = {},
|
||||
.srv = {},
|
||||
.sampler = {}};
|
||||
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() for push constant failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
@@ -1783,7 +1803,8 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
.sampler = {.register_space = 0, .register_binding = i}};
|
||||
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() for texture failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
@@ -1802,7 +1823,8 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
.sampler = {}};
|
||||
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() for image failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
@@ -1875,63 +1897,52 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
return {};
|
||||
}
|
||||
|
||||
const spvc_msl_resource_binding pc_rb = {.stage = execmodel,
|
||||
.desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET,
|
||||
.binding = SPVC_MSL_PUSH_CONSTANT_BINDING,
|
||||
.msl_buffer = 0,
|
||||
.msl_texture = 0,
|
||||
.msl_sampler = 0};
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
const auto add_msl_resource_binding = [&scompiler, &execmodel, &error](unsigned desc_set, unsigned binding,
|
||||
unsigned msl_buffer, unsigned msl_texture,
|
||||
unsigned msl_sampler) {
|
||||
const spvc_msl_resource_binding rb = {.stage = execmodel,
|
||||
.desc_set = desc_set,
|
||||
.binding = binding,
|
||||
.msl_buffer = msl_buffer,
|
||||
.msl_texture = msl_texture,
|
||||
.msl_sampler = msl_sampler};
|
||||
|
||||
const spvc_result sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb);
|
||||
if (sres != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// push constant
|
||||
if (!add_msl_resource_binding(SPVC_MSL_PUSH_CONSTANT_DESC_SET, SPVC_MSL_PUSH_CONSTANT_BINDING, 2, 0, 0))
|
||||
return false;
|
||||
|
||||
if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute)
|
||||
{
|
||||
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
|
||||
{
|
||||
const spvc_msl_resource_binding rb = {.stage = execmodel,
|
||||
.desc_set = TEXTURE_DESCRIPTOR_SET,
|
||||
.binding = i,
|
||||
.msl_buffer = i,
|
||||
.msl_texture = i,
|
||||
.msl_sampler = i};
|
||||
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
// Add +1 for the buffer binding since we use this for texture buffers.
|
||||
if (!add_msl_resource_binding(TEXTURE_DESCRIPTOR_SET, i, i + 1, i, i))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch)
|
||||
{
|
||||
const spvc_msl_resource_binding rb = {
|
||||
.stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
|
||||
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
if (!add_msl_resource_binding(2, 0, 0, MAX_TEXTURE_SAMPLERS, 0))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Compute)
|
||||
{
|
||||
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
||||
{
|
||||
const spvc_msl_resource_binding rb = {
|
||||
.stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i};
|
||||
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
if (!add_msl_resource_binding(2, i, i, i, i))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,8 +82,8 @@ class MetalPipeline final : public GPUPipeline
|
||||
public:
|
||||
~MetalPipeline() override;
|
||||
|
||||
ALWAYS_INLINE bool IsRenderPipeline() const { return (m_depth != nil); }
|
||||
ALWAYS_INLINE bool IsComputePipeline() const { return (m_depth == nil); }
|
||||
ALWAYS_INLINE bool IsRenderPipeline() const { return !IsComputePipeline(); }
|
||||
ALWAYS_INLINE bool IsComputePipeline() const { return GPUDevice::IsComputeLayout(m_layout); }
|
||||
ALWAYS_INLINE id<MTLRenderPipelineState> GetRenderPipelineState() const
|
||||
{
|
||||
return (id<MTLRenderPipelineState>)m_pipeline;
|
||||
@@ -93,20 +93,23 @@ public:
|
||||
return (id<MTLComputePipelineState>)m_pipeline;
|
||||
}
|
||||
ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; }
|
||||
ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; }
|
||||
ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; }
|
||||
ALWAYS_INLINE Layout GetLayout() const { return m_layout; }
|
||||
ALWAYS_INLINE MTLCullMode GetCullMode() const { return static_cast<MTLCullMode>(m_cull_mode); }
|
||||
ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return static_cast<MTLPrimitiveType>(m_primitive); }
|
||||
|
||||
#ifdef ENABLE_GPU_OBJECT_NAMES
|
||||
void SetDebugName(std::string_view name) override;
|
||||
#endif
|
||||
|
||||
private:
|
||||
MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode, MTLPrimitiveType primitive);
|
||||
MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, Layout layout, MTLCullMode cull_mode,
|
||||
MTLPrimitiveType primitive);
|
||||
|
||||
id m_pipeline;
|
||||
id<MTLDepthStencilState> m_depth;
|
||||
MTLCullMode m_cull_mode;
|
||||
MTLPrimitiveType m_primitive;
|
||||
Layout m_layout;
|
||||
u8 m_cull_mode;
|
||||
u8 m_primitive;
|
||||
};
|
||||
|
||||
class MetalTexture final : public GPUTexture
|
||||
@@ -350,6 +353,12 @@ private:
|
||||
static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256;
|
||||
static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 64 * 1024 * 1024; // TODO reduce after separate allocations
|
||||
static constexpr u8 NUM_TIMESTAMP_QUERIES = 3;
|
||||
static constexpr u32 VERTEX_BINDING_UBO = 0;
|
||||
static constexpr u32 VERTEX_BINDING_VBO = 1;
|
||||
static constexpr u32 VERTEX_BINDING_PUSH_CONSTANTS = 2;
|
||||
static constexpr u32 FRAGMENT_BINDING_UBO = 0;
|
||||
static constexpr u32 FRAGMENT_BINDING_SSBO = 1;
|
||||
static constexpr u32 FRAGMENT_BINDING_PUSH_CONSTANTS = 2;
|
||||
|
||||
using DepthStateMap = std::unordered_map<u8, id<MTLDepthStencilState>>;
|
||||
|
||||
@@ -393,7 +402,7 @@ private:
|
||||
|
||||
void PreDrawCheck();
|
||||
void SetInitialEncoderState();
|
||||
void PushUniformBuffer(const void* data, u32 data_size);
|
||||
void PushRenderUniformBuffer(const void* data, u32 data_size);
|
||||
void SubmitDrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type);
|
||||
void SetViewportInRenderEncoder();
|
||||
void SetScissorInRenderEncoder();
|
||||
|
||||
@@ -667,9 +667,10 @@ std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromSource(GPUShaderStage st
|
||||
return CreateShaderFromMSL(stage, source, entry_point, error);
|
||||
}
|
||||
|
||||
MetalPipeline::MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
|
||||
MetalPipeline::MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, Layout layout, MTLCullMode cull_mode,
|
||||
MTLPrimitiveType primitive)
|
||||
: m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive)
|
||||
: m_pipeline(pipeline), m_depth(depth), m_layout(layout), m_cull_mode(static_cast<u8>(cull_mode)),
|
||||
m_primitive(static_cast<u8>(primitive))
|
||||
{
|
||||
}
|
||||
|
||||
@@ -868,7 +869,7 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
|
||||
return {};
|
||||
}
|
||||
|
||||
return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, depth, cull_mode, primitive));
|
||||
return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, depth, config.layout, cull_mode, primitive));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -891,7 +892,8 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Comp
|
||||
return {};
|
||||
}
|
||||
|
||||
return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, nil, MTLCullModeNone, MTLPrimitiveTypePoint));
|
||||
return std::unique_ptr<GPUPipeline>(
|
||||
new MetalPipeline(pipeline, nil, config.layout, MTLCullModeNone, MTLPrimitiveTypePoint));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1593,11 +1595,13 @@ void MetalDevice::ClearDepth(GPUTexture* t, float d)
|
||||
[m_render_encoder setCullMode:MTLCullModeNone];
|
||||
if (depth != m_current_depth_state)
|
||||
[m_render_encoder setDepthStencilState:depth];
|
||||
[m_render_encoder setVertexBytes:&d length:sizeof(d) atIndex:0];
|
||||
[m_render_encoder setVertexBytes:&d length:sizeof(d) atIndex:VERTEX_BINDING_UBO];
|
||||
[m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:0 vertexCount:3];
|
||||
s_stats.num_draws++;
|
||||
|
||||
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
|
||||
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer()
|
||||
offset:m_current_uniform_buffer_position
|
||||
atIndex:VERTEX_BINDING_UBO];
|
||||
if (m_current_pipeline)
|
||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
|
||||
if (m_current_cull_mode != MTLCullModeNone)
|
||||
@@ -1826,14 +1830,6 @@ void MetalDevice::UnmapIndexBuffer(u32 used_index_count)
|
||||
m_index_buffer.CommitMemory(size);
|
||||
}
|
||||
|
||||
void MetalDevice::PushUniformBuffer(const void* data, u32 data_size)
|
||||
{
|
||||
s_stats.buffer_streamed += data_size;
|
||||
void* map = MapUniformBuffer(data_size);
|
||||
std::memcpy(map, data, data_size);
|
||||
UnmapUniformBuffer(data_size);
|
||||
}
|
||||
|
||||
void* MetalDevice::MapUniformBuffer(u32 size)
|
||||
{
|
||||
const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT);
|
||||
@@ -1854,8 +1850,8 @@ void MetalDevice::UnmapUniformBuffer(u32 size)
|
||||
m_uniform_buffer.CommitMemory(size);
|
||||
if (InRenderPass())
|
||||
{
|
||||
[m_render_encoder setVertexBufferOffset:m_current_uniform_buffer_position atIndex:0];
|
||||
[m_render_encoder setFragmentBufferOffset:m_current_uniform_buffer_position atIndex:0];
|
||||
[m_render_encoder setVertexBufferOffset:m_current_uniform_buffer_position atIndex:VERTEX_BINDING_UBO];
|
||||
[m_render_encoder setFragmentBufferOffset:m_current_uniform_buffer_position atIndex:FRAGMENT_BINDING_UBO];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1983,7 +1979,7 @@ void MetalDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
|
||||
|
||||
m_current_ssbo = B;
|
||||
if (InRenderPass())
|
||||
[m_render_encoder setFragmentBuffer:B offset:0 atIndex:1];
|
||||
[m_render_encoder setFragmentBuffer:B offset:0 atIndex:FRAGMENT_BINDING_SSBO];
|
||||
}
|
||||
|
||||
void MetalDevice::UnbindTexture(MetalTexture* tex)
|
||||
@@ -2030,7 +2026,7 @@ void MetalDevice::UnbindTextureBuffer(MetalTextureBuffer* buf)
|
||||
|
||||
m_current_ssbo = nil;
|
||||
if (InRenderPass())
|
||||
[m_render_encoder setFragmentBuffer:nil offset:0 atIndex:1];
|
||||
[m_render_encoder setFragmentBuffer:nil offset:0 atIndex:FRAGMENT_BINDING_SSBO];
|
||||
}
|
||||
|
||||
void MetalDevice::SetViewport(const GSVector4i rc)
|
||||
@@ -2241,9 +2237,15 @@ void MetalDevice::SetInitialEncoderState()
|
||||
// Set initial state.
|
||||
// TODO: avoid uniform set here? it's probably going to get changed...
|
||||
// Might be better off just deferring all the init until the first draw...
|
||||
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
|
||||
[m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
|
||||
[m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:1];
|
||||
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer()
|
||||
offset:m_current_uniform_buffer_position
|
||||
atIndex:VERTEX_BINDING_UBO];
|
||||
[m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:VERTEX_BINDING_VBO];
|
||||
[m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer()
|
||||
offset:m_current_uniform_buffer_position
|
||||
atIndex:FRAGMENT_BINDING_UBO];
|
||||
if (m_current_ssbo)
|
||||
[m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:FRAGMENT_BINDING_SSBO];
|
||||
[m_render_encoder setCullMode:m_current_cull_mode];
|
||||
if (m_current_depth_state != nil)
|
||||
[m_render_encoder setDepthStencilState:m_current_depth_state];
|
||||
@@ -2251,8 +2253,6 @@ void MetalDevice::SetInitialEncoderState()
|
||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
|
||||
[m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||
[m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||
if (m_current_ssbo)
|
||||
[m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1];
|
||||
|
||||
if (!m_features.framebuffer_fetch && (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))
|
||||
{
|
||||
@@ -2291,6 +2291,16 @@ void MetalDevice::PreDrawCheck()
|
||||
}
|
||||
}
|
||||
|
||||
void MetalDevice::PushRenderUniformBuffer(const void* data, u32 data_size)
|
||||
{
|
||||
DebugAssert(InRenderPass() && m_current_pipeline);
|
||||
s_stats.buffer_streamed += data_size;
|
||||
|
||||
// Maybe we'd be better off with another buffer...
|
||||
[m_render_encoder setVertexBytes:data length:data_size atIndex:VERTEX_BINDING_PUSH_CONSTANTS];
|
||||
[m_render_encoder setFragmentBytes:data length:data_size atIndex:FRAGMENT_BINDING_PUSH_CONSTANTS];
|
||||
}
|
||||
|
||||
void MetalDevice::Draw(u32 vertex_count, u32 base_vertex)
|
||||
{
|
||||
PreDrawCheck();
|
||||
@@ -2302,7 +2312,7 @@ void MetalDevice::DrawWithPushConstants(u32 vertex_count, u32 base_vertex, const
|
||||
u32 push_constants_size)
|
||||
{
|
||||
PreDrawCheck();
|
||||
PushUniformBuffer(push_constants, push_constants_size);
|
||||
PushRenderUniformBuffer(push_constants, push_constants_size);
|
||||
s_stats.num_draws++;
|
||||
[m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:base_vertex vertexCount:vertex_count];
|
||||
}
|
||||
@@ -2329,7 +2339,7 @@ void MetalDevice::DrawIndexedWithPushConstants(u32 index_count, u32 base_index,
|
||||
{
|
||||
PreDrawCheck();
|
||||
|
||||
PushUniformBuffer(push_constants, push_constants_size);
|
||||
PushRenderUniformBuffer(push_constants, push_constants_size);
|
||||
|
||||
s_stats.num_draws++;
|
||||
|
||||
@@ -2357,7 +2367,7 @@ void MetalDevice::DrawIndexedWithBarrierWithPushConstants(u32 index_count, u32 b
|
||||
{
|
||||
PreDrawCheck();
|
||||
|
||||
PushUniformBuffer(push_constants, push_constants_size);
|
||||
PushRenderUniformBuffer(push_constants, push_constants_size);
|
||||
|
||||
SubmitDrawIndexedWithBarrier(index_count, base_index, base_vertex, type);
|
||||
}
|
||||
@@ -2479,7 +2489,7 @@ void MetalDevice::DispatchWithPushConstants(u32 threads_x, u32 threads_y, u32 th
|
||||
}
|
||||
|
||||
DebugAssert(m_current_pipeline && m_current_pipeline->IsComputePipeline());
|
||||
PushUniformBuffer(push_constants, push_constants_size);
|
||||
[m_compute_encoder setBytes:push_constants length:push_constants_size atIndex:2];
|
||||
|
||||
// TODO: We could remap to the optimal group size..
|
||||
[m_compute_encoder dispatchThreads:MTLSizeMake(threads_x, threads_y, threads_z)
|
||||
|
||||
@@ -707,7 +707,8 @@ bool OpenGLDevice::CreateBuffers()
|
||||
{
|
||||
if (!(m_vertex_buffer = OpenGLStreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE)) ||
|
||||
!(m_index_buffer = OpenGLStreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE)) ||
|
||||
!(m_uniform_buffer = OpenGLStreamBuffer::Create(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE))) [[unlikely]]
|
||||
!(m_uniform_buffer = OpenGLStreamBuffer::Create(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE)) ||
|
||||
!(m_push_constant_buffer = OpenGLStreamBuffer::Create(GL_UNIFORM_BUFFER, PUSH_CONSTANT_BUFFER_SIZE))) [[unlikely]]
|
||||
{
|
||||
ERROR_LOG("Failed to create one or more device buffers.");
|
||||
return false;
|
||||
@@ -716,6 +717,7 @@ bool OpenGLDevice::CreateBuffers()
|
||||
GL_OBJECT_NAME(m_vertex_buffer, "Device Vertex Buffer");
|
||||
GL_OBJECT_NAME(m_index_buffer, "Device Index Buffer");
|
||||
GL_OBJECT_NAME(m_uniform_buffer, "Device Uniform Buffer");
|
||||
GL_OBJECT_NAME(m_push_constant_buffer, "Device Push Constant Buffer");
|
||||
|
||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&m_uniform_buffer_alignment));
|
||||
m_uniform_buffer_alignment = std::max<GLuint>(m_uniform_buffer_alignment, 16);
|
||||
@@ -756,6 +758,7 @@ void OpenGLDevice::DestroyBuffers()
|
||||
if (m_read_fbo != 0)
|
||||
glDeleteFramebuffers(1, &m_read_fbo);
|
||||
m_texture_stream_buffer.reset();
|
||||
m_push_constant_buffer.reset();
|
||||
m_uniform_buffer.reset();
|
||||
m_index_buffer.reset();
|
||||
m_vertex_buffer.reset();
|
||||
@@ -1109,11 +1112,11 @@ void OpenGLDevice::UnmapIndexBuffer(u32 used_index_count)
|
||||
|
||||
void OpenGLDevice::PushUniformBuffer(const void* data, u32 data_size)
|
||||
{
|
||||
const auto res = m_uniform_buffer->Map(m_uniform_buffer_alignment, data_size);
|
||||
const auto res = m_push_constant_buffer->Map(m_uniform_buffer_alignment, data_size);
|
||||
std::memcpy(res.pointer, data, data_size);
|
||||
m_uniform_buffer->Unmap(data_size);
|
||||
m_push_constant_buffer->Unmap(data_size);
|
||||
s_stats.buffer_streamed += data_size;
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 0, m_uniform_buffer->GetGLBufferId(), res.buffer_offset, data_size);
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_push_constant_buffer->GetGLBufferId(), res.buffer_offset, data_size);
|
||||
}
|
||||
|
||||
void* OpenGLDevice::MapUniformBuffer(u32 size)
|
||||
|
||||
@@ -165,6 +165,7 @@ private:
|
||||
static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024;
|
||||
static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024;
|
||||
static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||
static constexpr u32 PUSH_CONSTANT_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
|
||||
bool CheckFeatures(CreateFlags create_flags);
|
||||
@@ -201,6 +202,7 @@ private:
|
||||
std::unique_ptr<OpenGLStreamBuffer> m_vertex_buffer;
|
||||
std::unique_ptr<OpenGLStreamBuffer> m_index_buffer;
|
||||
std::unique_ptr<OpenGLStreamBuffer> m_uniform_buffer;
|
||||
std::unique_ptr<OpenGLStreamBuffer> m_push_constant_buffer;
|
||||
std::unique_ptr<OpenGLStreamBuffer> m_texture_stream_buffer;
|
||||
|
||||
// TODO: pass in file instead of blob for pipeline cache
|
||||
|
||||
@@ -472,9 +472,12 @@ void OpenGLDevice::PostLinkProgram(const GPUPipeline::GraphicsConfig& plconfig,
|
||||
{
|
||||
if (!ShaderGen::UseGLSLBindingLayout())
|
||||
{
|
||||
GLint location = glGetUniformBlockIndex(program_id, "UBOBlock");
|
||||
if (location >= 0)
|
||||
glUniformBlockBinding(program_id, location, 0);
|
||||
const GLint ubo_location = glGetUniformBlockIndex(program_id, "UBOBlock");
|
||||
if (ubo_location >= 0)
|
||||
glUniformBlockBinding(program_id, ubo_location, 0);
|
||||
const GLint push_constant_location = glGetUniformBlockIndex(program_id, "PushConstants");
|
||||
if (push_constant_location >= 0)
|
||||
glUniformBlockBinding(program_id, push_constant_location, 1);
|
||||
|
||||
glUseProgram(program_id);
|
||||
|
||||
@@ -482,9 +485,9 @@ void OpenGLDevice::PostLinkProgram(const GPUPipeline::GraphicsConfig& plconfig,
|
||||
const u32 num_textures = std::max<u32>(GetActiveTexturesForLayout(plconfig.layout), 1);
|
||||
for (u32 i = 0; i < num_textures; i++)
|
||||
{
|
||||
location = glGetUniformLocation(program_id, TinyString::from_format("samp{}", i));
|
||||
if (location >= 0)
|
||||
glUniform1i(location, i);
|
||||
const GLint samp_location = glGetUniformLocation(program_id, TinyString::from_format("samp{}", i));
|
||||
if (samp_location >= 0)
|
||||
glUniform1i(samp_location, i);
|
||||
}
|
||||
|
||||
glUseProgram(m_last_program);
|
||||
|
||||
@@ -379,40 +379,42 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */
|
||||
m_has_uniform_buffer = false;
|
||||
}
|
||||
|
||||
void ShaderGen::WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan) const
|
||||
void ShaderGen::WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant) const
|
||||
{
|
||||
const u32 binding = push_constant ? 1 : 0;
|
||||
const char* const name = push_constant ? "PushConstants" : "UBOBlock";
|
||||
if (m_shader_language == GPUShaderLanguage::GLSLVK)
|
||||
{
|
||||
if (m_render_api == RenderAPI::Vulkan && push_constant_on_vulkan)
|
||||
if (push_constant && (m_render_api == RenderAPI::Vulkan || m_render_api == RenderAPI::Metal))
|
||||
{
|
||||
ss << "layout(push_constant, row_major) uniform PushConstants\n";
|
||||
ss << "layout(push_constant, row_major) uniform " << name << "\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
ss << "layout(std140, row_major, set = 0, binding = 0) uniform UBOBlock\n";
|
||||
ss << "layout(std140, row_major, set = 0, binding = " << binding << ") uniform " << name << "\n";
|
||||
m_has_uniform_buffer = true;
|
||||
}
|
||||
}
|
||||
else if (m_glsl)
|
||||
{
|
||||
if (m_use_glsl_binding_layout)
|
||||
ss << "layout(std140, row_major, binding = 0) uniform UBOBlock\n";
|
||||
ss << "layout(std140, row_major, binding = " << binding << ") uniform " << name << "\n";
|
||||
else
|
||||
ss << "layout(std140, row_major) uniform UBOBlock\n";
|
||||
ss << "layout(std140, row_major) uniform " << name << "\n";
|
||||
|
||||
m_has_uniform_buffer = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
ss << "cbuffer UBOBlock : register(b0)\n";
|
||||
ss << "cbuffer " << name << " : register(b" << binding << ")\n";
|
||||
m_has_uniform_buffer = true;
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderGen::DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list<const char*>& members,
|
||||
bool push_constant_on_vulkan) const
|
||||
bool push_constant) const
|
||||
{
|
||||
WriteUniformBufferDeclaration(ss, push_constant_on_vulkan);
|
||||
WriteUniformBufferDeclaration(ss, push_constant);
|
||||
|
||||
ss << "{\n";
|
||||
for (const char* member : members)
|
||||
|
||||
@@ -45,9 +45,9 @@ public:
|
||||
void DefineMacro(std::stringstream& ss, const char* name, s32 value) const;
|
||||
void WriteHeader(std::stringstream& ss, bool enable_rov = false, bool enable_framebuffer_fetch = false,
|
||||
bool enable_dual_source_blend = false) const;
|
||||
void WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan) const;
|
||||
void WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant) const;
|
||||
void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list<const char*>& members,
|
||||
bool push_constant_on_vulkan) const;
|
||||
bool push_constant) const;
|
||||
void DeclareTexture(std::stringstream& ss, const char* name, u32 index, bool multisampled = false,
|
||||
bool is_int = false, bool is_unsigned = false) const;
|
||||
void DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned) const;
|
||||
|
||||
Reference in New Issue
Block a user