mirror of
https://github.com/stenzek/duckstation.git
synced 2026-02-04 21:25:32 +00:00
Compare commits
7 Commits
master
...
downsampli
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
113614c2f5 | ||
|
|
ac41ace972 | ||
|
|
ebe782e4f4 | ||
|
|
e7439c1503 | ||
|
|
fda87de7e7 | ||
|
|
116bc83d09 | ||
|
|
ddffc055b9 |
@@ -2346,6 +2346,8 @@ public:
|
||||
return GSVector4i(vcombine_s32(xy.v2s, zw.v2s));
|
||||
}
|
||||
|
||||
// Builds a four-lane vector whose low and high halves are both copies of the given two-lane vector.
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xyzw)
{
  const auto half = xyzw.v2s;
  return GSVector4i(vcombine_s32(half, half));
}
|
||||
|
||||
// Returns the low half of this vector (the first two 32-bit lanes) as a GSVector2i.
ALWAYS_INLINE GSVector2i xy() const
{
  const auto low_half = vget_low_s32(v4s);
  return GSVector2i(low_half);
}
|
||||
|
||||
// Returns the high half of this vector (the last two 32-bit lanes) as a GSVector2i.
ALWAYS_INLINE GSVector2i zw() const
{
  const auto high_half = vget_high_s32(v4s);
  return GSVector2i(high_half);
}
|
||||
|
||||
@@ -1665,6 +1665,8 @@ public:
|
||||
return GSVector4i(xy.x, xy.y, zw.x, zw.y);
|
||||
}
|
||||
|
||||
// Duplicates a two-component vector into both halves of a four-component vector: (x, y) -> (x, y, x, y).
// The first two arguments previously read `xyxy.x`/`xyxy.y`, i.e. they named this function rather than
// the parameter, which is ill-formed; all four components must come from the `xyzw` argument.
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xyzw)
{
  return GSVector4i(xyzw.x, xyzw.y, xyzw.x, xyzw.y);
}
|
||||
|
||||
// Returns the first two components (x, y) as a GSVector2i.
ALWAYS_INLINE GSVector2i xy() const
{
  return GSVector2i(x, y);
}
|
||||
// Returns the last two components (z, w) as a GSVector2i.
ALWAYS_INLINE GSVector2i zw() const
{
  return GSVector2i(z, w);
}
|
||||
|
||||
|
||||
@@ -1831,6 +1831,11 @@ public:
|
||||
|
||||
// Member form of xyxy: forwards to upl64(v).
// NOTE(review): presumably this yields (this.x, this.y, v.x, v.y) - confirm against upl64's definition.
ALWAYS_INLINE GSVector4i xyxy(const GSVector4i& v) const
{
  return upl64(v);
}
|
||||
|
||||
// Builds a four-lane vector by interleaving the low 64 bits of the argument with themselves,
// so the same two-lane value ends up in both halves of the result.
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xyzw)
{
  const auto lanes = xyzw.m;
  return GSVector4i(_mm_unpacklo_epi64(lanes, lanes));
}
|
||||
|
||||
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xy, const GSVector2i& zw)
|
||||
{
|
||||
return GSVector4i(_mm_unpacklo_epi64(xy.m, zw.m));
|
||||
|
||||
@@ -1031,19 +1031,14 @@ bool GPUBackend::Deinterlace(u32 field)
|
||||
const u32 height = m_display_texture_view_height;
|
||||
|
||||
const auto copy_to_field_buffer = [&](u32 buffer) {
|
||||
if (!m_deinterlace_buffers[buffer] || m_deinterlace_buffers[buffer]->GetWidth() != width ||
|
||||
m_deinterlace_buffers[buffer]->GetHeight() != height ||
|
||||
m_deinterlace_buffers[buffer]->GetFormat() != src->GetFormat())
|
||||
if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[buffer], width, height, GPUTexture::Type::Texture,
|
||||
src->GetFormat(), GPUTexture::Flags::None, false)) [[unlikely]]
|
||||
{
|
||||
if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[buffer], width, height, GPUTexture::Type::Texture,
|
||||
src->GetFormat(), GPUTexture::Flags::None, false)) [[unlikely]]
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
GL_OBJECT_NAME_FMT(m_deinterlace_buffers[buffer], "Blend Deinterlace Buffer {}", buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
GL_OBJECT_NAME_FMT(m_deinterlace_buffers[buffer], "Blend Deinterlace Buffer {}", buffer);
|
||||
|
||||
GL_INS_FMT("Copy {}x{} from {},{} to field buffer {}", width, height, x, y, buffer);
|
||||
g_gpu_device->CopyTextureRegion(m_deinterlace_buffers[buffer].get(), 0, 0, 0, 0, m_display_texture, x, y, 0, 0,
|
||||
width, height);
|
||||
@@ -1158,18 +1153,13 @@ bool GPUBackend::Deinterlace(u32 field)
|
||||
|
||||
// Ensures m_deinterlace_texture is an RGBA8 render target of the requested size.
//
// width/height: requested size of the deinterlace target.
// preserve:     forwarded unchanged to GPUDevice::ResizeTexture().
// Returns true when the texture is ready for use, false if the resize/creation failed.
bool GPUBackend::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve)
{
  // A single resize request replaces the previous manual size check followed by a nested, duplicated
  // resize call; that arrangement retried only after a failure and still reported failure when the
  // retry succeeded.
  // NOTE(review): presumably ResizeTexture() returns early when the existing texture already matches
  // the requested dimensions/format - confirm against GPUDevice.
  if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget,
                                   GPUTexture::Format::RGBA8, GPUTexture::Flags::None, preserve)) [[unlikely]]
  {
    return false;
  }

  GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture");
  return true;
}
|
||||
|
||||
@@ -1179,19 +1169,15 @@ bool GPUBackend::ApplyChromaSmoothing()
|
||||
const u32 y = m_display_texture_view_y;
|
||||
const u32 width = m_display_texture_view_width;
|
||||
const u32 height = m_display_texture_view_height;
|
||||
if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width ||
|
||||
m_chroma_smoothing_texture->GetHeight() != height)
|
||||
if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget,
|
||||
GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false))
|
||||
{
|
||||
if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget,
|
||||
GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false))
|
||||
{
|
||||
ClearDisplayTexture();
|
||||
return false;
|
||||
}
|
||||
|
||||
GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture");
|
||||
ClearDisplayTexture();
|
||||
return false;
|
||||
}
|
||||
|
||||
GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture");
|
||||
|
||||
GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height);
|
||||
|
||||
m_display_texture->MakeReadyForSampling();
|
||||
|
||||
@@ -172,10 +172,11 @@ protected:
|
||||
void DestroyDeinterlaceTextures();
|
||||
bool ApplyChromaSmoothing();
|
||||
|
||||
GSVector4i m_clamped_drawing_area = {};
|
||||
|
||||
s32 m_display_width = 0;
|
||||
s32 m_display_height = 0;
|
||||
|
||||
GSVector4i m_clamped_drawing_area = {};
|
||||
|
||||
s32 m_display_origin_left = 0;
|
||||
s32 m_display_origin_top = 0;
|
||||
s32 m_display_vram_width = 0;
|
||||
|
||||
@@ -41,8 +41,8 @@ LOG_CHANNEL(GPU_HW);
|
||||
// TODO: instead of full state restore, only restore what changed
|
||||
|
||||
// Texture formats used for the VRAM render target and its depth attachments.
// Each constant is defined exactly once; the earlier D16/D32F definitions of VRAM_DS_FORMAT and
// VRAM_DS_DEPTH_FORMAT are dropped in favour of the later D32FS8 ones, since defining the same
// constant twice is a redefinition error.
// NOTE(review): D32FS8 presumably supplies the stencil channel that the stencil clears/ops on the
// VRAM depth texture rely on - confirm.
static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8;
static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D32FS8;
static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32FS8;
static constexpr GPUTexture::Format VRAM_DS_COLOR_FORMAT = GPUTexture::Format::R32F;
|
||||
|
||||
#if defined(_DEBUG) || defined(_DEVEL)
|
||||
@@ -427,11 +427,13 @@ void GPU_HW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss)
|
||||
|
||||
void GPU_HW::RestoreDeviceContext()
|
||||
{
|
||||
m_batch_ubo_dirty = true;
|
||||
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
|
||||
SetVRAMRenderTarget();
|
||||
if (UseStencilBuffer())
|
||||
g_gpu_device->SetStencilRef(m_batch.stencil_reference);
|
||||
g_gpu_device->SetViewport(m_vram_texture->GetRect());
|
||||
SetScissor();
|
||||
m_batch_ubo_dirty = true;
|
||||
}
|
||||
|
||||
void GPU_HW::UpdateSettings(const GPUSettings& old_settings)
|
||||
@@ -447,7 +449,9 @@ void GPU_HW::UpdateSettings(const GPUSettings& old_settings)
|
||||
(m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
|
||||
g_gpu_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() ||
|
||||
m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer() ||
|
||||
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache));
|
||||
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache) ||
|
||||
(GetDownsampleMode(resolution_scale) == GPUDownsampleMode::AdaptiveStencil) !=
|
||||
(m_downsample_mode == GPUDownsampleMode::AdaptiveStencil));
|
||||
const bool shaders_changed =
|
||||
((m_resolution_scale > 1) != (resolution_scale > 1) || m_multisamples != multisamples ||
|
||||
m_true_color != g_gpu_settings.gpu_true_color ||
|
||||
@@ -466,7 +470,9 @@ void GPU_HW::UpdateSettings(const GPUSettings& old_settings)
|
||||
g_gpu_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction) ||
|
||||
m_allow_sprite_mode != ShouldAllowSpriteMode(m_resolution_scale, g_gpu_settings.gpu_texture_filter,
|
||||
g_gpu_settings.gpu_sprite_texture_filter) ||
|
||||
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache));
|
||||
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache) ||
|
||||
(GetDownsampleMode(resolution_scale) == GPUDownsampleMode::AdaptiveStencil) !=
|
||||
(m_downsample_mode == GPUDownsampleMode::AdaptiveStencil));
|
||||
const bool resolution_dependent_shaders_changed =
|
||||
(m_resolution_scale != resolution_scale || m_multisamples != multisamples);
|
||||
const bool downsampling_shaders_changed =
|
||||
@@ -889,6 +895,7 @@ void GPU_HW::PrintSettingsToLog()
|
||||
// Selects the format for the VRAM depth texture:
//  - PGXP depth buffer disabled:                VRAM_DS_FORMAT
//  - PGXP depth buffer enabled, ROV blending:   VRAM_DS_COLOR_FORMAT
//  - PGXP depth buffer enabled, no ROV:         VRAM_DS_DEPTH_FORMAT
GPUTexture::Format GPU_HW::GetDepthBufferFormat() const
{
  // TODO: AMD doesn't support D24S8
  if (!m_pgxp_depth_buffer)
    return VRAM_DS_FORMAT;

  if (m_use_rov_for_shader_blend)
    return VRAM_DS_COLOR_FORMAT;

  return VRAM_DS_DEPTH_FORMAT;
}
|
||||
@@ -978,6 +985,10 @@ bool GPU_HW::CreateBuffers(Error* error)
|
||||
|
||||
SetVRAMRenderTarget();
|
||||
SetFullVRAMDirtyRectangle();
|
||||
|
||||
if (UseStencilBuffer())
|
||||
g_gpu_device->ClearStencil(m_vram_depth_texture.get(), 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -990,6 +1001,9 @@ void GPU_HW::ClearFramebuffer()
|
||||
g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF);
|
||||
else
|
||||
g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f);
|
||||
|
||||
if (UseStencilBuffer())
|
||||
g_gpu_device->ClearStencil(m_vram_depth_texture.get(), 0);
|
||||
}
|
||||
ClearVRAMDirtyRectangle();
|
||||
if (m_use_texture_cache)
|
||||
@@ -1057,6 +1071,15 @@ bool GPU_HW::CompileCommonShaders(Error* error)
|
||||
if (!m_fullscreen_quad_vertex_shader)
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(m_fullscreen_quad_vertex_shader, "Fullscreen Quad Vertex Shader");
|
||||
|
||||
m_screen_quad_vertex_shader = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
|
||||
shadergen.GenerateScreenVertexShader(), error);
|
||||
if (!m_screen_quad_vertex_shader)
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(m_screen_quad_vertex_shader, "Screen Quad Vertex Shader");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1410,13 +1433,27 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||
{
|
||||
plconfig.depth.depth_test =
|
||||
m_pgxp_depth_buffer ?
|
||||
(depth_test ? GPUPipeline::DepthFunc::LessEqual : GPUPipeline::DepthFunc::Always) :
|
||||
(check_mask ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always);
|
||||
(depth_test ? GPUPipeline::ComparisonFunc::LessEqual : GPUPipeline::ComparisonFunc::Always) :
|
||||
(check_mask ? GPUPipeline::ComparisonFunc::GreaterEqual : GPUPipeline::ComparisonFunc::Always);
|
||||
|
||||
// Don't write for transparent, but still test.
|
||||
plconfig.depth.depth_write =
|
||||
!m_pgxp_depth_buffer ||
|
||||
(depth_test && transparency_mode == static_cast<u8>(GPUTransparencyMode::Disabled));
|
||||
|
||||
if (UseStencilBuffer())
|
||||
{
|
||||
const bool replace = (transparency_mode == static_cast<u8>(GPUTransparencyMode::Disabled) ||
|
||||
render_mode == static_cast<u8>(BatchRenderMode::TransparencyDisabled) ||
|
||||
render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque));
|
||||
plconfig.depth.stencil_enable = true;
|
||||
plconfig.depth.back_stencil_func = GPUPipeline::ComparisonFunc::Always;
|
||||
plconfig.depth.back_stencil_pass_op =
|
||||
replace ? GPUPipeline::StencilOp::Replace : GPUPipeline::StencilOp::Keep;
|
||||
plconfig.depth.front_stencil_func = GPUPipeline::ComparisonFunc::Always;
|
||||
plconfig.depth.front_stencil_pass_op =
|
||||
replace ? GPUPipeline::StencilOp::Replace : GPUPipeline::StencilOp::Keep;
|
||||
}
|
||||
}
|
||||
|
||||
plconfig.SetTargetFormats(use_rov ? GPUTexture::Format::Unknown : VRAM_RT_FORMAT,
|
||||
@@ -1537,15 +1574,32 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||
|
||||
batch_shader_guard.Run();
|
||||
|
||||
static constexpr GPUPipeline::VertexAttribute screen_vertex_attributes[] = {
|
||||
GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0,
|
||||
GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, x)),
|
||||
GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0,
|
||||
GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, u)),
|
||||
};
|
||||
|
||||
// common state
|
||||
plconfig.input_layout.vertex_attributes = {};
|
||||
plconfig.input_layout.vertex_stride = 0;
|
||||
plconfig.input_layout.vertex_attributes = screen_vertex_attributes;
|
||||
plconfig.input_layout.vertex_stride = sizeof(ScreenVertex);
|
||||
plconfig.primitive = GPUPipeline::Primitive::TriangleStrips;
|
||||
plconfig.vertex_shader = m_screen_quad_vertex_shader.get();
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
plconfig.per_sample_shading = false;
|
||||
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
|
||||
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
|
||||
plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
|
||||
|
||||
if (UseStencilBuffer())
|
||||
{
|
||||
plconfig.depth.stencil_enable = true;
|
||||
plconfig.depth.back_stencil_func = GPUPipeline::ComparisonFunc::Always;
|
||||
plconfig.depth.back_stencil_pass_op = GPUPipeline::StencilOp::Replace;
|
||||
plconfig.depth.front_stencil_func = GPUPipeline::ComparisonFunc::Always;
|
||||
plconfig.depth.front_stencil_pass_op = GPUPipeline::StencilOp::Replace;
|
||||
}
|
||||
|
||||
// VRAM fill
|
||||
for (u8 wrapped = 0; wrapped < 2; wrapped++)
|
||||
{
|
||||
@@ -1560,8 +1614,9 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||
return false;
|
||||
|
||||
plconfig.fragment_shader = fs.get();
|
||||
plconfig.depth = needs_real_depth_buffer ? GPUPipeline::DepthState::GetAlwaysWriteState() :
|
||||
GPUPipeline::DepthState::GetNoTestsState();
|
||||
plconfig.depth.depth_test =
|
||||
needs_real_depth_buffer ? GPUPipeline::ComparisonFunc::Always : GPUPipeline::ComparisonFunc::Never;
|
||||
plconfig.depth.depth_write = needs_real_depth_buffer;
|
||||
|
||||
if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
@@ -1587,7 +1642,7 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||
|
||||
plconfig.depth.depth_write = needs_real_depth_buffer;
|
||||
plconfig.depth.depth_test =
|
||||
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
|
||||
(depth_test != 0) ? GPUPipeline::ComparisonFunc::GreaterEqual : GPUPipeline::ComparisonFunc::Always;
|
||||
|
||||
if (!(m_vram_copy_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig), error))
|
||||
return false;
|
||||
@@ -1619,7 +1674,7 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||
|
||||
plconfig.depth.depth_write = needs_real_depth_buffer;
|
||||
plconfig.depth.depth_test =
|
||||
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
|
||||
(depth_test != 0) ? GPUPipeline::ComparisonFunc::GreaterEqual : GPUPipeline::ComparisonFunc::Always;
|
||||
|
||||
if (!(m_vram_write_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
@@ -1631,8 +1686,6 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||
}
|
||||
}
|
||||
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
|
||||
// VRAM write replacement
|
||||
{
|
||||
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
|
||||
@@ -1641,7 +1694,10 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||
return false;
|
||||
|
||||
plconfig.fragment_shader = fs.get();
|
||||
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
plconfig.depth.depth_write = needs_real_depth_buffer;
|
||||
plconfig.depth.depth_test = GPUPipeline::ComparisonFunc::Always;
|
||||
|
||||
if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
|
||||
@@ -1649,6 +1705,12 @@ bool GPU_HW::CompilePipelines(Error* error)
|
||||
return false;
|
||||
}
|
||||
|
||||
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
|
||||
plconfig.primitive = GPUPipeline::Primitive::Triangles;
|
||||
plconfig.input_layout.vertex_attributes = {};
|
||||
plconfig.input_layout.vertex_stride = 0;
|
||||
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
|
||||
|
||||
// VRAM update depth
|
||||
if (m_write_mask_as_depth)
|
||||
{
|
||||
@@ -1861,6 +1923,61 @@ bool GPU_HW::CompileDownsamplePipelines(Error* error)
|
||||
}
|
||||
GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler");
|
||||
}
|
||||
else if (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil)
|
||||
{
|
||||
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
|
||||
GPUShaderStage::Fragment, shadergen.GetLanguage(),
|
||||
shadergen.GenerateAdaptiveStencilDownsampleBlurFragmentShader(m_resolution_scale, m_multisamples), error);
|
||||
if (!fs)
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(fs, "Adaptive Stencil Downsample Fragment Shader");
|
||||
plconfig.fragment_shader = fs.get();
|
||||
plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
|
||||
|
||||
if (!(m_downsample_blur_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(m_downsample_blur_pipeline, "Adaptive Stencil Downsample Pipeline");
|
||||
|
||||
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
|
||||
shadergen.GenerateAdaptiveStencilDownsampleCompositeFragmentShader(), error);
|
||||
if (!fs)
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(fs, "Adaptive Stencil Composite Fragment Shader");
|
||||
plconfig.fragment_shader = fs.get();
|
||||
plconfig.SetTargetFormats(VRAM_RT_FORMAT);
|
||||
|
||||
if (!(m_downsample_composite_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(m_downsample_composite_pipeline, "Adaptive Stencil Composite Pipeline");
|
||||
|
||||
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
|
||||
shadergen.GenerateFillFragmentShader(), error);
|
||||
if (!fs)
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(fs, "Adaptive Stencil Mark Fragment Shader");
|
||||
plconfig.fragment_shader = fs.get();
|
||||
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
plconfig.SetTargetFormats(GPUTexture::Format::R8, GetDepthBufferFormat());
|
||||
plconfig.samples = m_multisamples;
|
||||
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
|
||||
plconfig.depth.stencil_enable = true;
|
||||
plconfig.depth.front_stencil_pass_op = GPUPipeline::StencilOp::Keep;
|
||||
plconfig.depth.front_stencil_fail_op = GPUPipeline::StencilOp::Keep;
|
||||
plconfig.depth.front_stencil_func = GPUPipeline::ComparisonFunc::Equal;
|
||||
plconfig.depth.back_stencil_pass_op = GPUPipeline::StencilOp::Keep;
|
||||
plconfig.depth.back_stencil_fail_op = GPUPipeline::StencilOp::Keep;
|
||||
plconfig.depth.back_stencil_func = GPUPipeline::ComparisonFunc::Equal;
|
||||
|
||||
if (!(m_downsample_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
|
||||
GL_OBJECT_NAME(m_downsample_pass_pipeline, "Adaptive Stencil Downsample Pipeline");
|
||||
}
|
||||
else if (m_downsample_mode == GPUDownsampleMode::Box)
|
||||
{
|
||||
std::unique_ptr<GPUShader> fs =
|
||||
@@ -1954,6 +2071,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written)
|
||||
|
||||
void GPU_HW::UpdateDepthBufferFromMaskBit()
|
||||
{
|
||||
GL_SCOPE_FMT("UpdateDepthBufferFromMaskBit()");
|
||||
DebugAssert(!m_pgxp_depth_buffer && m_vram_depth_texture && m_write_mask_as_depth);
|
||||
|
||||
// Viewport should already be set full, only need to fudge the scissor.
|
||||
@@ -2449,10 +2567,28 @@ void GPU_HW::SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled)
|
||||
m_batch.sprite_mode = enabled;
|
||||
}
|
||||
|
||||
void GPU_HW::SetBatchStencilReference(const GPUBackendDrawCommand* cmd, u8 value)
|
||||
{
|
||||
if (!UseStencilBuffer() || m_batch.stencil_reference == value)
|
||||
return;
|
||||
|
||||
if (m_batch_index_count > 0)
|
||||
{
|
||||
FlushRender();
|
||||
EnsureVertexBufferSpaceForCommand(cmd);
|
||||
}
|
||||
|
||||
GL_INS_FMT("Stencil reference is now {}", value);
|
||||
|
||||
m_batch.stencil_reference = value;
|
||||
g_gpu_device->SetStencilRef(m_batch.stencil_reference);
|
||||
}
|
||||
|
||||
void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd)
|
||||
{
|
||||
PrepareDraw(cmd);
|
||||
SetBatchDepthBuffer(cmd, false);
|
||||
SetBatchStencilReference(cmd, 0);
|
||||
|
||||
const u32 num_vertices = cmd->num_vertices;
|
||||
DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
|
||||
@@ -2497,6 +2633,7 @@ void GPU_HW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd)
|
||||
|
||||
const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w;
|
||||
SetBatchDepthBuffer(cmd, use_depth);
|
||||
SetBatchStencilReference(cmd, BoolToUInt8(use_depth));
|
||||
|
||||
const u32 num_vertices = cmd->num_vertices;
|
||||
DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
|
||||
@@ -2656,6 +2793,7 @@ void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd)
|
||||
PrepareDraw(cmd);
|
||||
SetBatchDepthBuffer(cmd, false);
|
||||
SetBatchSpriteMode(cmd, m_allow_sprite_mode);
|
||||
SetBatchStencilReference(cmd, 0);
|
||||
DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE);
|
||||
|
||||
const s32 pos_x = cmd->x;
|
||||
@@ -2933,10 +3071,12 @@ ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand
|
||||
const GSVector4i clamped_draw_rect_123)
|
||||
{
|
||||
// Use PGXP to exclude primitives that are definitely 3D.
|
||||
const bool really_3d = is_precise ? is_3d : IsPossibleSpritePolygon(vertices.data());
|
||||
if (m_resolution_scale > 1 && !is_3d && cmd->quad_polygon)
|
||||
HandleFlippedQuadTextureCoordinates(cmd, vertices.data());
|
||||
else if (m_allow_sprite_mode)
|
||||
SetBatchSpriteMode(cmd, is_precise ? !is_3d : IsPossibleSpritePolygon(vertices.data()));
|
||||
SetBatchSpriteMode(cmd, !really_3d);
|
||||
SetBatchStencilReference(cmd, BoolToUInt8(really_3d));
|
||||
|
||||
if (cmd->texture_enable && m_compute_uv_range)
|
||||
ComputePolygonUVLimits(cmd, vertices.data(), num_vertices);
|
||||
@@ -2997,9 +3137,13 @@ bool GPU_HW::BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u
|
||||
|
||||
g_gpu_device->SetTextureSampler(0, tex, g_gpu_device->GetLinearSampler());
|
||||
g_gpu_device->SetPipeline(m_vram_write_replacement_pipeline.get());
|
||||
g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height);
|
||||
g_gpu_device->Draw(3, 0);
|
||||
|
||||
if (UseStencilBuffer())
|
||||
g_gpu_device->SetStencilRef(0);
|
||||
|
||||
const GSVector4i rect(dst_x, dst_y, dst_x + width, dst_y + height);
|
||||
g_gpu_device->SetScissor(rect);
|
||||
DrawScreenQuad(rect);
|
||||
RestoreDeviceContext();
|
||||
return true;
|
||||
}
|
||||
@@ -3036,10 +3180,9 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(const GPUBackendDrawCo
|
||||
if (m_texpage_dirty & TEXPAGE_DIRTY_PAGE_RECT)
|
||||
{
|
||||
DebugAssert(!(m_texpage_dirty & (TEXPAGE_DIRTY_DRAWN_RECT | TEXPAGE_DIRTY_WRITTEN_RECT)));
|
||||
DebugAssert(m_batch.texture_mode == BatchTextureMode::PageTexture &&
|
||||
m_batch.texture_cache_key.page < NUM_VRAM_PAGES);
|
||||
DebugAssert(m_batch.texture_mode == BatchTextureMode::PageTexture && m_texture_cache_key.page < NUM_VRAM_PAGES);
|
||||
|
||||
if (GPUTextureCache::AreSourcePagesDrawn(m_batch.texture_cache_key, m_current_uv_rect))
|
||||
if (GPUTextureCache::AreSourcePagesDrawn(m_texture_cache_key, m_current_uv_rect))
|
||||
{
|
||||
// UVs intersect with drawn area, can't use TC
|
||||
if (m_batch_index_count > 0)
|
||||
@@ -3049,7 +3192,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(const GPUBackendDrawCo
|
||||
}
|
||||
|
||||
// We need to swap the dirty tracking over to drawn/written.
|
||||
const GSVector4i page_rect = GetTextureRect(m_batch.texture_cache_key.page, m_batch.texture_cache_key.mode);
|
||||
const GSVector4i page_rect = GetTextureRect(m_texture_cache_key.page, m_texture_cache_key.mode);
|
||||
m_texpage_dirty = (m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0) |
|
||||
(m_vram_dirty_write_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_WRITTEN_RECT : 0);
|
||||
m_compute_uv_range = (ShouldCheckForTexPageOverlap() || m_clamp_uvs);
|
||||
@@ -3191,11 +3334,40 @@ void GPU_HW::ResetBatchVertexDepth()
|
||||
m_current_depth = 1;
|
||||
}
|
||||
|
||||
// True when the active downsample mode is AdaptiveStencil; no other mode enables the stencil buffer.
ALWAYS_INLINE bool GPU_HW::UseStencilBuffer() const
{
  const bool adaptive_stencil = (GPUDownsampleMode::AdaptiveStencil == m_downsample_mode);
  return adaptive_stencil;
}
|
||||
|
||||
// Converts m_current_depth to a float depth value: the counter is divided by 65535 and the result is
// subtracted from 1, so a larger counter produces a smaller depth.
ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const
{
  const float depth_counter = static_cast<float>(m_current_depth);
  return 1.0f - (depth_counter / 65535.0f);
}
|
||||
|
||||
// Draws a four-vertex quad covering `bounds` (pixel coordinates in the VRAM texture), with
// `uv_bounds` supplying the texture coordinates for the matching corners.
// The batch must already be flushed. Pipeline, scissor and other state are left to the caller.
ALWAYS_INLINE_RELEASE void GPU_HW::DrawScreenQuad(const GSVector4i bounds, const GSVector4 uv_bounds)
{
  // Reorder the bounds so both horizontal edges come first, followed by both vertical edges.
  const GSVector4 edges = GSVector4(bounds.xzyw());
  const GSVector2 target_size = GSVector2(m_vram_texture->GetSizeVec());

  // Pixel -> normalized device coordinates: x maps to [-1, 1]; y is flipped (1 - 2 * y / height).
  const GSVector2 ndc_x = ((edges.xy() * GSVector2::cxpr(2.0f)) / target_size.xx()) - GSVector2::cxpr(1.0f);
  const GSVector2 ndc_y = GSVector2::cxpr(1.0f) - (GSVector2::cxpr(2.0f) * (edges.zw() / target_size.yy()));

  // Pack as (x0, y0, x1, y1).
  const GSVector4 corners = GSVector4::xyxy(ndc_x, ndc_y).xzyw();

  DebugAssert(IsFlushed());

  ScreenVertex* verts;
  u32 available;
  u32 first_vertex;
  g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast<void**>(&verts), &available, &first_vertex);

  // Each vertex is written as one unaligned four-float store: position followed by texture coordinate.
  // NOTE(review): assumes ScreenVertex is exactly four floats (x, y, u, v) - confirm.
  // Corner order: (x0, y0), (x1, y0), (x0, y1), (x1, y1).
  GSVector4::store<false>(&verts[0], GSVector4::xyxy(corners.xy(), uv_bounds.xy()));
  GSVector4::store<false>(&verts[1], GSVector4::xyxy(corners.zyzw().xy(), uv_bounds.zyzw().xy()));
  GSVector4::store<false>(&verts[2], GSVector4::xyxy(corners.xwzw().xy(), uv_bounds.xwzw().xy()));
  GSVector4::store<false>(&verts[3], GSVector4::xyxy(corners.zw(), uv_bounds.zw()));

  g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4);
  g_gpu_device->Draw(4, first_vertex);
}
|
||||
|
||||
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 active_line_lsb)
|
||||
{
|
||||
FlushRender();
|
||||
@@ -3225,9 +3397,6 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
|
||||
const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT));
|
||||
g_gpu_device->SetPipeline(m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(interlaced_rendering)].get());
|
||||
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetViewportAndScissor(scaled_bounds);
|
||||
|
||||
struct VRAMFillUBOData
|
||||
{
|
||||
u32 u_dst_x;
|
||||
@@ -3247,7 +3416,13 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
|
||||
GPUDevice::RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
|
||||
uniforms.u_interlaced_displayed_field = active_line_lsb;
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
g_gpu_device->Draw(3, 0);
|
||||
|
||||
if (UseStencilBuffer())
|
||||
g_gpu_device->SetStencilRef(0);
|
||||
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(scaled_bounds);
|
||||
DrawScreenQuad(scaled_bounds);
|
||||
|
||||
RestoreDeviceContext();
|
||||
}
|
||||
@@ -3357,14 +3532,15 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
|
||||
{
|
||||
DeactivateROV();
|
||||
|
||||
std::unique_ptr<GPUTexture> upload_texture;
|
||||
GPUDevice::AutoRecycleTexture upload_texture;
|
||||
u32 map_index;
|
||||
|
||||
if (!g_gpu_device->GetFeatures().supports_texture_buffers)
|
||||
{
|
||||
map_index = 0;
|
||||
upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture,
|
||||
GPUTexture::Format::R16U, GPUTexture::Flags::None, data, data_pitch);
|
||||
upload_texture =
|
||||
g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::R16U,
|
||||
GPUTexture::Flags::None, data, data_pitch);
|
||||
if (!upload_texture)
|
||||
{
|
||||
ERROR_LOG("Failed to get {}x{} upload texture. Things are gonna break.", width, height);
|
||||
@@ -3406,21 +3582,20 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
|
||||
GetCurrentNormalizedVertexDepth()};
|
||||
|
||||
// the viewport should already be set to the full vram, so just adjust the scissor
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.width(), scaled_bounds.height());
|
||||
g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
|
||||
if (UseStencilBuffer())
|
||||
g_gpu_device->SetStencilRef(0);
|
||||
|
||||
if (upload_texture)
|
||||
{
|
||||
g_gpu_device->SetTextureSampler(0, upload_texture.get(), g_gpu_device->GetNearestSampler());
|
||||
g_gpu_device->Draw(3, 0);
|
||||
g_gpu_device->RecycleTexture(std::move(upload_texture));
|
||||
}
|
||||
else
|
||||
{
|
||||
g_gpu_device->SetTextureBuffer(0, m_vram_upload_buffer.get());
|
||||
g_gpu_device->Draw(3, 0);
|
||||
}
|
||||
|
||||
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(scaled_bounds);
|
||||
DrawScreenQuad(scaled_bounds);
|
||||
|
||||
RestoreDeviceContext();
|
||||
}
|
||||
@@ -3492,12 +3667,16 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
|
||||
GetCurrentNormalizedVertexDepth()};
|
||||
|
||||
// VRAM read texture should already be bound.
|
||||
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetViewportAndScissor(dst_bounds_scaled);
|
||||
g_gpu_device->SetPipeline(m_vram_copy_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
|
||||
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
g_gpu_device->Draw(3, 0);
|
||||
|
||||
if (UseStencilBuffer())
|
||||
g_gpu_device->SetStencilRef(0);
|
||||
|
||||
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
|
||||
g_gpu_device->SetScissor(dst_bounds_scaled);
|
||||
DrawScreenQuad(dst_bounds_scaled);
|
||||
RestoreDeviceContext();
|
||||
|
||||
if (check_mask && !m_pgxp_depth_buffer)
|
||||
@@ -3560,7 +3739,7 @@ void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd)
|
||||
{
|
||||
// TODO: avoid all this for vertex loading, only do when the type of draw changes
|
||||
BatchTextureMode texture_mode = cmd->texture_enable ? m_batch.texture_mode : BatchTextureMode::Disabled;
|
||||
GPUTextureCache::SourceKey texture_cache_key = m_batch.texture_cache_key;
|
||||
GPUTextureCache::SourceKey texture_cache_key = m_texture_cache_key;
|
||||
if (cmd->texture_enable)
|
||||
{
|
||||
// texture page changed - check that the new page doesn't intersect the drawing area
|
||||
@@ -3668,9 +3847,9 @@ void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd)
|
||||
{
|
||||
if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode ||
|
||||
(transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) ||
|
||||
dithering_enable != m_batch.dithering || m_batch_ubo_data.u_texture_window_bits != cmd->window ||
|
||||
dithering_enable != m_batch.dithering || m_texture_window_bits != cmd->window ||
|
||||
m_batch_ubo_data.u_set_mask_while_drawing != BoolToUInt32(cmd->set_mask_while_drawing) ||
|
||||
(texture_mode == BatchTextureMode::PageTexture && m_batch.texture_cache_key != texture_cache_key))
|
||||
(texture_mode == BatchTextureMode::PageTexture && m_texture_cache_key != texture_cache_key))
|
||||
{
|
||||
FlushRender();
|
||||
}
|
||||
@@ -3717,13 +3896,13 @@ void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd)
|
||||
m_batch.texture_mode = texture_mode;
|
||||
m_batch.transparency_mode = transparency_mode;
|
||||
m_batch.dithering = dithering_enable;
|
||||
m_batch.texture_cache_key = texture_cache_key;
|
||||
m_texture_cache_key = texture_cache_key;
|
||||
|
||||
if (m_batch_ubo_data.u_texture_window_bits != cmd->window)
|
||||
if (m_texture_window_bits != cmd->window)
|
||||
{
|
||||
m_batch_ubo_data.u_texture_window_bits = cmd->window;
|
||||
m_texture_window_bits = cmd->window;
|
||||
m_texture_window_active = (cmd->window != GPUTextureWindow{{0xFF, 0xFF, 0x00, 0x00}});
|
||||
GSVector4i::store<true>(&m_batch_ubo_data.u_texture_window[0], GSVector4i::load32(&cmd->window).u8to32());
|
||||
GSVector4i::store<false>(&m_batch_ubo_data.u_texture_window[0], GSVector4i::load32(&cmd->window).u8to32());
|
||||
m_batch_ubo_dirty = true;
|
||||
}
|
||||
|
||||
@@ -3767,7 +3946,7 @@ void GPU_HW::FlushRender()
|
||||
const GPUTextureCache::Source* texture = nullptr;
|
||||
if (m_batch.texture_mode == BatchTextureMode::PageTexture)
|
||||
{
|
||||
texture = LookupSource(m_batch.texture_cache_key, m_current_uv_rect,
|
||||
texture = LookupSource(m_texture_cache_key, m_current_uv_rect,
|
||||
m_batch.transparency_mode != GPUTransparencyMode::Disabled ?
|
||||
GPUTextureCache::PaletteRecordFlags::HasSemiTransparentDraws :
|
||||
GPUTextureCache::PaletteRecordFlags::None);
|
||||
@@ -3884,26 +4063,20 @@ void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!m_vram_extract_texture || m_vram_extract_texture->GetWidth() != scaled_display_width ||
|
||||
m_vram_extract_texture->GetHeight() != scaled_display_height)
|
||||
if (!g_gpu_device->ResizeTexture(&m_vram_extract_texture, scaled_display_width, scaled_display_height,
|
||||
GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8,
|
||||
GPUTexture::Flags::None)) [[unlikely]]
|
||||
{
|
||||
if (!g_gpu_device->ResizeTexture(&m_vram_extract_texture, scaled_display_width, scaled_display_height,
|
||||
GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8,
|
||||
GPUTexture::Flags::None)) [[unlikely]]
|
||||
{
|
||||
ClearDisplayTexture();
|
||||
return;
|
||||
}
|
||||
ClearDisplayTexture();
|
||||
return;
|
||||
}
|
||||
|
||||
m_vram_texture->MakeReadyForSampling();
|
||||
g_gpu_device->InvalidateRenderTarget(m_vram_extract_texture.get());
|
||||
|
||||
if (depth_source &&
|
||||
((m_vram_extract_depth_texture && m_vram_extract_depth_texture->GetWidth() == scaled_display_width &&
|
||||
m_vram_extract_depth_texture->GetHeight() == scaled_display_height) ||
|
||||
!g_gpu_device->ResizeTexture(&m_vram_extract_depth_texture, scaled_display_width, scaled_display_height,
|
||||
GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT, GPUTexture::Flags::None)))
|
||||
g_gpu_device->ResizeTexture(&m_vram_extract_depth_texture, scaled_display_width, scaled_display_height,
|
||||
GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT, GPUTexture::Flags::None))
|
||||
{
|
||||
depth_source->MakeReadyForSampling();
|
||||
g_gpu_device->InvalidateRenderTarget(m_vram_extract_depth_texture.get());
|
||||
@@ -3974,7 +4147,23 @@ void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd)
|
||||
if (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit)
|
||||
{
|
||||
DebugAssert(m_display_texture);
|
||||
DownsampleFramebuffer();
|
||||
|
||||
if (m_downsample_mode == GPUDownsampleMode::Adaptive)
|
||||
{
|
||||
DownsampleFramebufferAdaptive(m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
|
||||
m_display_texture_view_width, m_display_texture_view_height);
|
||||
}
|
||||
else if (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil)
|
||||
{
|
||||
DownsampleFramebufferAdaptiveStencil(m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
|
||||
m_display_texture_view_width, m_display_texture_view_height,
|
||||
scaled_vram_offset_x, scaled_vram_offset_y, line_skip);
|
||||
}
|
||||
else
|
||||
{
|
||||
DownsampleFramebufferBoxFilter(m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
|
||||
m_display_texture_view_width, m_display_texture_view_height);
|
||||
}
|
||||
}
|
||||
|
||||
if (drew_anything)
|
||||
@@ -3993,6 +4182,10 @@ void GPU_HW::UpdateDownsamplingLevels()
|
||||
current_width /= 2;
|
||||
}
|
||||
}
|
||||
else if (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil)
|
||||
{
|
||||
m_downsample_scale_or_levels = m_resolution_scale;
|
||||
}
|
||||
else if (m_downsample_mode == GPUDownsampleMode::Box)
|
||||
{
|
||||
m_downsample_scale_or_levels = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale);
|
||||
@@ -4012,20 +4205,6 @@ void GPU_HW::OnBufferSwapped()
|
||||
m_depth_was_copied = false;
|
||||
}
|
||||
|
||||
void GPU_HW::DownsampleFramebuffer()
|
||||
{
|
||||
GPUTexture* source = m_display_texture;
|
||||
const u32 left = m_display_texture_view_x;
|
||||
const u32 top = m_display_texture_view_y;
|
||||
const u32 width = m_display_texture_view_width;
|
||||
const u32 height = m_display_texture_view_height;
|
||||
|
||||
if (m_downsample_mode == GPUDownsampleMode::Adaptive)
|
||||
DownsampleFramebufferAdaptive(source, left, top, width, height);
|
||||
else
|
||||
DownsampleFramebufferBoxFilter(source, left, top, width, height);
|
||||
}
|
||||
|
||||
void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
|
||||
{
|
||||
GL_PUSH_FMT("DownsampleFramebufferAdaptive ({},{} => {},{})", left, top, left + width, left + height);
|
||||
@@ -4146,6 +4325,94 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
|
||||
SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, width, height);
|
||||
}
|
||||
|
||||
/// Downsamples the display view in three passes (weights, box downsample, composite) and
/// installs the result as the display texture.
///
/// @param source    Texture holding the full-resolution display image.
/// @param left      X offset of the display view inside @p source.
/// @param top       Y offset of the display view inside @p source.
/// @param width     Width of the display view; also the width of the composited output.
/// @param height    Height of the display view; also the height of the composited output.
/// @param fb_left   X offset used when addressing the VRAM-sized weight texture.
/// @param fb_top    Y offset used when addressing the VRAM-sized weight texture.
/// @param line_skip Shift applied to row coordinates/heights when addressing the weight texture.
///
/// If any of the required render targets cannot be created, an error is logged and the display
/// texture is left untouched.
void GPU_HW::DownsampleFramebufferAdaptiveStencil(GPUTexture* source, u32 left, u32 top, u32 width, u32 height,
                                                  u32 fb_left, u32 fb_top, u32 line_skip)
{
  GL_PUSH_FMT("DownsampleFramebufferAdaptiveStencil({},{} => {},{} ({}x{})", left, top, left + width, top + height,
              width, height);

  const u32 ds_width = width / m_downsample_scale_or_levels;
  const u32 ds_height = height / m_downsample_scale_or_levels;

  // TODO: Weight texture is broken with MSAA
  const bool output_texture_ok =
    g_gpu_device->ResizeTexture(&m_downsample_texture, width, height, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT,
                                GPUTexture::Flags::None, false);
  GPUDevice::AutoRecycleTexture downsample_texture = g_gpu_device->FetchAutoRecycleTexture(
    ds_width, ds_height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT, GPUTexture::Flags::None);
  GPUDevice::AutoRecycleTexture weight_texture = g_gpu_device->FetchAutoRecycleTexture(
    m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(),
    GPUTexture::Type::RenderTarget, GPUTexture::Format::R8, GPUTexture::Flags::None);
  if (!output_texture_ok || !downsample_texture || !weight_texture)
  {
    ERROR_LOG("Failed to create {}x{} RT for adaptive stencil downsampling", width, height);

    // Balance the GL_PUSH_FMT above; the success path pops at the end of the function.
    GL_POP();
    return;
  }

  {
    // fill weight texture
    GL_SCOPE("Weights");

    const float fill_uniforms_unmarked[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    const float fill_uniforms_marked[4] = {1.0f, 1.0f, 1.0f, 1.0f};

    g_gpu_device->SetViewportAndScissor(fb_left, fb_top, width, height << line_skip);

    g_gpu_device->InvalidateRenderTarget(weight_texture.get());
    g_gpu_device->SetRenderTarget(weight_texture.get(), m_vram_depth_texture.get());
    g_gpu_device->SetPipeline(m_downsample_pass_pipeline.get());

    // Two draws over the same area with different stencil references and fill values.
    // NOTE(review): presumably the pipeline's stencil test only passes where the stencil buffer
    // equals the reference, so each pixel ends up 0.0 (unmarked) or 1.0 (marked) - confirm.
    g_gpu_device->SetStencilRef(0);
    g_gpu_device->PushUniformBuffer(fill_uniforms_unmarked, sizeof(fill_uniforms_unmarked));
    g_gpu_device->Draw(3, 0);

    g_gpu_device->SetStencilRef(1);
    g_gpu_device->PushUniformBuffer(fill_uniforms_marked, sizeof(fill_uniforms_marked));
    g_gpu_device->Draw(3, 0);
  }

  // box downsample
  {
    GL_SCOPE("Box downsample");
    source->MakeReadyForSampling();

    // Only the first five entries are set; the remaining entries are zero-initialised.
    const u32 uniforms[9] = {left, top, fb_left, fb_top, line_skip};

    g_gpu_device->InvalidateRenderTarget(downsample_texture.get());
    g_gpu_device->SetRenderTarget(downsample_texture.get());
    g_gpu_device->SetPipeline(m_downsample_blur_pipeline.get());
    g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler());
    g_gpu_device->SetTextureSampler(1, weight_texture.get(), g_gpu_device->GetNearestSampler());
    g_gpu_device->SetViewportAndScissor(0, 0, ds_width, ds_height);
    g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
    g_gpu_device->Draw(3, 0);
  }

  // composite
  {
    GL_SCOPE("Composite");

    // Display view rectangle divided by the source texture size.
    const GSVector4 nat_uniforms =
      GSVector4(GSVector4i(left, top, width, height)) / GSVector4(GSVector4i::xyxy(source->GetSizeVec()));

    g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
    g_gpu_device->SetRenderTarget(m_downsample_texture.get());
    g_gpu_device->SetPipeline(m_downsample_composite_pipeline.get());
    g_gpu_device->SetTextureSampler(0, downsample_texture.get(), g_gpu_device->GetLinearSampler());
    g_gpu_device->SetTextureSampler(1, source, g_gpu_device->GetNearestSampler());
    g_gpu_device->SetViewportAndScissor(0, 0, width, height);
    g_gpu_device->PushUniformBuffer(&nat_uniforms, sizeof(nat_uniforms));
    g_gpu_device->Draw(3, 0);

    m_downsample_texture->MakeReadyForSampling();
  }

  GL_POP();
  RestoreDeviceContext();

  SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, width, height);
}
|
||||
|
||||
void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
|
||||
{
|
||||
GL_SCOPE_FMT("DownsampleFramebufferBoxFilter({},{} => {},{} ({}x{})", left, top, left + width, top + height, width,
|
||||
@@ -4154,14 +4421,8 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to
|
||||
const u32 ds_width = width / m_downsample_scale_or_levels;
|
||||
const u32 ds_height = height / m_downsample_scale_or_levels;
|
||||
|
||||
if (!m_downsample_texture || m_downsample_texture->GetWidth() != ds_width ||
|
||||
m_downsample_texture->GetHeight() != ds_height)
|
||||
{
|
||||
g_gpu_device->RecycleTexture(std::move(m_downsample_texture));
|
||||
m_downsample_texture = g_gpu_device->FetchTexture(ds_width, ds_height, 1, 1, 1, GPUTexture::Type::RenderTarget,
|
||||
VRAM_RT_FORMAT, GPUTexture::Flags::None);
|
||||
}
|
||||
if (!m_downsample_texture)
|
||||
if (!g_gpu_device->ResizeTexture(&m_downsample_texture, ds_width, ds_height, GPUTexture::Type::RenderTarget,
|
||||
VRAM_RT_FORMAT, GPUTexture::Flags::None, false))
|
||||
{
|
||||
ERROR_LOG("Failed to create {}x{} RT for box downsampling", width, height);
|
||||
return;
|
||||
|
||||
@@ -15,12 +15,6 @@
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
class Error;
|
||||
|
||||
class GPU_SW_Backend;
|
||||
struct GPUBackendCommand;
|
||||
struct GPUBackendDrawCommand;
|
||||
|
||||
// TODO: Move to cpp
|
||||
// TODO: Rename to GPUHWBackend, preserved to avoid conflicts.
|
||||
class GPU_HW final : public GPUBackend
|
||||
@@ -121,6 +115,14 @@ private:
|
||||
|
||||
static_assert(GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS >= (VRAM_WIDTH * VRAM_HEIGHT));
|
||||
|
||||
// Vertex record made of four floats, aligned to 16 bytes.
// NOTE(review): presumably x/y are the position and u/v the texture coordinate consumed by the
// screen quad vertex shader ("float2 a_pos", "float2 a_tex0") - confirm against DrawScreenQuad().
struct alignas(16) ScreenVertex
|
||||
{
|
||||
float x;
|
||||
float y;
|
||||
float u;
|
||||
float v;
|
||||
};
|
||||
|
||||
struct alignas(16) BatchVertex
|
||||
{
|
||||
float x;
|
||||
@@ -139,7 +141,7 @@ private:
|
||||
void SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v);
|
||||
};
|
||||
|
||||
struct alignas(4) BatchConfig
|
||||
struct BatchConfig
|
||||
{
|
||||
BatchTextureMode texture_mode = BatchTextureMode::Disabled;
|
||||
GPUTransparencyMode transparency_mode = GPUTransparencyMode::Disabled;
|
||||
@@ -149,14 +151,13 @@ private:
|
||||
bool check_mask_before_draw = false;
|
||||
bool use_depth_buffer = false;
|
||||
bool sprite_mode = false;
|
||||
|
||||
GPUTextureCache::SourceKey texture_cache_key = {};
|
||||
u8 stencil_reference = 0;
|
||||
|
||||
// Returns the render mode for this batch.
|
||||
BatchRenderMode GetRenderMode() const;
|
||||
};
|
||||
|
||||
struct alignas(VECTOR_ALIGNMENT) BatchUBOData
|
||||
struct BatchUBOData
|
||||
{
|
||||
u32 u_texture_window[4]; // and_x, and_y, or_x, or_y
|
||||
float u_src_alpha_factor;
|
||||
@@ -166,7 +167,6 @@ private:
|
||||
float u_resolution_scale;
|
||||
float u_rcp_resolution_scale;
|
||||
float u_resolution_scale_minus_one;
|
||||
GPUTextureWindow u_texture_window_bits; // not actually used on GPU
|
||||
};
|
||||
|
||||
struct RendererStats
|
||||
@@ -223,6 +223,8 @@ private:
|
||||
bool ShouldCheckForTexPageOverlap() const;
|
||||
|
||||
bool IsFlushed() const;
|
||||
void DrawScreenQuad(const GSVector4i bounds, const GSVector4 uv_bounds = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f));
|
||||
|
||||
void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
|
||||
void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd);
|
||||
void PrepareDraw(const GPUBackendDrawCommand* cmd);
|
||||
@@ -233,6 +235,9 @@ private:
|
||||
const GSVector4i clamped_draw_rect_123);
|
||||
void ResetBatchVertexDepth();
|
||||
|
||||
/// Returns true if the stencil buffer should be filled.
|
||||
bool UseStencilBuffer() const;
|
||||
|
||||
/// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
|
||||
float GetCurrentNormalizedVertexDepth() const;
|
||||
|
||||
@@ -266,11 +271,13 @@ private:
|
||||
void SetBatchDepthBuffer(const GPUBackendDrawCommand* cmd, bool enabled);
|
||||
void CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVertex* vertices, u32 num_vertices);
|
||||
void SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled);
|
||||
void SetBatchStencilReference(const GPUBackendDrawCommand* cmd, u8 value);
|
||||
|
||||
void UpdateDownsamplingLevels();
|
||||
|
||||
void DownsampleFramebuffer();
|
||||
void DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
|
||||
void DownsampleFramebufferAdaptiveStencil(GPUTexture* source, u32 left, u32 top, u32 width, u32 height, u32 fb_left,
|
||||
u32 fb_top, u32 line_skip);
|
||||
void DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
|
||||
|
||||
std::unique_ptr<GPUTexture> m_vram_texture;
|
||||
@@ -329,6 +336,7 @@ private:
|
||||
|
||||
// Changed state
|
||||
BatchUBOData m_batch_ubo_data = {};
|
||||
GPUTextureCache::SourceKey m_texture_cache_key = {};
|
||||
|
||||
// Bounding box of VRAM area that the GPU has drawn into.
|
||||
GSVector4i m_vram_dirty_draw_rect = INVALID_RECT;
|
||||
@@ -349,6 +357,8 @@ private:
|
||||
u32 bits = INVALID_DRAW_MODE_BITS;
|
||||
} m_draw_mode = {};
|
||||
|
||||
GPUTextureWindow m_texture_window_bits;
|
||||
|
||||
std::unique_ptr<GPUPipeline> m_wireframe_pipeline;
|
||||
|
||||
// [wrapped][interlaced]
|
||||
@@ -380,4 +390,5 @@ private:
|
||||
|
||||
// common shaders
|
||||
std::unique_ptr<GPUShader> m_fullscreen_quad_vertex_shader;
|
||||
std::unique_ptr<GPUShader> m_screen_quad_vertex_shader;
|
||||
};
|
||||
|
||||
@@ -50,6 +50,27 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss) const
|
||||
false);
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateScreenVertexShader() const
|
||||
{
|
||||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
DeclareVertexEntryPoint(ss, {"float2 a_pos", "float2 a_tex0"}, 0, 1, {}, false, "", false, false, false);
|
||||
ss << R"(
|
||||
{
|
||||
// Depth set to 1 for PGXP depth buffer.
|
||||
v_pos = float4(a_pos, 1.0f, 1.0f);
|
||||
v_tex0 = a_tex0;
|
||||
|
||||
// NDC space Y flip in Vulkan.
|
||||
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
|
||||
v_pos.y = -v_pos.y;
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading,
|
||||
bool textured, bool palette, bool page_texture, bool uv_limits,
|
||||
bool force_round_texcoords, bool pgxp_depth,
|
||||
@@ -1803,6 +1824,73 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
/// Builds the fragment shader for the box-downsample pass of adaptive stencil downsampling.
///
/// Each output pixel averages a FACTOR x FACTOR block of samp0 into RGB, and writes 1.0 to alpha
/// when the sum of the matching samp1 texels (all samples when multisampled) is non-zero,
/// otherwise 0.0.
///
/// @param factor       Edge length of the averaged block; emitted as the FACTOR macro.
/// @param multisamples Sample count of samp1; values above 1 declare it as multisampled.
std::string GPU_HW_ShaderGen::GenerateAdaptiveStencilDownsampleBlurFragmentShader(u32 factor, u8 multisamples) const
{
  std::stringstream shader;

  WriteHeader(shader);
  DefineMacro(shader, "MULTISAMPLES", multisamples);
  DeclareUniformBuffer(shader, {"uint2 u_base_coords", "uint2 u_fb_base_coords", "uint u_line_skip"}, true);
  DeclareTexture(shader, "samp0", 0, false);
  DeclareTexture(shader, "samp1", 1, (multisamples > 1));

  // FACTOR is written with a 'u' suffix so it is an unsigned literal in the shader.
  shader << "#define FACTOR " << factor << "u\n";

  DeclareFragmentEntryPoint(shader, 0, 1, {}, true);

  // Entry point body, emitted verbatim.
  shader << R"(
{
  float3 color = float3(0.0, 0.0, 0.0);
  float weight = 0.0;
  uint2 base_coords = u_base_coords + uint2(v_pos.xy) * uint2(FACTOR, FACTOR);
  uint2 fb_base_coords = u_fb_base_coords + uint2(uint(v_pos.x) * FACTOR, (uint(v_pos.y) << u_line_skip) * FACTOR);
  for (uint offset_x = 0u; offset_x < FACTOR; offset_x++)
  {
    for (uint offset_y = 0u; offset_y < FACTOR; offset_y++)
    {
      int2 lcoords = int2(base_coords + uint2(offset_x, offset_y));
      color += LOAD_TEXTURE(samp0, lcoords, 0).rgb;

      int2 fbcoords = int2(fb_base_coords + uint2(offset_x, offset_y << u_line_skip));
#if MULTISAMPLES > 1
      for (int i = 0; i < MULTISAMPLES; i++)
        weight += LOAD_TEXTURE_MS(samp1, fbcoords, i).r;
#else
      weight += LOAD_TEXTURE(samp1, fbcoords, 0).r;
#endif
    }
  }
  color /= float(FACTOR * FACTOR);
  o_col0 = float4(color, float(weight != 0.0));
}
)";

  return shader.str();
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateAdaptiveStencilDownsampleCompositeFragmentShader() const
|
||||
{
|
||||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
|
||||
DeclareUniformBuffer(ss, {"float4 u_native_rect"}, true);
|
||||
|
||||
DeclareTexture(ss, "samp0", 0, false);
|
||||
DeclareTexture(ss, "samp1", 1, false);
|
||||
DeclareTexture(ss, "samp2", 2, false);
|
||||
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
|
||||
ss << R"(
|
||||
{
|
||||
float4 downsample_color = SAMPLE_TEXTURE(samp0, v_tex0);
|
||||
float native_weight = float(downsample_color.a != 0.0);
|
||||
float2 native_coords = u_native_rect.xy + v_tex0 * u_native_rect.zw;
|
||||
float4 native_color = SAMPLE_TEXTURE(samp1, native_coords);
|
||||
|
||||
o_col0 = lerp(downsample_color, native_color, native_weight);
|
||||
})";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter) const
|
||||
{
|
||||
std::stringstream ss;
|
||||
|
||||
@@ -13,6 +13,8 @@ public:
|
||||
GPU_HW_ShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch);
|
||||
~GPU_HW_ShaderGen();
|
||||
|
||||
std::string GenerateScreenVertexShader() const;
|
||||
|
||||
std::string GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading, bool textured, bool palette,
|
||||
bool page_texture, bool uv_limits, bool force_round_texcoords, bool pgxp_depth,
|
||||
bool disable_color_perspective) const;
|
||||
@@ -41,6 +43,8 @@ public:
|
||||
std::string GenerateAdaptiveDownsampleBlurFragmentShader() const;
|
||||
std::string GenerateAdaptiveDownsampleCompositeFragmentShader() const;
|
||||
std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor) const;
|
||||
std::string GenerateAdaptiveStencilDownsampleBlurFragmentShader(u32 factor, u8 msaa) const;
|
||||
std::string GenerateAdaptiveStencilDownsampleCompositeFragmentShader() const;
|
||||
|
||||
std::string GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter) const;
|
||||
|
||||
|
||||
@@ -1565,11 +1565,12 @@ const char* Settings::GetLineDetectModeDisplayName(GPULineDetectMode mode)
|
||||
"GPULineDetectMode");
|
||||
}
|
||||
|
||||
static constexpr const std::array s_downsample_mode_names = {"Disabled", "Box", "Adaptive"};
|
||||
static constexpr const std::array s_downsample_mode_names = {"Disabled", "Box", "Adaptive", "AdaptiveStencil"};
|
||||
static constexpr const std::array s_downsample_mode_display_names = {
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "Disabled", "GPUDownsampleMode"),
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "Box (Downsample 3D/Smooth All)", "GPUDownsampleMode"),
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "Adaptive (Preserve 3D/Smooth 2D)", "GPUDownsampleMode")};
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "Adaptive (Preserve 3D/Smooth 2D)", "GPUDownsampleMode"),
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "Adaptive Sharp (Preserve 3D/Smooth 2D)", "GPUDownsampleMode")};
|
||||
|
||||
std::optional<GPUDownsampleMode> Settings::ParseDownsampleModeName(const char* str)
|
||||
{
|
||||
|
||||
@@ -105,6 +105,7 @@ enum class GPUDownsampleMode : u8
|
||||
Disabled,
|
||||
Box,
|
||||
Adaptive,
|
||||
AdaptiveStencil,
|
||||
Count
|
||||
};
|
||||
|
||||
|
||||
@@ -646,6 +646,13 @@ void D3D11Device::ClearDepth(GPUTexture* t, float d)
|
||||
T->CommitClear(m_context.Get());
|
||||
}
|
||||
|
||||
/// Clears the stencil plane of @p t to @p value. The texture must have a stencil component
/// (checked with DebugAssert).
void D3D11Device::ClearStencil(GPUTexture* t, u8 value)
{
  D3D11Texture* const stencil_tex = static_cast<D3D11Texture*>(t);
  DebugAssert(stencil_tex->HasStencil());

  // D3D11_CLEAR_STENCIL restricts the clear to the stencil plane; the depth argument is unused.
  ID3D11DepthStencilView* const dsv = stencil_tex->GetD3DDSV();
  m_context->ClearDepthStencilView(dsv, D3D11_CLEAR_STENCIL, 0.0f, value);
}
|
||||
|
||||
void D3D11Device::InvalidateRenderTarget(GPUTexture* t)
|
||||
{
|
||||
D3D11Texture* const T = static_cast<D3D11Texture*>(t);
|
||||
@@ -1146,6 +1153,16 @@ void D3D11Device::SetScissor(const GSVector4i rc)
|
||||
m_context->RSSetScissorRects(1, &drc);
|
||||
}
|
||||
|
||||
/// Updates the cached stencil reference value. When it actually changes and a pipeline is bound,
/// that pipeline's depth-stencil state is re-applied with the new reference.
void D3D11Device::SetStencilRef(u8 value)
{
  if (m_current_stencil_ref != value)
  {
    m_current_stencil_ref = value;

    // D3D11 sets the reference together with the depth-stencil state object, so re-bind it.
    if (m_current_pipeline)
      m_context->OMSetDepthStencilState(m_current_pipeline->GetDepthStencilState(), value);
  }
}
|
||||
|
||||
void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
|
||||
{
|
||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
|
||||
|
||||
@@ -70,6 +70,7 @@ public:
|
||||
u32 src_x, u32 src_y, u32 width, u32 height) override;
|
||||
void ClearRenderTarget(GPUTexture* t, u32 c) override;
|
||||
void ClearDepth(GPUTexture* t, float d) override;
|
||||
void ClearStencil(GPUTexture* t, u8 value) override;
|
||||
void InvalidateRenderTarget(GPUTexture* t) override;
|
||||
|
||||
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
|
||||
@@ -101,6 +102,7 @@ public:
|
||||
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
|
||||
void SetViewport(const GSVector4i rc) override;
|
||||
void SetScissor(const GSVector4i rc) override;
|
||||
void SetStencilRef(u8 value) override;
|
||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
@@ -131,7 +133,7 @@ private:
|
||||
size_t operator()(const BlendStateMapKey& key) const;
|
||||
};
|
||||
using RasterizationStateMap = std::unordered_map<u8, ComPtr<ID3D11RasterizerState>>;
|
||||
using DepthStateMap = std::unordered_map<u8, ComPtr<ID3D11DepthStencilState>>;
|
||||
using DepthStateMap = std::unordered_map<u32, ComPtr<ID3D11DepthStencilState>>;
|
||||
using BlendStateMap = std::unordered_map<BlendStateMapKey, ComPtr<ID3D11BlendState>, BlendStateMapHash>;
|
||||
using InputLayoutMap =
|
||||
std::unordered_map<GPUPipeline::InputLayout, ComPtr<ID3D11InputLayout>, GPUPipeline::InputLayoutHash>;
|
||||
@@ -198,6 +200,7 @@ private:
|
||||
D3D_PRIMITIVE_TOPOLOGY m_current_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
|
||||
u32 m_current_vertex_stride = 0;
|
||||
u32 m_current_blend_factor = 0;
|
||||
u8 m_current_stencil_ref = 0;
|
||||
|
||||
std::array<ID3D11ShaderResourceView*, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
|
||||
std::array<ID3D11SamplerState*, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};
|
||||
|
||||
@@ -192,8 +192,8 @@ D3D11Device::ComPtr<ID3D11DepthStencilState> D3D11Device::GetDepthState(const GP
|
||||
return dds;
|
||||
}
|
||||
|
||||
static constexpr std::array<D3D11_COMPARISON_FUNC, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)> func_mapping =
|
||||
{{
|
||||
static constexpr std::array<D3D11_COMPARISON_FUNC, static_cast<u32>(GPUPipeline::ComparisonFunc::MaxCount)>
|
||||
func_mapping = {{
|
||||
D3D11_COMPARISON_NEVER, // Never
|
||||
D3D11_COMPARISON_ALWAYS, // Always
|
||||
D3D11_COMPARISON_LESS, // Less
|
||||
@@ -203,10 +203,36 @@ D3D11Device::ComPtr<ID3D11DepthStencilState> D3D11Device::GetDepthState(const GP
|
||||
D3D11_COMPARISON_EQUAL, // Equal
|
||||
}};
|
||||
|
||||
static constexpr std::array<D3D11_STENCIL_OP, static_cast<u32>(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping =
|
||||
{{
|
||||
D3D11_STENCIL_OP_KEEP, // Keep
|
||||
D3D11_STENCIL_OP_ZERO, // Zero
|
||||
D3D11_STENCIL_OP_REPLACE, // Replace
|
||||
D3D11_STENCIL_OP_INCR_SAT, // IncrSat
|
||||
D3D11_STENCIL_OP_DECR_SAT, // DecrSat
|
||||
D3D11_STENCIL_OP_INVERT, // Invert
|
||||
D3D11_STENCIL_OP_INCR, // Incr
|
||||
D3D11_STENCIL_OP_DECR, // Decr
|
||||
}};
|
||||
|
||||
D3D11_DEPTH_STENCIL_DESC desc = {};
|
||||
desc.DepthEnable = ds.depth_test != GPUPipeline::DepthFunc::Always || ds.depth_write;
|
||||
desc.DepthEnable = ds.depth_test != GPUPipeline::ComparisonFunc::Always || ds.depth_write;
|
||||
desc.DepthFunc = func_mapping[static_cast<u8>(ds.depth_test.GetValue())];
|
||||
desc.DepthWriteMask = ds.depth_write ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
|
||||
desc.StencilEnable = ds.stencil_enable;
|
||||
if (ds.stencil_enable)
{
  // All eight stencil bits take part in both reads and writes.
  desc.StencilReadMask = 0xFF;
  desc.StencilWriteMask = 0xFF;

  // Front-facing primitives.
  desc.FrontFace.StencilFailOp = stencil_op_mapping[static_cast<u8>(ds.front_stencil_fail_op.GetValue())];
  desc.FrontFace.StencilDepthFailOp = stencil_op_mapping[static_cast<u8>(ds.front_stencil_depth_fail_op.GetValue())];
  desc.FrontFace.StencilPassOp = stencil_op_mapping[static_cast<u8>(ds.front_stencil_pass_op.GetValue())];
  // Use the front-face comparison here; this previously read back_stencil_func.
  // NOTE(review): assumes the depth state exposes front_stencil_func alongside the other
  // front_stencil_* fields - confirm against the GPUPipeline depth state declaration.
  desc.FrontFace.StencilFunc = func_mapping[static_cast<u8>(ds.front_stencil_func.GetValue())];

  // Back-facing primitives.
  desc.BackFace.StencilFailOp = stencil_op_mapping[static_cast<u8>(ds.back_stencil_fail_op.GetValue())];
  desc.BackFace.StencilDepthFailOp = stencil_op_mapping[static_cast<u8>(ds.back_stencil_depth_fail_op.GetValue())];
  desc.BackFace.StencilPassOp = stencil_op_mapping[static_cast<u8>(ds.back_stencil_pass_op.GetValue())];
  desc.BackFace.StencilFunc = func_mapping[static_cast<u8>(ds.back_stencil_func.GetValue())];
}
|
||||
|
||||
HRESULT hr = m_device->CreateDepthStencilState(&desc, dds.GetAddressOf());
|
||||
if (FAILED(hr)) [[unlikely]]
|
||||
@@ -449,7 +475,7 @@ void D3D11Device::SetPipeline(GPUPipeline* pipeline)
|
||||
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
|
||||
{
|
||||
m_current_depth_state = ds;
|
||||
m_context->OMSetDepthStencilState(ds, 0);
|
||||
m_context->OMSetDepthStencilState(ds, m_current_stencil_ref);
|
||||
}
|
||||
|
||||
if (ID3D11BlendState* bs = PL->GetBlendState();
|
||||
|
||||
@@ -1509,6 +1509,16 @@ void D3D12Device::ClearDepth(GPUTexture* t, float d)
|
||||
EndRenderPass();
|
||||
}
|
||||
|
||||
/// Clears the stencil plane of @p t to @p value. The texture must have a stencil component
/// (checked with DebugAssert).
void D3D12Device::ClearStencil(GPUTexture* t, u8 value)
{
  DebugAssert(t->HasStencil());

  D3D12Texture* const stencil_tex = static_cast<D3D12Texture*>(t);

  // End the active render pass first if this texture is its depth target.
  if (InRenderPass() && m_current_depth_target == t)
  {
    EndRenderPass();
  }

  // Stencil-only clear of the whole view (no rectangles supplied).
  GetCommandList()->ClearDepthStencilView(stencil_tex->GetWriteDescriptor(), D3D12_CLEAR_FLAG_STENCIL, 0.0f, value, 0,
                                          nullptr);
}
|
||||
|
||||
void D3D12Device::InvalidateRenderTarget(GPUTexture* t)
|
||||
{
|
||||
GPUDevice::InvalidateRenderTarget(t);
|
||||
@@ -1892,8 +1902,12 @@ void D3D12Device::BeginRenderPass()
|
||||
ds_desc_p = &ds_desc;
|
||||
ds_desc.cpuDescriptor = ds->GetWriteDescriptor();
|
||||
ds_desc.DepthEndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
|
||||
ds_desc.StencilBeginningAccess = {};
|
||||
ds_desc.StencilEndingAccess = {};
|
||||
ds_desc.StencilBeginningAccess = {ds->IsDepthStencil() ? D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE :
|
||||
D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD,
|
||||
{}};
|
||||
ds_desc.StencilEndingAccess = {ds->IsDepthStencil() ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE :
|
||||
D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_DISCARD,
|
||||
{}};
|
||||
|
||||
switch (ds->GetState())
|
||||
{
|
||||
@@ -2073,6 +2087,8 @@ void D3D12Device::SetInitialPipelineState()
|
||||
m_current_blend_constant = m_current_pipeline->GetBlendConstants();
|
||||
cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());
|
||||
|
||||
cmdlist->OMSetStencilRef(m_current_stencil_ref);
|
||||
|
||||
SetViewport(cmdlist);
|
||||
SetScissor(cmdlist);
|
||||
}
|
||||
@@ -2101,6 +2117,15 @@ void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist)
|
||||
cmdlist->RSSetScissorRects(1, reinterpret_cast<const D3D12_RECT*>(&m_current_scissor));
|
||||
}
|
||||
|
||||
/// Updates the cached stencil reference value and, when it actually changes, records it on the
/// current command list.
void D3D12Device::SetStencilRef(u8 value)
{
  if (m_current_stencil_ref != value)
  {
    m_current_stencil_ref = value;
    GetCommandList()->OMSetStencilRef(value);
  }
}
|
||||
|
||||
void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
|
||||
{
|
||||
D3D12Texture* T = static_cast<D3D12Texture*>(texture);
|
||||
|
||||
@@ -91,6 +91,7 @@ public:
|
||||
u32 src_x, u32 src_y, u32 width, u32 height) override;
|
||||
void ClearRenderTarget(GPUTexture* t, u32 c) override;
|
||||
void ClearDepth(GPUTexture* t, float d) override;
|
||||
void ClearStencil(GPUTexture* t, u8 value) override;
|
||||
void InvalidateRenderTarget(GPUTexture* t) override;
|
||||
|
||||
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
|
||||
@@ -122,6 +123,7 @@ public:
|
||||
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
|
||||
void SetViewport(const GSVector4i rc) override;
|
||||
void SetScissor(const GSVector4i rc) override;
|
||||
void SetStencilRef(u8 value) override;
|
||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
@@ -357,6 +359,7 @@ private:
|
||||
u32 m_current_vertex_stride = 0;
|
||||
u32 m_current_blend_constant = 0;
|
||||
GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
u8 m_current_stencil_ref = 0;
|
||||
|
||||
std::array<D3D12Texture*, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
|
||||
std::array<D3D12DescriptorHandle, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};
|
||||
|
||||
@@ -165,7 +165,7 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
|
||||
D3D12_CULL_MODE_BACK, // Back
|
||||
}};
|
||||
|
||||
static constexpr std::array<D3D12_COMPARISON_FUNC, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)>
|
||||
static constexpr std::array<D3D12_COMPARISON_FUNC, static_cast<u32>(GPUPipeline::ComparisonFunc::MaxCount)>
|
||||
compare_mapping = {{
|
||||
D3D12_COMPARISON_FUNC_NEVER, // Never
|
||||
D3D12_COMPARISON_FUNC_ALWAYS, // Always
|
||||
@@ -176,6 +176,18 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
|
||||
D3D12_COMPARISON_FUNC_EQUAL, // Equal
|
||||
}};
|
||||
|
||||
static constexpr std::array<D3D12_STENCIL_OP, static_cast<u32>(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping =
|
||||
{{
|
||||
D3D12_STENCIL_OP_KEEP, // Keep
|
||||
D3D12_STENCIL_OP_ZERO, // Zero
|
||||
D3D12_STENCIL_OP_REPLACE, // Replace
|
||||
D3D12_STENCIL_OP_INCR_SAT, // IncrSat
|
||||
D3D12_STENCIL_OP_DECR_SAT, // DecrSat
|
||||
D3D12_STENCIL_OP_INVERT, // Invert
|
||||
D3D12_STENCIL_OP_INCR, // Incr
|
||||
D3D12_STENCIL_OP_DECR, // Decr
|
||||
}};
|
||||
|
||||
static constexpr std::array<D3D12_BLEND, static_cast<u32>(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{
|
||||
D3D12_BLEND_ZERO, // Zero
|
||||
D3D12_BLEND_ONE, // One
|
||||
@@ -238,9 +250,28 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
|
||||
cull_mapping[static_cast<u8>(config.rasterization.cull_mode.GetValue())], false);
|
||||
if (config.samples > 1)
|
||||
gpb.SetMultisamples(config.samples);
|
||||
gpb.SetDepthState(config.depth.depth_test != GPUPipeline::DepthFunc::Always || config.depth.depth_write,
|
||||
gpb.SetDepthState(config.depth.depth_test != GPUPipeline::ComparisonFunc::Always || config.depth.depth_write,
|
||||
config.depth.depth_write, compare_mapping[static_cast<u8>(config.depth.depth_test.GetValue())]);
|
||||
gpb.SetNoStencilState();
|
||||
if (config.depth.stencil_enable)
|
||||
{
|
||||
const D3D12_DEPTH_STENCILOP_DESC front = {
|
||||
.StencilFailOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_fail_op.GetValue())],
|
||||
.StencilDepthFailOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_depth_fail_op.GetValue())],
|
||||
.StencilPassOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_pass_op.GetValue())],
|
||||
.StencilFunc = compare_mapping[static_cast<u8>(config.depth.front_stencil_func.GetValue())],
|
||||
};
|
||||
const D3D12_DEPTH_STENCILOP_DESC back = {
|
||||
.StencilFailOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_fail_op.GetValue())],
|
||||
.StencilDepthFailOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_depth_fail_op.GetValue())],
|
||||
.StencilPassOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_pass_op.GetValue())],
|
||||
.StencilFunc = compare_mapping[static_cast<u8>(config.depth.back_stencil_func.GetValue())],
|
||||
};
|
||||
gpb.SetStencilState(config.depth.stencil_enable, 0xFF, 0xFF, front, back);
|
||||
}
|
||||
else
|
||||
{
|
||||
gpb.SetNoStencilState();
|
||||
}
|
||||
|
||||
gpb.SetBlendState(0, config.blend.enable, blend_mapping[static_cast<u8>(config.blend.src_blend.GetValue())],
|
||||
blend_mapping[static_cast<u8>(config.blend.dst_blend.GetValue())],
|
||||
|
||||
@@ -129,14 +129,14 @@ GPUPipeline::RasterizationState GPUPipeline::RasterizationState::GetNoCullState(
|
||||
// Returns a value-initialised DepthState whose depth comparison is ComparisonFunc::Always.
// No other field is modified after value-initialisation.
GPUPipeline::DepthState GPUPipeline::DepthState::GetNoTestsState()
{
  DepthState ret = {};
  // The enum was renamed from DepthFunc to ComparisonFunc; only the new name is used here.
  ret.depth_test = ComparisonFunc::Always;
  return ret;
}
|
||||
|
||||
// Returns a value-initialised DepthState whose depth comparison is ComparisonFunc::Always
// and which has depth writes enabled.
GPUPipeline::DepthState GPUPipeline::DepthState::GetAlwaysWriteState()
{
  DepthState ret = {};
  // The enum was renamed from DepthFunc to ComparisonFunc; only the new name is used here.
  ret.depth_test = ComparisonFunc::Always;
  ret.depth_write = true;
  return ret;
}
|
||||
@@ -815,6 +815,16 @@ void GPUDevice::SetScissor(s32 x, s32 y, s32 width, s32 height)
|
||||
SetScissor(GSVector4i(x, y, x + width, y + height));
|
||||
}
|
||||
|
||||
// Base-class body for SetStencilRef(): unconditionally calls Panic("FIXME") and ignores `value`.
// NOTE(review): the trailing /*= 0*/ marks this as the body of a function declared pure virtual; the
// device backends in this change provide their own overrides.
void GPUDevice::SetStencilRef(u8 value) /*= 0*/
|
||||
{
|
||||
Panic("FIXME");
|
||||
}
|
||||
|
||||
// Base-class body for ClearStencil(): unconditionally calls Panic("FIXME") and ignores both arguments.
// NOTE(review): the trailing /*= 0*/ marks this as the body of a function declared pure virtual; the
// device backends in this change provide their own overrides.
void GPUDevice::ClearStencil(GPUTexture* t, u8 value) /*= 0*/
|
||||
{
|
||||
Panic("FIXME");
|
||||
}
|
||||
|
||||
void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height)
|
||||
{
|
||||
SetViewportAndScissor(GSVector4i(x, y, x + width, y + height));
|
||||
@@ -1222,6 +1232,12 @@ bool GPUDevice::ResizeTexture(std::unique_ptr<GPUTexture>* tex, u32 new_width, u
|
||||
GPUTexture::Format format, GPUTexture::Flags flags, bool preserve /* = true */)
|
||||
{
|
||||
GPUTexture* old_tex = tex->get();
|
||||
if (old_tex && old_tex->GetWidth() == new_width && old_tex->GetHeight() == new_height && old_tex->GetType() == type &&
|
||||
old_tex->GetFormat() == format && old_tex->GetFlags() == flags)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
DebugAssert(!old_tex || (old_tex->GetLayers() == 1 && old_tex->GetLevels() == 1 && old_tex->GetSamples() == 1));
|
||||
std::unique_ptr<GPUTexture> new_tex = FetchTexture(new_width, new_height, 1, 1, 1, type, format, flags);
|
||||
if (!new_tex) [[unlikely]]
|
||||
|
||||
@@ -289,7 +289,7 @@ public:
|
||||
MaxCount
|
||||
};
|
||||
|
||||
enum class DepthFunc : u8
|
||||
enum class ComparisonFunc : u8
|
||||
{
|
||||
Never,
|
||||
Always,
|
||||
@@ -302,6 +302,20 @@ public:
|
||||
MaxCount
|
||||
};
|
||||
|
||||
// Operation applied to the stencil buffer value. Enumerator order is significant: the device backends
// index their API translation tables directly by the underlying value.
enum class StencilOp : u8
|
||||
{
|
||||
Keep, // backends map this to *_KEEP
|
||||
Zero, // backends map this to *_ZERO
|
||||
Replace, // backends map this to *_REPLACE
|
||||
IncrSat, // saturating increment (GL_INCR / VK_STENCIL_OP_INCREMENT_AND_CLAMP / D3D12_STENCIL_OP_INCR_SAT)
|
||||
DecrSat, // saturating decrement (GL_DECR / VK_STENCIL_OP_DECREMENT_AND_CLAMP / D3D12_STENCIL_OP_DECR_SAT)
|
||||
Invert, // backends map this to *_INVERT
|
||||
Incr, // wrapping increment (GL_INCR_WRAP / VK_STENCIL_OP_INCREMENT_AND_WRAP / D3D12_STENCIL_OP_INCR)
|
||||
Decr, // wrapping decrement (GL_DECR_WRAP / VK_STENCIL_OP_DECREMENT_AND_WRAP / D3D12_STENCIL_OP_DECR)
|
||||
|
||||
MaxCount // number of operations; used as the size of the backend mapping arrays
|
||||
};
|
||||
|
||||
enum class BlendFunc : u8
|
||||
{
|
||||
Zero,
|
||||
@@ -353,9 +367,19 @@ public:
|
||||
|
||||
union DepthState
|
||||
{
|
||||
BitField<u8, DepthFunc, 0, 3> depth_test;
|
||||
BitField<u8, bool, 4, 1> depth_write;
|
||||
u8 key;
|
||||
BitField<u32, ComparisonFunc, 0, 3> depth_test;
|
||||
BitField<u32, bool, 4, 1> depth_write;
|
||||
|
||||
BitField<u32, bool, 5, 1> stencil_enable;
|
||||
BitField<u32, StencilOp, 6, 3> front_stencil_fail_op;
|
||||
BitField<u32, StencilOp, 9, 3> front_stencil_depth_fail_op;
|
||||
BitField<u32, StencilOp, 12, 3> front_stencil_pass_op;
|
||||
BitField<u32, ComparisonFunc, 15, 3> front_stencil_func;
|
||||
BitField<u32, StencilOp, 18, 3> back_stencil_fail_op;
|
||||
BitField<u32, StencilOp, 21, 3> back_stencil_depth_fail_op;
|
||||
BitField<u32, StencilOp, 24, 3> back_stencil_pass_op;
|
||||
BitField<u32, ComparisonFunc, 27, 3> back_stencil_func;
|
||||
u32 key;
|
||||
|
||||
// clang-format off
|
||||
ALWAYS_INLINE DepthState() = default;
|
||||
@@ -366,6 +390,13 @@ public:
|
||||
ALWAYS_INLINE bool operator<(const DepthState& rhs) const { return key < rhs.key; }
|
||||
// clang-format on
|
||||
|
||||
/// Returns true when the depth-only portion of the key is identical to that of `ds`.
/// The depth fields are depth_test (bits 0-2) and depth_write (bit 4); bit 3 is not assigned to any
/// field. The previous 0x0F mask stopped short of bit 4, so a change to depth_write alone was
/// reported as a match.
ALWAYS_INLINE bool DepthMatches(const DepthState& ds) const { return ((key & 0x17u) == (ds.key & 0x17u)); }
|
||||
/// Returns true when the stencil portion of the key is identical to that of `ds`.
/// The stencil fields span bits 5-29: stencil_enable at bit 5, the front ops/func at bits 6-17 and
/// the back ops/func at bits 18-29. The previous 0x1FFFFFF0 mask covered bits 4-28, which pulled in
/// depth_write and ignored the top bit of back_stencil_func.
ALWAYS_INLINE bool StencilMatches(const DepthState& ds) const
{
  return ((key & 0x3FFFFFE0u) == (ds.key & 0x3FFFFFE0u));
}
|
||||
/// Returns true when the 12 bits describing the front-face stencil ops/func (starting at bit 6)
/// equal the 12 bits describing the back-face stencil ops/func (starting at bit 18).
ALWAYS_INLINE bool FrontAndBackStencilAreSame() const
{
  const u32 front_bits = (key >> 6) & 0xFFFu;
  const u32 back_bits = (key >> 18) & 0xFFFu;
  return front_bits == back_bits;
}
|
||||
|
||||
static DepthState GetNoTestsState();
|
||||
static DepthState GetAlwaysWriteState();
|
||||
};
|
||||
@@ -417,11 +448,11 @@ public:
|
||||
|
||||
struct GraphicsConfig
|
||||
{
|
||||
Layout layout;
|
||||
|
||||
Primitive primitive;
|
||||
InputLayout input_layout;
|
||||
|
||||
Layout layout;
|
||||
Primitive primitive;
|
||||
|
||||
RasterizationState rasterization;
|
||||
DepthState depth;
|
||||
BlendState blend;
|
||||
@@ -773,6 +804,7 @@ public:
|
||||
GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) = 0;
|
||||
virtual void ClearRenderTarget(GPUTexture* t, u32 c);
|
||||
virtual void ClearDepth(GPUTexture* t, float d);
|
||||
virtual void ClearStencil(GPUTexture* t, u8 value) = 0;
|
||||
virtual void InvalidateRenderTarget(GPUTexture* t);
|
||||
|
||||
/// Shader abstraction.
|
||||
@@ -826,6 +858,7 @@ public:
|
||||
virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0;
|
||||
virtual void SetViewport(const GSVector4i rc) = 0;
|
||||
virtual void SetScissor(const GSVector4i rc) = 0;
|
||||
virtual void SetStencilRef(u8 value) = 0;
|
||||
void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr,
|
||||
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags);
|
||||
void SetViewport(s32 x, s32 y, s32 width, s32 height);
|
||||
|
||||
@@ -151,6 +151,7 @@ public:
|
||||
{
|
||||
return (m_type >= Type::RenderTarget && m_type <= Type::DepthStencil);
|
||||
}
|
||||
/// Returns the result of IsDepthStencilFormat() for this texture's format.
ALWAYS_INLINE bool HasStencil() const
{
  return IsDepthStencilFormat(m_format);
}
|
||||
|
||||
ALWAYS_INLINE const ClearValue& GetClearValue() const { return m_clear_value; }
|
||||
ALWAYS_INLINE u32 GetClearColor() const { return m_clear_value.color; }
|
||||
|
||||
@@ -192,6 +192,33 @@ void OpenGLDevice::ClearDepth(GPUTexture* t, float d)
|
||||
CommitDSClearInFB(static_cast<OpenGLTexture*>(t));
|
||||
}
|
||||
|
||||
// Clears the stencil plane of `t` to `value`.
// When `t` is the currently-bound depth target the clear is issued against the bound framebuffer;
// otherwise `t` is temporarily attached to the write FBO, cleared, detached, and the previously
// current FBO is rebound. The scissor test is disabled for the duration of the clear.
void OpenGLDevice::ClearStencil(GPUTexture* t, u8 value)
{
  OpenGLTexture* const gl_tex = static_cast<OpenGLTexture*>(t);
  DebugAssert(gl_tex->HasStencil());

  const bool already_bound = (m_current_depth_target == gl_tex);
  const GLint clear_value = value;

  glDisable(GL_SCISSOR_TEST);

  if (!already_bound)
  {
    // Borrow the write FBO so the current framebuffer's attachments stay untouched.
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo);
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, gl_tex->GetGLTarget(), gl_tex->GetGLId(),
                           0);
  }

  glClearBufferiv(GL_STENCIL, 0, &clear_value);

  if (!already_bound)
  {
    // Detach again and restore the framebuffer that was bound before the clear.
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo);
  }

  glEnable(GL_SCISSOR_TEST);
}
|
||||
|
||||
void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t)
|
||||
{
|
||||
GPUDevice::InvalidateRenderTarget(t);
|
||||
@@ -708,7 +735,8 @@ GLuint OpenGLDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUT
|
||||
if (ds)
|
||||
{
|
||||
OpenGLTexture* const DS = static_cast<OpenGLTexture*>(ds);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, DS->GetGLTarget(), DS->GetGLId(), 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, DS->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT,
|
||||
DS->GetGLTarget(), DS->GetGLId(), 0);
|
||||
}
|
||||
|
||||
glDrawBuffers(num_rts, s_draw_buffers.data());
|
||||
@@ -746,6 +774,7 @@ bool OpenGLDevice::CreateBuffers()
|
||||
GL_OBJECT_NAME(m_uniform_buffer, "Device Uniform Buffer");
|
||||
|
||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&m_uniform_buffer_alignment));
|
||||
m_uniform_buffer_alignment = std::max<GLuint>(m_uniform_buffer_alignment, 16);
|
||||
|
||||
if (!m_disable_pbo)
|
||||
{
|
||||
@@ -1270,6 +1299,16 @@ void OpenGLDevice::SetScissor(const GSVector4i rc)
|
||||
UpdateScissor();
|
||||
}
|
||||
|
||||
// Caches the stencil reference value. The GL stencil function is only re-issued when the value
// changed and the last applied depth state has stencil testing enabled.
void OpenGLDevice::SetStencilRef(u8 value)
{
  if (m_last_stencil_ref != value)
  {
    m_last_stencil_ref = value;

    const bool stencil_active = m_last_depth_state.stencil_enable;
    if (stencil_active)
      UpdateStencilFunc();
  }
}
|
||||
|
||||
void OpenGLDevice::UpdateViewport()
|
||||
{
|
||||
glViewport(m_last_viewport.left, m_last_viewport.top, m_last_viewport.width(), m_last_viewport.height());
|
||||
|
||||
@@ -74,6 +74,7 @@ public:
|
||||
u32 src_x, u32 src_y, u32 width, u32 height) override;
|
||||
void ClearRenderTarget(GPUTexture* t, u32 c) override;
|
||||
void ClearDepth(GPUTexture* t, float d) override;
|
||||
void ClearStencil(GPUTexture* t, u8 value) override;
|
||||
void InvalidateRenderTarget(GPUTexture* t) override;
|
||||
|
||||
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
|
||||
@@ -105,6 +106,7 @@ public:
|
||||
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
|
||||
void SetViewport(const GSVector4i rc) override;
|
||||
void SetScissor(const GSVector4i rc) override;
|
||||
void SetStencilRef(u8 value) override;
|
||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
@@ -173,6 +175,7 @@ private:
|
||||
|
||||
void UpdateViewport();
|
||||
void UpdateScissor();
|
||||
void UpdateStencilFunc();
|
||||
|
||||
void CreateTimestampQueries();
|
||||
void DestroyTimestampQueries();
|
||||
@@ -206,8 +209,9 @@ private:
|
||||
// VAO cache - fixed max as key
|
||||
OpenGLPipeline::VertexArrayCache::const_iterator m_last_vao = m_vao_cache.cend();
|
||||
GPUPipeline::BlendState m_last_blend_state = {};
|
||||
GPUPipeline::RasterizationState m_last_rasterization_state = {};
|
||||
GPUPipeline::DepthState m_last_depth_state = {};
|
||||
GPUPipeline::RasterizationState m_last_rasterization_state = {};
|
||||
u8 m_last_stencil_ref = 0;
|
||||
GLuint m_uniform_buffer_alignment = 1;
|
||||
GLuint m_last_program = 0;
|
||||
u32 m_last_texture_unit = 0;
|
||||
|
||||
@@ -686,9 +686,8 @@ ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyRasterizationState(GPUPipeline::Ra
|
||||
m_last_rasterization_state = rs;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyDepthState(GPUPipeline::DepthState ds)
|
||||
{
|
||||
static constexpr std::array<GLenum, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)> func_mapping = {{
|
||||
static constexpr std::array<GLenum, static_cast<u32>(GPUPipeline::ComparisonFunc::MaxCount)> s_comparison_func_mapping =
|
||||
{{
|
||||
GL_NEVER, // Never
|
||||
GL_ALWAYS, // Always
|
||||
GL_LESS, // Less
|
||||
@@ -698,18 +697,82 @@ ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyDepthState(GPUPipeline::DepthState
|
||||
GL_EQUAL, // Equal
|
||||
}};
|
||||
|
||||
// Applies the depth and stencil state in `ds` to the GL context, issuing only the calls whose inputs
// differ from the last applied state, then records `ds` as the last applied state.
// The stencil function calls use the cached m_last_stencil_ref as reference and 0xFF as mask.
ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyDepthState(GPUPipeline::DepthState ds)
{
  static constexpr std::array<GLenum, static_cast<u32>(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping = {{
    GL_KEEP,      // Keep
    GL_ZERO,      // Zero
    GL_REPLACE,   // Replace
    GL_INCR,      // IncrSat
    GL_DECR,      // DecrSat
    GL_INVERT,    // Invert
    GL_INCR_WRAP, // Incr
    GL_DECR_WRAP, // Decr
  }};

  if (m_last_depth_state == ds)
    return;

  // The depth fields are compared directly rather than through a key mask, so a change to
  // depth_write alone (which also feeds the GL_DEPTH_TEST enable below) is never skipped.
  const bool depth_write_changed = (m_last_depth_state.depth_write != ds.depth_write);
  const bool depth_test_changed = (m_last_depth_state.depth_test.GetValue() != ds.depth_test.GetValue());
  if (depth_test_changed || depth_write_changed)
  {
    // Depth testing must stay enabled whenever depth is written, even if the comparison always passes.
    (ds.depth_test != GPUPipeline::ComparisonFunc::Always || ds.depth_write) ? glEnable(GL_DEPTH_TEST) :
                                                                                 glDisable(GL_DEPTH_TEST);
    glDepthFunc(s_comparison_func_mapping[static_cast<u8>(ds.depth_test.GetValue())]);
    if (depth_write_changed)
      glDepthMask(ds.depth_write);
  }

  if (!m_last_depth_state.StencilMatches(ds))
  {
    if (m_last_depth_state.stencil_enable != ds.stencil_enable)
      ds.stencil_enable ? glEnable(GL_STENCIL_TEST) : glDisable(GL_STENCIL_TEST);

    if (ds.FrontAndBackStencilAreSame())
    {
      // Identical front/back configuration: one call per state covers both faces.
      glStencilFuncSeparate(GL_FRONT_AND_BACK,
                            s_comparison_func_mapping[static_cast<u8>(ds.front_stencil_func.GetValue())],
                            m_last_stencil_ref, 0xFF);
      glStencilOpSeparate(GL_FRONT_AND_BACK, stencil_op_mapping[static_cast<u8>(ds.front_stencil_fail_op.GetValue())],
                          stencil_op_mapping[static_cast<u8>(ds.front_stencil_depth_fail_op.GetValue())],
                          stencil_op_mapping[static_cast<u8>(ds.front_stencil_pass_op.GetValue())]);
    }
    else
    {
      glStencilFuncSeparate(GL_FRONT, s_comparison_func_mapping[static_cast<u8>(ds.front_stencil_func.GetValue())],
                            m_last_stencil_ref, 0xFF);
      glStencilFuncSeparate(GL_BACK, s_comparison_func_mapping[static_cast<u8>(ds.back_stencil_func.GetValue())],
                            m_last_stencil_ref, 0xFF);
      glStencilOpSeparate(GL_FRONT, stencil_op_mapping[static_cast<u8>(ds.front_stencil_fail_op.GetValue())],
                          stencil_op_mapping[static_cast<u8>(ds.front_stencil_depth_fail_op.GetValue())],
                          stencil_op_mapping[static_cast<u8>(ds.front_stencil_pass_op.GetValue())]);
      glStencilOpSeparate(GL_BACK, stencil_op_mapping[static_cast<u8>(ds.back_stencil_fail_op.GetValue())],
                          stencil_op_mapping[static_cast<u8>(ds.back_stencil_depth_fail_op.GetValue())],
                          stencil_op_mapping[static_cast<u8>(ds.back_stencil_pass_op.GetValue())]);
    }
  }

  m_last_depth_state = ds;
}
|
||||
|
||||
void OpenGLDevice::UpdateStencilFunc()
|
||||
{
|
||||
if (m_last_depth_state.FrontAndBackStencilAreSame())
|
||||
{
|
||||
glStencilFuncSeparate(GL_FRONT_AND_BACK,
|
||||
s_comparison_func_mapping[static_cast<u8>(m_last_depth_state.front_stencil_func.GetValue())],
|
||||
m_last_stencil_ref, 0xFF);
|
||||
}
|
||||
else
|
||||
{
|
||||
glStencilFuncSeparate(GL_FRONT,
|
||||
s_comparison_func_mapping[static_cast<u8>(m_last_depth_state.front_stencil_func.GetValue())],
|
||||
m_last_stencil_ref, 0xFF);
|
||||
glStencilFuncSeparate(GL_BACK,
|
||||
s_comparison_func_mapping[static_cast<u8>(m_last_depth_state.back_stencil_func.GetValue())],
|
||||
m_last_stencil_ref, 0xFF);
|
||||
}
|
||||
}
|
||||
|
||||
ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyBlendState(GPUPipeline::BlendState bs)
|
||||
{
|
||||
static constexpr std::array<GLenum, static_cast<u32>(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{
|
||||
|
||||
@@ -587,7 +587,9 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex)
|
||||
{
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo);
|
||||
|
||||
const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0;
|
||||
const GLenum attachment = tex->IsDepthStencil() ?
|
||||
(tex->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT) :
|
||||
GL_COLOR_ATTACHMENT0;
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0);
|
||||
|
||||
glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment);
|
||||
@@ -612,7 +614,9 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex)
|
||||
{
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo);
|
||||
|
||||
const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0;
|
||||
const GLenum attachment = tex->IsDepthStencil() ?
|
||||
(tex->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT) :
|
||||
GL_COLOR_ATTACHMENT0;
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0);
|
||||
|
||||
if (tex->IsDepthStencil())
|
||||
@@ -701,7 +705,7 @@ void OpenGLDevice::CommitDSClearInFB(OpenGLTexture* tex)
|
||||
{
|
||||
case GPUTexture::State::Invalidated:
|
||||
{
|
||||
const GLenum attachment = GL_DEPTH_ATTACHMENT;
|
||||
const GLenum attachment = tex->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
|
||||
if (glInvalidateFramebuffer)
|
||||
glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment);
|
||||
tex->SetState(GPUTexture::State::Dirty);
|
||||
|
||||
@@ -582,9 +582,9 @@ bool PostProcessing::Chain::CheckTargets(GPUTexture::Format target_format, u32 t
|
||||
GL_OBJECT_NAME(vso, "Post-processing rotate blit VS");
|
||||
GL_OBJECT_NAME(vso, "Post-processing rotate blit FS");
|
||||
|
||||
const GPUPipeline::GraphicsConfig config = {.layout = GPUPipeline::Layout::SingleTextureAndPushConstants,
|
||||
const GPUPipeline::GraphicsConfig config = {.input_layout = {},
|
||||
.layout = GPUPipeline::Layout::SingleTextureAndPushConstants,
|
||||
.primitive = GPUPipeline::Primitive::Triangles,
|
||||
.input_layout = {},
|
||||
.rasterization = GPUPipeline::RasterizationState::GetNoCullState(),
|
||||
.depth = GPUPipeline::DepthState::GetNoTestsState(),
|
||||
.blend = GPUPipeline::BlendState::GetNoBlendingState(),
|
||||
|
||||
@@ -1976,7 +1976,7 @@ bool VulkanDevice::CreateDeviceAndMainSwapChain(std::string_view adapter, Featur
|
||||
// Read device physical memory properties, we need it for allocating buffers
|
||||
vkGetPhysicalDeviceProperties(m_physical_device, &m_device_properties);
|
||||
m_device_properties.limits.minUniformBufferOffsetAlignment =
|
||||
std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
|
||||
std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(16));
|
||||
m_device_properties.limits.minTexelBufferOffsetAlignment =
|
||||
std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
|
||||
m_device_properties.limits.optimalBufferCopyOffsetAlignment =
|
||||
@@ -2317,8 +2317,8 @@ void VulkanDevice::EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u
|
||||
m_current_swap_chain = nullptr;
|
||||
|
||||
VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
|
||||
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, SC->GetCurrentImage(), GPUTexture::Type::RenderTarget, 0, 1, 0,
|
||||
1, VulkanTexture::Layout::ColorAttachment,
|
||||
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, SC->GetCurrentImage(), GPUTexture::Type::RenderTarget,
|
||||
SC->GetFormat(), 0, 1, 0, 1, VulkanTexture::Layout::ColorAttachment,
|
||||
VulkanTexture::Layout::PresentSrc);
|
||||
EndAndSubmitCommandBuffer(SC, explicit_present);
|
||||
InvalidateCachedState();
|
||||
@@ -2647,6 +2647,25 @@ void VulkanDevice::ClearDepth(GPUTexture* t, float d)
|
||||
}
|
||||
}
|
||||
|
||||
// Clears the stencil aspect of `t` to `value`.
// If a render pass is active and `t` is its depth target, an attachment clear is recorded so the
// render pass isn't restarted. Otherwise the texture is transitioned to the ClearDst layout and
// cleared with an image clear command.
void VulkanDevice::ClearStencil(GPUTexture* t, u8 value)
{
  VulkanTexture* const vk_tex = static_cast<VulkanTexture*>(t);
  const VkClearDepthStencilValue clear_value = {0.0f, static_cast<u32>(value)};

  const bool bound_in_pass = InRenderPass() && m_current_depth_target == vk_tex;
  if (!bound_in_pass)
  {
    const VkImageSubresourceRange range = {VK_IMAGE_ASPECT_STENCIL_BIT, 0u, 1u, 0u, 1u};
    vk_tex->TransitionToLayout(VulkanTexture::Layout::ClearDst);
    vkCmdClearDepthStencilImage(m_current_command_buffer, vk_tex->GetImage(), vk_tex->GetVkLayout(), &clear_value, 1,
                                &range);
    return;
  }

  // Only the stencil aspect is named, over the full extent of the texture.
  const VkClearAttachment attachment = {VK_IMAGE_ASPECT_STENCIL_BIT, 0, {.depthStencil = clear_value}};
  const VkClearRect rect = {{{0, 0}, {vk_tex->GetWidth(), vk_tex->GetHeight()}}, 0u, 1u};
  vkCmdClearAttachments(m_current_command_buffer, 1, &attachment, 1, &rect);
}
|
||||
|
||||
void VulkanDevice::InvalidateRenderTarget(GPUTexture* t)
|
||||
{
|
||||
GPUDevice::InvalidateRenderTarget(t);
|
||||
@@ -3012,11 +3031,13 @@ void VulkanDevice::RenderBlankFrame(VulkanSwapChain* swap_chain)
|
||||
const VkImage image = swap_chain->GetCurrentImage();
|
||||
static constexpr VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
|
||||
static constexpr VkClearColorValue clear_color = {{0.0f, 0.0f, 0.0f, 1.0f}};
|
||||
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
|
||||
VulkanTexture::Layout::Undefined, VulkanTexture::Layout::TransferDst);
|
||||
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, swap_chain->GetFormat(),
|
||||
0, 1, 0, 1, VulkanTexture::Layout::Undefined,
|
||||
VulkanTexture::Layout::TransferDst);
|
||||
vkCmdClearColorImage(cmdbuf, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &srr);
|
||||
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
|
||||
VulkanTexture::Layout::TransferDst, VulkanTexture::Layout::PresentSrc);
|
||||
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, swap_chain->GetFormat(),
|
||||
0, 1, 0, 1, VulkanTexture::Layout::TransferDst,
|
||||
VulkanTexture::Layout::PresentSrc);
|
||||
|
||||
EndAndSubmitCommandBuffer(swap_chain, false);
|
||||
|
||||
@@ -3205,7 +3226,7 @@ void VulkanDevice::BeginRenderPass()
|
||||
VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr};
|
||||
|
||||
std::array<VkRenderingAttachmentInfoKHR, MAX_RENDER_TARGETS> attachments;
|
||||
VkRenderingAttachmentInfoKHR depth_attachment;
|
||||
VkRenderingAttachmentInfoKHR depth_attachment, stencil_attachment;
|
||||
|
||||
if (m_num_current_render_targets > 0 || m_current_depth_target)
|
||||
{
|
||||
@@ -3276,6 +3297,20 @@ void VulkanDevice::BeginRenderPass()
|
||||
depth_attachment.clearValue.depthStencil = {ds->GetClearDepth(), 0u};
|
||||
|
||||
ds->SetState(GPUTexture::State::Dirty);
|
||||
|
||||
if (ds->HasStencil())
|
||||
{
|
||||
stencil_attachment.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
|
||||
stencil_attachment.pNext = nullptr;
|
||||
stencil_attachment.imageView = ds->GetView();
|
||||
stencil_attachment.imageLayout = ds->GetVkLayout();
|
||||
stencil_attachment.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
|
||||
stencil_attachment.resolveImageView = VK_NULL_HANDLE;
|
||||
stencil_attachment.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
|
||||
stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
|
||||
ri.pStencilAttachment = &stencil_attachment;
|
||||
}
|
||||
}
|
||||
|
||||
const VulkanTexture* const rt_or_ds =
|
||||
@@ -3372,7 +3407,15 @@ void VulkanDevice::BeginRenderPass()
|
||||
|
||||
// If this is a new command buffer, bind the pipeline and such.
|
||||
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
|
||||
{
|
||||
SetInitialPipelineState();
|
||||
}
|
||||
else if (m_current_depth_target && m_current_depth_target->IsDepthStencil())
|
||||
{
|
||||
// Stencil reference still needs to be set.
|
||||
vkCmdSetStencilReference(GetCurrentCommandBuffer(), VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
ZeroExtend32(m_current_stencil_ref));
|
||||
}
|
||||
}
|
||||
|
||||
void VulkanDevice::BeginSwapChainRenderPass(VulkanSwapChain* swap_chain, u32 clear_color)
|
||||
@@ -3383,8 +3426,8 @@ void VulkanDevice::BeginSwapChainRenderPass(VulkanSwapChain* swap_chain, u32 cle
|
||||
const VkImage swap_chain_image = swap_chain->GetCurrentImage();
|
||||
|
||||
// Swap chain images start in undefined
|
||||
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, swap_chain_image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
|
||||
VulkanTexture::Layout::Undefined,
|
||||
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, swap_chain_image, GPUTexture::Type::RenderTarget,
|
||||
swap_chain->GetFormat(), 0, 1, 0, 1, VulkanTexture::Layout::Undefined,
|
||||
VulkanTexture::Layout::ColorAttachment);
|
||||
|
||||
// All textures should be in shader read only optimal already, but just in case..
|
||||
@@ -3563,6 +3606,12 @@ void VulkanDevice::SetInitialPipelineState()
|
||||
const VkRect2D vrc = {{m_current_scissor.left, m_current_scissor.top},
|
||||
{static_cast<u32>(m_current_scissor.width()), static_cast<u32>(m_current_scissor.height())}};
|
||||
vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc);
|
||||
|
||||
if (m_current_depth_target && m_current_depth_target->IsDepthStencil())
|
||||
{
|
||||
vkCmdSetStencilReference(GetCurrentCommandBuffer(), VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||
ZeroExtend32(m_current_stencil_ref));
|
||||
}
|
||||
}
|
||||
|
||||
void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
|
||||
@@ -3667,6 +3716,20 @@ void VulkanDevice::SetViewport(const GSVector4i rc)
|
||||
vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp);
|
||||
}
|
||||
|
||||
// Caches the stencil reference value and, when possible, records it into the current command buffer.
// The command is only recorded while inside a render pass whose depth target reports IsDepthStencil();
// if current DS does not have a stencil component, then dynamic stencil state will not be enabled.
void VulkanDevice::SetStencilRef(u8 value)
{
  if (m_current_stencil_ref != value)
  {
    m_current_stencil_ref = value;

    const bool can_record = InRenderPass() && m_current_depth_target && m_current_depth_target->IsDepthStencil();
    if (can_record)
      vkCmdSetStencilReference(m_current_command_buffer, VK_STENCIL_FACE_FRONT_AND_BACK, ZeroExtend32(value));
  }
}
|
||||
|
||||
void VulkanDevice::SetScissor(const GSVector4i rc)
|
||||
{
|
||||
if (m_current_scissor.eq(rc))
|
||||
|
||||
@@ -108,6 +108,7 @@ public:
|
||||
u32 src_x, u32 src_y, u32 width, u32 height) override;
|
||||
void ClearRenderTarget(GPUTexture* t, u32 c) override;
|
||||
void ClearDepth(GPUTexture* t, float d) override;
|
||||
void ClearStencil(GPUTexture* t, u8 value) override;
|
||||
void InvalidateRenderTarget(GPUTexture* t) override;
|
||||
|
||||
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
|
||||
@@ -139,6 +140,7 @@ public:
|
||||
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
|
||||
void SetViewport(const GSVector4i rc) override;
|
||||
void SetScissor(const GSVector4i rc) override;
|
||||
void SetStencilRef(u8 value) override;
|
||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
@@ -471,6 +473,7 @@ private:
|
||||
|
||||
VulkanPipeline* m_current_pipeline = nullptr;
|
||||
GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
u8 m_current_stencil_ref = 0;
|
||||
|
||||
std::array<VulkanTexture*, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
|
||||
std::array<VkSampler, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};
|
||||
|
||||
@@ -150,7 +150,7 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
|
||||
VK_CULL_MODE_BACK_BIT, // Back
|
||||
}};
|
||||
|
||||
static constexpr std::array<VkCompareOp, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)> compare_mapping = {{
|
||||
static constexpr std::array<VkCompareOp, static_cast<u32>(GPUPipeline::ComparisonFunc::MaxCount)> compare_mapping = {{
|
||||
VK_COMPARE_OP_NEVER, // Never
|
||||
VK_COMPARE_OP_ALWAYS, // Always
|
||||
VK_COMPARE_OP_LESS, // Less
|
||||
@@ -160,6 +160,17 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
|
||||
VK_COMPARE_OP_EQUAL, // Equal
|
||||
}};
|
||||
|
||||
static constexpr std::array<VkStencilOp, static_cast<u32>(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping = {{
|
||||
VK_STENCIL_OP_KEEP, // Keep
|
||||
VK_STENCIL_OP_ZERO, // Zero
|
||||
VK_STENCIL_OP_REPLACE, // Replace
|
||||
VK_STENCIL_OP_INCREMENT_AND_CLAMP, // IncrSat
|
||||
VK_STENCIL_OP_DECREMENT_AND_CLAMP, // DecrSat
|
||||
VK_STENCIL_OP_INVERT, // Invert
|
||||
VK_STENCIL_OP_INCREMENT_AND_WRAP, // Incr
|
||||
VK_STENCIL_OP_DECREMENT_AND_WRAP, // Decr
|
||||
}};
|
||||
|
||||
static constexpr std::array<VkBlendFactor, static_cast<u32>(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{
|
||||
VK_BLEND_FACTOR_ZERO, // Zero
|
||||
VK_BLEND_FACTOR_ONE, // One
|
||||
@@ -215,9 +226,35 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
|
||||
VK_FRONT_FACE_CLOCKWISE);
|
||||
if (config.samples > 1)
|
||||
gpb.SetMultisamples(config.samples, config.per_sample_shading);
|
||||
gpb.SetDepthState(config.depth.depth_test != GPUPipeline::DepthFunc::Always || config.depth.depth_write,
|
||||
gpb.SetDepthState(config.depth.depth_test != GPUPipeline::ComparisonFunc::Always || config.depth.depth_write,
|
||||
config.depth.depth_write, compare_mapping[static_cast<u8>(config.depth.depth_test.GetValue())]);
|
||||
gpb.SetNoStencilState();
|
||||
|
||||
if (config.depth.stencil_enable)
|
||||
{
|
||||
const VkStencilOpState front = {
|
||||
.failOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_fail_op.GetValue())],
|
||||
.passOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_pass_op.GetValue())],
|
||||
.depthFailOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_depth_fail_op.GetValue())],
|
||||
.compareOp = compare_mapping[static_cast<u8>(config.depth.front_stencil_func.GetValue())],
|
||||
.compareMask = 0xFFu,
|
||||
.writeMask = 0xFFu,
|
||||
.reference = 0x00u,
|
||||
};
|
||||
const VkStencilOpState back = {
|
||||
.failOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_fail_op.GetValue())],
|
||||
.passOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_pass_op.GetValue())],
|
||||
.depthFailOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_depth_fail_op.GetValue())],
|
||||
.compareOp = compare_mapping[static_cast<u8>(config.depth.back_stencil_func.GetValue())],
|
||||
.compareMask = 0xFFu,
|
||||
.writeMask = 0xFFu,
|
||||
.reference = 0x00u,
|
||||
};
|
||||
gpb.SetStencilState(true, front, back);
|
||||
}
|
||||
else
|
||||
{
|
||||
gpb.SetNoStencilState();
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
|
||||
{
|
||||
@@ -239,6 +276,9 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
|
||||
gpb.AddDynamicState(VK_DYNAMIC_STATE_VIEWPORT);
|
||||
gpb.AddDynamicState(VK_DYNAMIC_STATE_SCISSOR);
|
||||
|
||||
if (GPUTexture::IsDepthStencilFormat(config.depth_format))
|
||||
gpb.AddDynamicState(VK_DYNAMIC_STATE_STENCIL_REFERENCE);
|
||||
|
||||
gpb.SetPipelineLayout(m_pipeline_layouts[static_cast<size_t>(GetPipelineLayoutType(config.render_pass_flags))]
|
||||
[static_cast<size_t>(config.layout)]);
|
||||
|
||||
@@ -258,8 +298,9 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
|
||||
|
||||
if (config.depth_format != GPUTexture::Format::Unknown)
|
||||
{
|
||||
gpb.SetDynamicRenderingDepthAttachment(VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.depth_format)],
|
||||
VK_FORMAT_UNDEFINED);
|
||||
const VkFormat vk_format = VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.depth_format)];
|
||||
gpb.SetDynamicRenderingDepthAttachment(
|
||||
vk_format, GPUTexture::IsDepthStencilFormat(config.depth_format) ? vk_format : VK_FORMAT_UNDEFINED);
|
||||
}
|
||||
|
||||
if (config.render_pass_flags & GPUPipeline::ColorFeedbackLoop)
|
||||
|
||||
@@ -116,7 +116,9 @@ std::unique_ptr<VulkanTexture> VulkanTexture::Create(u32 width, u32 height, u32
|
||||
DebugAssert(levels == 1);
|
||||
ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
|
||||
vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
vci.subresourceRange.aspectMask = IsDepthStencilFormat(format) ?
|
||||
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) :
|
||||
VK_IMAGE_ASPECT_DEPTH_BIT; // depth-only formats have no stencil aspect; matches TransitionSubresourcesToLayout()
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -490,19 +492,19 @@ void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffe
|
||||
u32 start_level, u32 num_levels, Layout old_layout,
|
||||
Layout new_layout)
|
||||
{
|
||||
TransitionSubresourcesToLayout(command_buffer, m_image, m_type, start_layer, num_layers, start_level, num_levels,
|
||||
old_layout, new_layout);
|
||||
TransitionSubresourcesToLayout(command_buffer, m_image, m_type, m_format, start_layer, num_layers, start_level,
|
||||
num_levels, old_layout, new_layout);
|
||||
}
|
||||
|
||||
void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type,
|
||||
u32 start_layer, u32 num_layers, u32 start_level, u32 num_levels,
|
||||
Layout old_layout, Layout new_layout)
|
||||
Format format, u32 start_layer, u32 num_layers, u32 start_level,
|
||||
u32 num_levels, Layout old_layout, Layout new_layout)
|
||||
{
|
||||
VkImageAspectFlags aspect;
|
||||
if (type == Type::DepthStencil)
|
||||
{
|
||||
// TODO: detect stencil
|
||||
aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
aspect = IsDepthStencilFormat(format) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) :
|
||||
VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -73,9 +73,9 @@ public:
|
||||
void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, u32 start_layer, u32 num_layers, u32 start_level,
|
||||
u32 num_levels, Layout old_layout, Layout new_layout);
|
||||
|
||||
static void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type, u32 start_layer,
|
||||
u32 num_layers, u32 start_level, u32 num_levels, Layout old_layout,
|
||||
Layout new_layout);
|
||||
static void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type, Format format,
|
||||
u32 start_layer, u32 num_layers, u32 start_level, u32 num_levels,
|
||||
Layout old_layout, Layout new_layout);
|
||||
|
||||
// Call when the texture is bound to the pipeline, or read from in a copy.
|
||||
ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }
|
||||
|
||||
Reference in New Issue
Block a user