Compare commits

...

7 Commits

Author SHA1 Message Date
Stenzek
113614c2f5 WIP 2025-01-07 21:54:15 +10:00
Stenzek
ac41ace972 GPUDevice: Add stencil testing support 2025-01-07 19:39:40 +10:00
Stenzek
ebe782e4f4 Common: Add Vector4i xyxy(Vector2i) 2025-01-07 19:38:58 +10:00
Stenzek
e7439c1503 GPUDevice: Move size-matches check into ResizeTexture() 2025-01-07 19:25:11 +10:00
Stenzek
fda87de7e7 GPU/HW: Slight re-shuffling of field offsets
Free up some bits in the middle.
2025-01-07 19:25:11 +10:00
Stenzek
116bc83d09 GPUDevice: Ensure 16 byte minimum UBO alignment 2025-01-07 19:25:11 +10:00
Stenzek
ddffc055b9 GPU/HW: Use sized tristrips instead of fullscreen quads 2025-01-07 19:25:11 +10:00
30 changed files with 927 additions and 191 deletions

View File

@@ -2346,6 +2346,8 @@ public:
return GSVector4i(vcombine_s32(xy.v2s, zw.v2s));
}
/// Builds a 4-lane vector whose low and high halves are both copies of the given 2-lane vector.
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xyzw)
{
  return GSVector4i(vcombine_s32(xyzw.v2s, xyzw.v2s));
}
/// Returns the low two lanes as a 2-lane vector.
ALWAYS_INLINE GSVector2i xy() const
{
  return GSVector2i(vget_low_s32(v4s));
}
/// Returns the high two lanes as a 2-lane vector.
ALWAYS_INLINE GSVector2i zw() const
{
  return GSVector2i(vget_high_s32(v4s));
}

View File

@@ -1665,6 +1665,8 @@ public:
return GSVector4i(xy.x, xy.y, zw.x, zw.y);
}
/// Builds a 4-component vector by repeating the 2-component input: (x, y, x, y).
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xyzw)
{
  // All four components come from the parameter; the previous body named the function itself
  // (`xyxy.x`, `xyxy.y`) for the first two components instead of `xyzw`.
  return GSVector4i(xyzw.x, xyzw.y, xyzw.x, xyzw.y);
}
/// Returns the first two components (x, y) as a 2-component vector.
ALWAYS_INLINE GSVector2i xy() const
{
  return GSVector2i(x, y);
}
/// Returns the last two components (z, w) as a 2-component vector.
ALWAYS_INLINE GSVector2i zw() const
{
  return GSVector2i(z, w);
}

View File

@@ -1831,6 +1831,11 @@ public:
/// Combines this vector with `v` by forwarding to upl64().
/// NOTE(review): presumably yields (this.x, this.y, v.x, v.y) - confirm against upl64().
ALWAYS_INLINE GSVector4i xyxy(const GSVector4i& v) const
{
  return upl64(v);
}
/// Builds a 4-lane vector whose low and high 64-bit halves are both the low 64 bits of the input.
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xyzw)
{
  // Unpacking the low quadword of the same register with itself duplicates it into both halves.
  return GSVector4i(_mm_unpacklo_epi64(xyzw.m, xyzw.m));
}
ALWAYS_INLINE static GSVector4i xyxy(const GSVector2i& xy, const GSVector2i& zw)
{
return GSVector4i(_mm_unpacklo_epi64(xy.m, zw.m));

View File

@@ -1031,19 +1031,14 @@ bool GPUBackend::Deinterlace(u32 field)
const u32 height = m_display_texture_view_height;
const auto copy_to_field_buffer = [&](u32 buffer) {
if (!m_deinterlace_buffers[buffer] || m_deinterlace_buffers[buffer]->GetWidth() != width ||
m_deinterlace_buffers[buffer]->GetHeight() != height ||
m_deinterlace_buffers[buffer]->GetFormat() != src->GetFormat())
if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[buffer], width, height, GPUTexture::Type::Texture,
src->GetFormat(), GPUTexture::Flags::None, false)) [[unlikely]]
{
if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[buffer], width, height, GPUTexture::Type::Texture,
src->GetFormat(), GPUTexture::Flags::None, false)) [[unlikely]]
{
return false;
}
GL_OBJECT_NAME_FMT(m_deinterlace_buffers[buffer], "Blend Deinterlace Buffer {}", buffer);
return false;
}
GL_OBJECT_NAME_FMT(m_deinterlace_buffers[buffer], "Blend Deinterlace Buffer {}", buffer);
GL_INS_FMT("Copy {}x{} from {},{} to field buffer {}", width, height, x, y, buffer);
g_gpu_device->CopyTextureRegion(m_deinterlace_buffers[buffer].get(), 0, 0, 0, 0, m_display_texture, x, y, 0, 0,
width, height);
@@ -1158,18 +1153,13 @@ bool GPUBackend::Deinterlace(u32 field)
bool GPUBackend::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve)
{
if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width ||
m_deinterlace_texture->GetHeight() != height)
if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget,
GPUTexture::Format::RGBA8, GPUTexture::Flags::None, preserve)) [[unlikely]]
{
if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget,
GPUTexture::Format::RGBA8, GPUTexture::Flags::None, preserve)) [[unlikely]]
{
return false;
}
GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture");
return false;
}
GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture");
return true;
}
@@ -1179,19 +1169,15 @@ bool GPUBackend::ApplyChromaSmoothing()
const u32 y = m_display_texture_view_y;
const u32 width = m_display_texture_view_width;
const u32 height = m_display_texture_view_height;
if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width ||
m_chroma_smoothing_texture->GetHeight() != height)
if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget,
GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false))
{
if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget,
GPUTexture::Format::RGBA8, GPUTexture::Flags::None, false))
{
ClearDisplayTexture();
return false;
}
GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture");
ClearDisplayTexture();
return false;
}
GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture");
GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height);
m_display_texture->MakeReadyForSampling();

View File

@@ -172,10 +172,11 @@ protected:
void DestroyDeinterlaceTextures();
bool ApplyChromaSmoothing();
GSVector4i m_clamped_drawing_area = {};
s32 m_display_width = 0;
s32 m_display_height = 0;
GSVector4i m_clamped_drawing_area = {};
s32 m_display_origin_left = 0;
s32 m_display_origin_top = 0;
s32 m_display_vram_width = 0;

View File

@@ -41,8 +41,8 @@ LOG_CHANNEL(GPU_HW);
// TODO: instead of full state restore, only restore what changed
static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8;
static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D16;
static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32F;
static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D32FS8;
static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32FS8;
static constexpr GPUTexture::Format VRAM_DS_COLOR_FORMAT = GPUTexture::Format::R32F;
#if defined(_DEBUG) || defined(_DEVEL)
@@ -427,11 +427,13 @@ void GPU_HW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss)
// Re-applies the device state used for VRAM rendering: the VRAM read texture on sampler slot 0
// (nearest sampler), the VRAM render target, the batch's stencil reference (only when the stencil
// buffer is in use), a viewport covering the VRAM texture rect, and the scissor.
// The batch UBO is also flagged dirty.
void GPU_HW::RestoreDeviceContext()
{
m_batch_ubo_dirty = true;
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
SetVRAMRenderTarget();
// The stencil reference is only restored when the stencil buffer is in use.
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(m_batch.stencil_reference);
g_gpu_device->SetViewport(m_vram_texture->GetRect());
SetScissor();
m_batch_ubo_dirty = true;
}
void GPU_HW::UpdateSettings(const GPUSettings& old_settings)
@@ -447,7 +449,9 @@ void GPU_HW::UpdateSettings(const GPUSettings& old_settings)
(m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
g_gpu_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() ||
m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer() ||
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache));
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache) ||
(GetDownsampleMode(resolution_scale) == GPUDownsampleMode::AdaptiveStencil) !=
(m_downsample_mode == GPUDownsampleMode::AdaptiveStencil));
const bool shaders_changed =
((m_resolution_scale > 1) != (resolution_scale > 1) || m_multisamples != multisamples ||
m_true_color != g_gpu_settings.gpu_true_color ||
@@ -466,7 +470,9 @@ void GPU_HW::UpdateSettings(const GPUSettings& old_settings)
g_gpu_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction) ||
m_allow_sprite_mode != ShouldAllowSpriteMode(m_resolution_scale, g_gpu_settings.gpu_texture_filter,
g_gpu_settings.gpu_sprite_texture_filter) ||
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache));
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache) ||
(GetDownsampleMode(resolution_scale) == GPUDownsampleMode::AdaptiveStencil) !=
(m_downsample_mode == GPUDownsampleMode::AdaptiveStencil));
const bool resolution_dependent_shaders_changed =
(m_resolution_scale != resolution_scale || m_multisamples != multisamples);
const bool downsampling_shaders_changed =
@@ -889,6 +895,7 @@ void GPU_HW::PrintSettingsToLog()
GPUTexture::Format GPU_HW::GetDepthBufferFormat() const
{
  // TODO: AMD doesn't support D24S8
  // Without the PGXP depth buffer, the depth target uses the plain VRAM depth/stencil format.
  if (!m_pgxp_depth_buffer)
    return VRAM_DS_FORMAT;

  // With the PGXP depth buffer, ROV shader blending selects the colour-format variant instead.
  if (m_use_rov_for_shader_blend)
    return VRAM_DS_COLOR_FORMAT;

  return VRAM_DS_DEPTH_FORMAT;
}
@@ -978,6 +985,10 @@ bool GPU_HW::CreateBuffers(Error* error)
SetVRAMRenderTarget();
SetFullVRAMDirtyRectangle();
if (UseStencilBuffer())
g_gpu_device->ClearStencil(m_vram_depth_texture.get(), 0);
return true;
}
@@ -990,6 +1001,9 @@ void GPU_HW::ClearFramebuffer()
g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF);
else
g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f);
if (UseStencilBuffer())
g_gpu_device->ClearStencil(m_vram_depth_texture.get(), 0);
}
ClearVRAMDirtyRectangle();
if (m_use_texture_cache)
@@ -1057,6 +1071,15 @@ bool GPU_HW::CompileCommonShaders(Error* error)
if (!m_fullscreen_quad_vertex_shader)
return false;
GL_OBJECT_NAME(m_fullscreen_quad_vertex_shader, "Fullscreen Quad Vertex Shader");
m_screen_quad_vertex_shader = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
shadergen.GenerateScreenVertexShader(), error);
if (!m_screen_quad_vertex_shader)
return false;
GL_OBJECT_NAME(m_screen_quad_vertex_shader, "Screen Quad Vertex Shader");
return true;
}
@@ -1410,13 +1433,27 @@ bool GPU_HW::CompilePipelines(Error* error)
{
plconfig.depth.depth_test =
m_pgxp_depth_buffer ?
(depth_test ? GPUPipeline::DepthFunc::LessEqual : GPUPipeline::DepthFunc::Always) :
(check_mask ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always);
(depth_test ? GPUPipeline::ComparisonFunc::LessEqual : GPUPipeline::ComparisonFunc::Always) :
(check_mask ? GPUPipeline::ComparisonFunc::GreaterEqual : GPUPipeline::ComparisonFunc::Always);
// Don't write for transparent, but still test.
plconfig.depth.depth_write =
!m_pgxp_depth_buffer ||
(depth_test && transparency_mode == static_cast<u8>(GPUTransparencyMode::Disabled));
if (UseStencilBuffer())
{
const bool replace = (transparency_mode == static_cast<u8>(GPUTransparencyMode::Disabled) ||
render_mode == static_cast<u8>(BatchRenderMode::TransparencyDisabled) ||
render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque));
plconfig.depth.stencil_enable = true;
plconfig.depth.back_stencil_func = GPUPipeline::ComparisonFunc::Always;
plconfig.depth.back_stencil_pass_op =
replace ? GPUPipeline::StencilOp::Replace : GPUPipeline::StencilOp::Keep;
plconfig.depth.front_stencil_func = GPUPipeline::ComparisonFunc::Always;
plconfig.depth.front_stencil_pass_op =
replace ? GPUPipeline::StencilOp::Replace : GPUPipeline::StencilOp::Keep;
}
}
plconfig.SetTargetFormats(use_rov ? GPUTexture::Format::Unknown : VRAM_RT_FORMAT,
@@ -1537,15 +1574,32 @@ bool GPU_HW::CompilePipelines(Error* error)
batch_shader_guard.Run();
static constexpr GPUPipeline::VertexAttribute screen_vertex_attributes[] = {
GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0,
GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, x)),
GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0,
GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, u)),
};
// common state
plconfig.input_layout.vertex_attributes = {};
plconfig.input_layout.vertex_stride = 0;
plconfig.input_layout.vertex_attributes = screen_vertex_attributes;
plconfig.input_layout.vertex_stride = sizeof(ScreenVertex);
plconfig.primitive = GPUPipeline::Primitive::TriangleStrips;
plconfig.vertex_shader = m_screen_quad_vertex_shader.get();
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
plconfig.per_sample_shading = false;
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
if (UseStencilBuffer())
{
plconfig.depth.stencil_enable = true;
plconfig.depth.back_stencil_func = GPUPipeline::ComparisonFunc::Always;
plconfig.depth.back_stencil_pass_op = GPUPipeline::StencilOp::Replace;
plconfig.depth.front_stencil_func = GPUPipeline::ComparisonFunc::Always;
plconfig.depth.front_stencil_pass_op = GPUPipeline::StencilOp::Replace;
}
// VRAM fill
for (u8 wrapped = 0; wrapped < 2; wrapped++)
{
@@ -1560,8 +1614,9 @@ bool GPU_HW::CompilePipelines(Error* error)
return false;
plconfig.fragment_shader = fs.get();
plconfig.depth = needs_real_depth_buffer ? GPUPipeline::DepthState::GetAlwaysWriteState() :
GPUPipeline::DepthState::GetNoTestsState();
plconfig.depth.depth_test =
needs_real_depth_buffer ? GPUPipeline::ComparisonFunc::Always : GPUPipeline::ComparisonFunc::Never;
plconfig.depth.depth_write = needs_real_depth_buffer;
if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
@@ -1587,7 +1642,7 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.depth.depth_write = needs_real_depth_buffer;
plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
(depth_test != 0) ? GPUPipeline::ComparisonFunc::GreaterEqual : GPUPipeline::ComparisonFunc::Always;
if (!(m_vram_copy_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig), error))
return false;
@@ -1619,7 +1674,7 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.depth.depth_write = needs_real_depth_buffer;
plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
(depth_test != 0) ? GPUPipeline::ComparisonFunc::GreaterEqual : GPUPipeline::ComparisonFunc::Always;
if (!(m_vram_write_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
@@ -1631,8 +1686,6 @@ bool GPU_HW::CompilePipelines(Error* error)
}
}
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
// VRAM write replacement
{
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
@@ -1641,7 +1694,10 @@ bool GPU_HW::CompilePipelines(Error* error)
return false;
plconfig.fragment_shader = fs.get();
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
plconfig.depth.depth_write = needs_real_depth_buffer;
plconfig.depth.depth_test = GPUPipeline::ComparisonFunc::Always;
if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
@@ -1649,6 +1705,12 @@ bool GPU_HW::CompilePipelines(Error* error)
return false;
}
plconfig.vertex_shader = m_fullscreen_quad_vertex_shader.get();
plconfig.primitive = GPUPipeline::Primitive::Triangles;
plconfig.input_layout.vertex_attributes = {};
plconfig.input_layout.vertex_stride = 0;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
// VRAM update depth
if (m_write_mask_as_depth)
{
@@ -1861,6 +1923,61 @@ bool GPU_HW::CompileDownsamplePipelines(Error* error)
}
GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler");
}
else if (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil)
{
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveStencilDownsampleBlurFragmentShader(m_resolution_scale, m_multisamples), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Adaptive Stencil Downsample Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
if (!(m_downsample_blur_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_blur_pipeline, "Adaptive Stencil Downsample Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveStencilDownsampleCompositeFragmentShader(), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Adaptive Stencil Composite Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.SetTargetFormats(VRAM_RT_FORMAT);
if (!(m_downsample_composite_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_composite_pipeline, "Adaptive Stencil Composite Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateFillFragmentShader(), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Adaptive Stencil Mark Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
plconfig.SetTargetFormats(GPUTexture::Format::R8, GetDepthBufferFormat());
plconfig.samples = m_multisamples;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.depth.stencil_enable = true;
plconfig.depth.front_stencil_pass_op = GPUPipeline::StencilOp::Keep;
plconfig.depth.front_stencil_fail_op = GPUPipeline::StencilOp::Keep;
plconfig.depth.front_stencil_func = GPUPipeline::ComparisonFunc::Equal;
plconfig.depth.back_stencil_pass_op = GPUPipeline::StencilOp::Keep;
plconfig.depth.back_stencil_fail_op = GPUPipeline::StencilOp::Keep;
plconfig.depth.back_stencil_func = GPUPipeline::ComparisonFunc::Equal;
if (!(m_downsample_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_pass_pipeline, "Adaptive Stencil Downsample Pipeline");
}
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
std::unique_ptr<GPUShader> fs =
@@ -1954,6 +2071,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written)
void GPU_HW::UpdateDepthBufferFromMaskBit()
{
GL_SCOPE_FMT("UpdateDepthBufferFromMaskBit()");
DebugAssert(!m_pgxp_depth_buffer && m_vram_depth_texture && m_write_mask_as_depth);
// Viewport should already be set full, only need to fudge the scissor.
@@ -2449,10 +2567,28 @@ void GPU_HW::SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled)
m_batch.sprite_mode = enabled;
}
// Switches the stencil reference used by the current batch to `value`. Does nothing when the
// stencil buffer is not in use or the reference already matches.
void GPU_HW::SetBatchStencilReference(const GPUBackendDrawCommand* cmd, u8 value)
{
  const bool reference_changes = UseStencilBuffer() && m_batch.stencil_reference != value;
  if (!reference_changes)
    return;

  // Indices already queued are submitted before the reference is switched, then space for the
  // incoming command is re-checked.
  const bool has_pending_indices = (m_batch_index_count > 0);
  if (has_pending_indices)
  {
    FlushRender();
    EnsureVertexBufferSpaceForCommand(cmd);
  }

  GL_INS_FMT("Stencil reference is now {}", value);

  m_batch.stencil_reference = value;
  g_gpu_device->SetStencilRef(m_batch.stencil_reference);
}
void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd)
{
PrepareDraw(cmd);
SetBatchDepthBuffer(cmd, false);
SetBatchStencilReference(cmd, 0);
const u32 num_vertices = cmd->num_vertices;
DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
@@ -2497,6 +2633,7 @@ void GPU_HW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd)
const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w;
SetBatchDepthBuffer(cmd, use_depth);
SetBatchStencilReference(cmd, BoolToUInt8(use_depth));
const u32 num_vertices = cmd->num_vertices;
DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
@@ -2656,6 +2793,7 @@ void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd)
PrepareDraw(cmd);
SetBatchDepthBuffer(cmd, false);
SetBatchSpriteMode(cmd, m_allow_sprite_mode);
SetBatchStencilReference(cmd, 0);
DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE);
const s32 pos_x = cmd->x;
@@ -2933,10 +3071,12 @@ ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand
const GSVector4i clamped_draw_rect_123)
{
// Use PGXP to exclude primitives that are definitely 3D.
const bool really_3d = is_precise ? is_3d : IsPossibleSpritePolygon(vertices.data());
if (m_resolution_scale > 1 && !is_3d && cmd->quad_polygon)
HandleFlippedQuadTextureCoordinates(cmd, vertices.data());
else if (m_allow_sprite_mode)
SetBatchSpriteMode(cmd, is_precise ? !is_3d : IsPossibleSpritePolygon(vertices.data()));
SetBatchSpriteMode(cmd, !really_3d);
SetBatchStencilReference(cmd, BoolToUInt8(really_3d));
if (cmd->texture_enable && m_compute_uv_range)
ComputePolygonUVLimits(cmd, vertices.data(), num_vertices);
@@ -2997,9 +3137,13 @@ bool GPU_HW::BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u
g_gpu_device->SetTextureSampler(0, tex, g_gpu_device->GetLinearSampler());
g_gpu_device->SetPipeline(m_vram_write_replacement_pipeline.get());
g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height);
g_gpu_device->Draw(3, 0);
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(0);
const GSVector4i rect(dst_x, dst_y, dst_x + width, dst_y + height);
g_gpu_device->SetScissor(rect);
DrawScreenQuad(rect);
RestoreDeviceContext();
return true;
}
@@ -3036,10 +3180,9 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(const GPUBackendDrawCo
if (m_texpage_dirty & TEXPAGE_DIRTY_PAGE_RECT)
{
DebugAssert(!(m_texpage_dirty & (TEXPAGE_DIRTY_DRAWN_RECT | TEXPAGE_DIRTY_WRITTEN_RECT)));
DebugAssert(m_batch.texture_mode == BatchTextureMode::PageTexture &&
m_batch.texture_cache_key.page < NUM_VRAM_PAGES);
DebugAssert(m_batch.texture_mode == BatchTextureMode::PageTexture && m_texture_cache_key.page < NUM_VRAM_PAGES);
if (GPUTextureCache::AreSourcePagesDrawn(m_batch.texture_cache_key, m_current_uv_rect))
if (GPUTextureCache::AreSourcePagesDrawn(m_texture_cache_key, m_current_uv_rect))
{
// UVs intersect with drawn area, can't use TC
if (m_batch_index_count > 0)
@@ -3049,7 +3192,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(const GPUBackendDrawCo
}
// We need to swap the dirty tracking over to drawn/written.
const GSVector4i page_rect = GetTextureRect(m_batch.texture_cache_key.page, m_batch.texture_cache_key.mode);
const GSVector4i page_rect = GetTextureRect(m_texture_cache_key.page, m_texture_cache_key.mode);
m_texpage_dirty = (m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0) |
(m_vram_dirty_write_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_WRITTEN_RECT : 0);
m_compute_uv_range = (ShouldCheckForTexPageOverlap() || m_clamp_uvs);
@@ -3191,11 +3334,40 @@ void GPU_HW::ResetBatchVertexDepth()
m_current_depth = 1;
}
// The stencil buffer is in use exactly when the downsample mode is AdaptiveStencil.
ALWAYS_INLINE bool GPU_HW::UseStencilBuffer() const
{
  const bool adaptive_stencil = (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil);
  return adaptive_stencil;
}
// Converts the current depth counter to a float: 1.0 minus the counter divided by 65535.
ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const
{
  const float depth_fraction = static_cast<float>(m_current_depth) / 65535.0f;
  return 1.0f - depth_fraction;
}
// Draws a four-vertex quad covering `bounds`, with `uv_bounds` supplying the texture coordinates
// for the corners. Positions are divided by the VRAM texture size and remapped to the -1..1 range,
// with the Y axis flipped.
// NOTE(review): assumes `bounds` and `uv_bounds` are (left, top, right, bottom) - confirm against callers.
ALWAYS_INLINE_RELEASE void GPU_HW::DrawScreenQuad(const GSVector4i bounds, const GSVector4 uv_bounds)
{
// Reorder to (x, z, y, w) so both X values share the low half and both Y values the high half.
const GSVector4 fboundsxxyy = GSVector4(bounds.xzyw());
const GSVector2 fsize = GSVector2(m_vram_texture->GetSizeVec());
// x' = (2 * x / width) - 1
const GSVector2 x = ((fboundsxxyy.xy() * GSVector2::cxpr(2.0f)) / fsize.xx()) - GSVector2::cxpr(1.0f);
// y' = 1 - (2 * y / height)
const GSVector2 y = GSVector2::cxpr(1.0f) - (GSVector2::cxpr(2.0f) * (fboundsxxyy.zw() / fsize.yy()));
// Interleave back into (x0, y0, x1, y1).
const GSVector4 xy = GSVector4::xyxy(x, y).xzyw();
// Callers are expected to have no pending batch when this runs.
DebugAssert(IsFlushed());
ScreenVertex* vertices;
u32 space;
u32 base_vertex;
g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast<void**>(&vertices), &space, &base_vertex);
// Each store writes one whole vertex as (pos.x, pos.y, u, v), in the order
// (x0, y0), (x1, y0), (x0, y1), (x1, y1).
// NOTE(review): assumes ScreenVertex is exactly four floats and that store<false> is the
// unaligned store variant - confirm.
GSVector4::store<false>(&vertices[0], GSVector4::xyxy(xy.xy(), uv_bounds.xy()));
GSVector4::store<false>(&vertices[1], GSVector4::xyxy(xy.zyzw().xy(), uv_bounds.zyzw().xy()));
GSVector4::store<false>(&vertices[2], GSVector4::xyxy(xy.xwzw().xy(), uv_bounds.xwzw().xy()));
GSVector4::store<false>(&vertices[3], GSVector4::xyxy(xy.zw(), uv_bounds.zw()));
g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4);
g_gpu_device->Draw(4, base_vertex);
}
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 active_line_lsb)
{
FlushRender();
@@ -3225,9 +3397,6 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT));
g_gpu_device->SetPipeline(m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(interlaced_rendering)].get());
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetViewportAndScissor(scaled_bounds);
struct VRAMFillUBOData
{
u32 u_dst_x;
@@ -3247,7 +3416,13 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
GPUDevice::RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
uniforms.u_interlaced_displayed_field = active_line_lsb;
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(0);
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(scaled_bounds);
DrawScreenQuad(scaled_bounds);
RestoreDeviceContext();
}
@@ -3357,14 +3532,15 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
{
DeactivateROV();
std::unique_ptr<GPUTexture> upload_texture;
GPUDevice::AutoRecycleTexture upload_texture;
u32 map_index;
if (!g_gpu_device->GetFeatures().supports_texture_buffers)
{
map_index = 0;
upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture,
GPUTexture::Format::R16U, GPUTexture::Flags::None, data, data_pitch);
upload_texture =
g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::R16U,
GPUTexture::Flags::None, data, data_pitch);
if (!upload_texture)
{
ERROR_LOG("Failed to get {}x{} upload texture. Things are gonna break.", width, height);
@@ -3406,21 +3582,20 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
GetCurrentNormalizedVertexDepth()};
// the viewport should already be set to the full vram, so just adjust the scissor
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.width(), scaled_bounds.height());
g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(0);
if (upload_texture)
{
g_gpu_device->SetTextureSampler(0, upload_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->Draw(3, 0);
g_gpu_device->RecycleTexture(std::move(upload_texture));
}
else
{
g_gpu_device->SetTextureBuffer(0, m_vram_upload_buffer.get());
g_gpu_device->Draw(3, 0);
}
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(scaled_bounds);
DrawScreenQuad(scaled_bounds);
RestoreDeviceContext();
}
@@ -3492,12 +3667,16 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
GetCurrentNormalizedVertexDepth()};
// VRAM read texture should already be bound.
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetViewportAndScissor(dst_bounds_scaled);
g_gpu_device->SetPipeline(m_vram_copy_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(0);
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(dst_bounds_scaled);
DrawScreenQuad(dst_bounds_scaled);
RestoreDeviceContext();
if (check_mask && !m_pgxp_depth_buffer)
@@ -3560,7 +3739,7 @@ void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd)
{
// TODO: avoid all this for vertex loading, only do when the type of draw changes
BatchTextureMode texture_mode = cmd->texture_enable ? m_batch.texture_mode : BatchTextureMode::Disabled;
GPUTextureCache::SourceKey texture_cache_key = m_batch.texture_cache_key;
GPUTextureCache::SourceKey texture_cache_key = m_texture_cache_key;
if (cmd->texture_enable)
{
// texture page changed - check that the new page doesn't intersect the drawing area
@@ -3668,9 +3847,9 @@ void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd)
{
if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode ||
(transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) ||
dithering_enable != m_batch.dithering || m_batch_ubo_data.u_texture_window_bits != cmd->window ||
dithering_enable != m_batch.dithering || m_texture_window_bits != cmd->window ||
m_batch_ubo_data.u_set_mask_while_drawing != BoolToUInt32(cmd->set_mask_while_drawing) ||
(texture_mode == BatchTextureMode::PageTexture && m_batch.texture_cache_key != texture_cache_key))
(texture_mode == BatchTextureMode::PageTexture && m_texture_cache_key != texture_cache_key))
{
FlushRender();
}
@@ -3717,13 +3896,13 @@ void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd)
m_batch.texture_mode = texture_mode;
m_batch.transparency_mode = transparency_mode;
m_batch.dithering = dithering_enable;
m_batch.texture_cache_key = texture_cache_key;
m_texture_cache_key = texture_cache_key;
if (m_batch_ubo_data.u_texture_window_bits != cmd->window)
if (m_texture_window_bits != cmd->window)
{
m_batch_ubo_data.u_texture_window_bits = cmd->window;
m_texture_window_bits = cmd->window;
m_texture_window_active = (cmd->window != GPUTextureWindow{{0xFF, 0xFF, 0x00, 0x00}});
GSVector4i::store<true>(&m_batch_ubo_data.u_texture_window[0], GSVector4i::load32(&cmd->window).u8to32());
GSVector4i::store<false>(&m_batch_ubo_data.u_texture_window[0], GSVector4i::load32(&cmd->window).u8to32());
m_batch_ubo_dirty = true;
}
@@ -3767,7 +3946,7 @@ void GPU_HW::FlushRender()
const GPUTextureCache::Source* texture = nullptr;
if (m_batch.texture_mode == BatchTextureMode::PageTexture)
{
texture = LookupSource(m_batch.texture_cache_key, m_current_uv_rect,
texture = LookupSource(m_texture_cache_key, m_current_uv_rect,
m_batch.transparency_mode != GPUTransparencyMode::Disabled ?
GPUTextureCache::PaletteRecordFlags::HasSemiTransparentDraws :
GPUTextureCache::PaletteRecordFlags::None);
@@ -3884,26 +4063,20 @@ void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd)
}
else
{
if (!m_vram_extract_texture || m_vram_extract_texture->GetWidth() != scaled_display_width ||
m_vram_extract_texture->GetHeight() != scaled_display_height)
if (!g_gpu_device->ResizeTexture(&m_vram_extract_texture, scaled_display_width, scaled_display_height,
GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8,
GPUTexture::Flags::None)) [[unlikely]]
{
if (!g_gpu_device->ResizeTexture(&m_vram_extract_texture, scaled_display_width, scaled_display_height,
GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8,
GPUTexture::Flags::None)) [[unlikely]]
{
ClearDisplayTexture();
return;
}
ClearDisplayTexture();
return;
}
m_vram_texture->MakeReadyForSampling();
g_gpu_device->InvalidateRenderTarget(m_vram_extract_texture.get());
if (depth_source &&
((m_vram_extract_depth_texture && m_vram_extract_depth_texture->GetWidth() == scaled_display_width &&
m_vram_extract_depth_texture->GetHeight() == scaled_display_height) ||
!g_gpu_device->ResizeTexture(&m_vram_extract_depth_texture, scaled_display_width, scaled_display_height,
GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT, GPUTexture::Flags::None)))
g_gpu_device->ResizeTexture(&m_vram_extract_depth_texture, scaled_display_width, scaled_display_height,
GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT, GPUTexture::Flags::None))
{
depth_source->MakeReadyForSampling();
g_gpu_device->InvalidateRenderTarget(m_vram_extract_depth_texture.get());
@@ -3974,7 +4147,23 @@ void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd)
if (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit)
{
DebugAssert(m_display_texture);
DownsampleFramebuffer();
if (m_downsample_mode == GPUDownsampleMode::Adaptive)
{
DownsampleFramebufferAdaptive(m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
m_display_texture_view_width, m_display_texture_view_height);
}
else if (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil)
{
DownsampleFramebufferAdaptiveStencil(m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
m_display_texture_view_width, m_display_texture_view_height,
scaled_vram_offset_x, scaled_vram_offset_y, line_skip);
}
else
{
DownsampleFramebufferBoxFilter(m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
m_display_texture_view_width, m_display_texture_view_height);
}
}
if (drew_anything)
@@ -3993,6 +4182,10 @@ void GPU_HW::UpdateDownsamplingLevels()
current_width /= 2;
}
}
else if (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil)
{
m_downsample_scale_or_levels = m_resolution_scale;
}
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
m_downsample_scale_or_levels = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale);
@@ -4012,20 +4205,6 @@ void GPU_HW::OnBufferSwapped()
m_depth_was_copied = false;
}
void GPU_HW::DownsampleFramebuffer()
{
GPUTexture* source = m_display_texture;
const u32 left = m_display_texture_view_x;
const u32 top = m_display_texture_view_y;
const u32 width = m_display_texture_view_width;
const u32 height = m_display_texture_view_height;
if (m_downsample_mode == GPUDownsampleMode::Adaptive)
DownsampleFramebufferAdaptive(source, left, top, width, height);
else
DownsampleFramebufferBoxFilter(source, left, top, width, height);
}
void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
{
GL_PUSH_FMT("DownsampleFramebufferAdaptive ({},{} => {},{})", left, top, left + width, left + height);
@@ -4146,6 +4325,94 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, width, height);
}
/// Stencil-guided adaptive downsample of the display region.
///
/// Runs three passes:
///   1. "Weights": draws into a VRAM-sized R8 target with the VRAM depth texture bound as the depth/stencil
///      attachment, once with stencil reference 0 (writing 0.0) and once with reference 1 (writing 1.0).
///   2. "Box downsample": averages FACTOR x FACTOR blocks of `source` into a reduced-size target and records
///      in alpha whether any weight in the block was non-zero.
///   3. "Composite": writes the final full-size image into m_downsample_texture, choosing between the
///      downsampled colour and the native colour from `source` based on that alpha.
///
/// On success the display texture is replaced with m_downsample_texture. On allocation failure an error is
/// logged and the display texture is left untouched.
///
/// @param source    Texture holding the upscaled display image.
/// @param left      X offset of the display region within `source`.
/// @param top       Y offset of the display region within `source`.
/// @param width     Width of the display region.
/// @param height    Height of the display region.
/// @param fb_left   X offset of the region within the VRAM-sized weight texture.
/// @param fb_top    Y offset of the region within the VRAM-sized weight texture.
/// @param line_skip Shift applied to vertical extents/coordinates when addressing the weight texture.
void GPU_HW::DownsampleFramebufferAdaptiveStencil(GPUTexture* source, u32 left, u32 top, u32 width, u32 height,
                                                  u32 fb_left, u32 fb_top, u32 line_skip)
{
  GL_PUSH_FMT("DownsampleFramebufferAdaptiveStencil({},{} => {},{} ({}x{})", left, top, left + width, top + height,
              width, height);

  const u32 ds_width = width / m_downsample_scale_or_levels;
  const u32 ds_height = height / m_downsample_scale_or_levels;

  // TODO: Weight texture is broken with MSAA
  const bool output_texture_ok =
    g_gpu_device->ResizeTexture(&m_downsample_texture, width, height, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT,
                                GPUTexture::Flags::None, false);
  GPUDevice::AutoRecycleTexture downsample_texture = g_gpu_device->FetchAutoRecycleTexture(
    ds_width, ds_height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT, GPUTexture::Flags::None);
  GPUDevice::AutoRecycleTexture weight_texture = g_gpu_device->FetchAutoRecycleTexture(
    m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(),
    GPUTexture::Type::RenderTarget, GPUTexture::Format::R8, GPUTexture::Flags::None);
  if (!output_texture_ok || !downsample_texture || !weight_texture)
  {
    ERROR_LOG("Failed to create {}x{} RT for adaptive stencil downsampling", width, height);

    // Balance the GL_PUSH_FMT above; otherwise the debug group stays open after this early return.
    GL_POP();
    return;
  }

  {
    // fill weight texture
    GL_SCOPE("Weights");

    const float fill_uniforms_unmarked[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    const float fill_uniforms_marked[4] = {1.0f, 1.0f, 1.0f, 1.0f};

    g_gpu_device->SetViewportAndScissor(fb_left, fb_top, width, height << line_skip);
    g_gpu_device->InvalidateRenderTarget(weight_texture.get());
    g_gpu_device->SetRenderTarget(weight_texture.get(), m_vram_depth_texture.get());
    g_gpu_device->SetPipeline(m_downsample_pass_pipeline.get());

    // NOTE(review): presumably the pass pipeline's stencil test only passes where the stored stencil value
    // equals the reference, so each draw covers a disjoint set of pixels - confirm against pipeline setup.
    g_gpu_device->SetStencilRef(0);
    g_gpu_device->PushUniformBuffer(fill_uniforms_unmarked, sizeof(fill_uniforms_unmarked));
    g_gpu_device->Draw(3, 0);

    g_gpu_device->SetStencilRef(1);
    g_gpu_device->PushUniformBuffer(fill_uniforms_marked, sizeof(fill_uniforms_marked));
    g_gpu_device->Draw(3, 0);
  }

  // box downsample
  {
    GL_SCOPE("Box downsample");

    source->MakeReadyForSampling();

    // NOTE(review): only five values are consumed by the shader (uint2, uint2, uint); the remaining
    // elements are zero-initialised padding. The array length of 9 looks arbitrary - confirm.
    const u32 uniforms[9] = {left, top, fb_left, fb_top, line_skip};

    g_gpu_device->InvalidateRenderTarget(downsample_texture.get());
    g_gpu_device->SetRenderTarget(downsample_texture.get());
    g_gpu_device->SetPipeline(m_downsample_blur_pipeline.get());
    g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler());
    g_gpu_device->SetTextureSampler(1, weight_texture.get(), g_gpu_device->GetNearestSampler());
    g_gpu_device->SetViewportAndScissor(0, 0, ds_width, ds_height);
    g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
    g_gpu_device->Draw(3, 0);
  }

  // composite
  {
    GL_SCOPE("Composite");

    // Display rectangle expressed as a fraction of the source texture size (x, y, w, h).
    const GSVector4 nat_uniforms =
      GSVector4(GSVector4i(left, top, width, height)) / GSVector4(GSVector4i::xyxy(source->GetSizeVec()));

    g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
    g_gpu_device->SetRenderTarget(m_downsample_texture.get());
    g_gpu_device->SetPipeline(m_downsample_composite_pipeline.get());
    g_gpu_device->SetTextureSampler(0, downsample_texture.get(), g_gpu_device->GetLinearSampler());
    g_gpu_device->SetTextureSampler(1, source, g_gpu_device->GetNearestSampler());
    g_gpu_device->SetViewportAndScissor(0, 0, width, height);
    g_gpu_device->PushUniformBuffer(&nat_uniforms, sizeof(nat_uniforms));
    g_gpu_device->Draw(3, 0);
    m_downsample_texture->MakeReadyForSampling();
  }

  GL_POP();

  RestoreDeviceContext();

  SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, width, height);
}
void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
{
GL_SCOPE_FMT("DownsampleFramebufferBoxFilter({},{} => {},{} ({}x{})", left, top, left + width, top + height, width,
@@ -4154,14 +4421,8 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to
const u32 ds_width = width / m_downsample_scale_or_levels;
const u32 ds_height = height / m_downsample_scale_or_levels;
if (!m_downsample_texture || m_downsample_texture->GetWidth() != ds_width ||
m_downsample_texture->GetHeight() != ds_height)
{
g_gpu_device->RecycleTexture(std::move(m_downsample_texture));
m_downsample_texture = g_gpu_device->FetchTexture(ds_width, ds_height, 1, 1, 1, GPUTexture::Type::RenderTarget,
VRAM_RT_FORMAT, GPUTexture::Flags::None);
}
if (!m_downsample_texture)
if (!g_gpu_device->ResizeTexture(&m_downsample_texture, ds_width, ds_height, GPUTexture::Type::RenderTarget,
VRAM_RT_FORMAT, GPUTexture::Flags::None, false))
{
ERROR_LOG("Failed to create {}x{} RT for box downsampling", width, height);
return;

View File

@@ -15,12 +15,6 @@
#include <tuple>
#include <utility>
class Error;
class GPU_SW_Backend;
struct GPUBackendCommand;
struct GPUBackendDrawCommand;
// TODO: Move to cpp
// TODO: Rename to GPUHWBackend, preserved to avoid conflicts.
class GPU_HW final : public GPUBackend
@@ -121,6 +115,14 @@ private:
static_assert(GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS >= (VRAM_WIDTH * VRAM_HEIGHT));
// Four-float vertex, aligned to 16 bytes.
// NOTE(review): presumably (x, y) is the position and (u, v) the texture coordinate consumed by the screen
// vertex shader's "float2 a_pos" / "float2 a_tex0" inputs - confirm against the input layout.
struct alignas(16) ScreenVertex
{
float x;
float y;
float u;
float v;
};
struct alignas(16) BatchVertex
{
float x;
@@ -139,7 +141,7 @@ private:
void SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v);
};
struct alignas(4) BatchConfig
struct BatchConfig
{
BatchTextureMode texture_mode = BatchTextureMode::Disabled;
GPUTransparencyMode transparency_mode = GPUTransparencyMode::Disabled;
@@ -149,14 +151,13 @@ private:
bool check_mask_before_draw = false;
bool use_depth_buffer = false;
bool sprite_mode = false;
GPUTextureCache::SourceKey texture_cache_key = {};
u8 stencil_reference = 0;
// Returns the render mode for this batch.
BatchRenderMode GetRenderMode() const;
};
struct alignas(VECTOR_ALIGNMENT) BatchUBOData
struct BatchUBOData
{
u32 u_texture_window[4]; // and_x, and_y, or_x, or_y
float u_src_alpha_factor;
@@ -166,7 +167,6 @@ private:
float u_resolution_scale;
float u_rcp_resolution_scale;
float u_resolution_scale_minus_one;
GPUTextureWindow u_texture_window_bits; // not actually used on GPU
};
struct RendererStats
@@ -223,6 +223,8 @@ private:
bool ShouldCheckForTexPageOverlap() const;
bool IsFlushed() const;
void DrawScreenQuad(const GSVector4i bounds, const GSVector4 uv_bounds = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f));
void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd);
void PrepareDraw(const GPUBackendDrawCommand* cmd);
@@ -233,6 +235,9 @@ private:
const GSVector4i clamped_draw_rect_123);
void ResetBatchVertexDepth();
/// Returns true if the stencil buffer should be filled.
bool UseStencilBuffer() const;
/// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
float GetCurrentNormalizedVertexDepth() const;
@@ -266,11 +271,13 @@ private:
void SetBatchDepthBuffer(const GPUBackendDrawCommand* cmd, bool enabled);
void CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVertex* vertices, u32 num_vertices);
void SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled);
void SetBatchStencilReference(const GPUBackendDrawCommand* cmd, u8 value);
void UpdateDownsamplingLevels();
void DownsampleFramebuffer();
void DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
void DownsampleFramebufferAdaptiveStencil(GPUTexture* source, u32 left, u32 top, u32 width, u32 height, u32 fb_left,
u32 fb_top, u32 line_skip);
void DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
std::unique_ptr<GPUTexture> m_vram_texture;
@@ -329,6 +336,7 @@ private:
// Changed state
BatchUBOData m_batch_ubo_data = {};
GPUTextureCache::SourceKey m_texture_cache_key = {};
// Bounding box of VRAM area that the GPU has drawn into.
GSVector4i m_vram_dirty_draw_rect = INVALID_RECT;
@@ -349,6 +357,8 @@ private:
u32 bits = INVALID_DRAW_MODE_BITS;
} m_draw_mode = {};
GPUTextureWindow m_texture_window_bits;
std::unique_ptr<GPUPipeline> m_wireframe_pipeline;
// [wrapped][interlaced]
@@ -380,4 +390,5 @@ private:
// common shaders
std::unique_ptr<GPUShader> m_fullscreen_quad_vertex_shader;
std::unique_ptr<GPUShader> m_screen_quad_vertex_shader;
};

View File

@@ -50,6 +50,27 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss) const
false);
}
/// Generates the source for a pass-through vertex shader taking a float2 position and a float2 texture
/// coordinate. The position is emitted with z = 1 and w = 1, the texture coordinate is forwarded unchanged,
/// and the Y coordinate is negated when any of API_OPENGL, API_OPENGL_ES or API_VULKAN is set.
/// @return The complete shader source as a string.
std::string GPU_HW_ShaderGen::GenerateScreenVertexShader() const
{
std::stringstream ss;
WriteHeader(ss);
DeclareVertexEntryPoint(ss, {"float2 a_pos", "float2 a_tex0"}, 0, 1, {}, false, "", false, false, false);
// Everything inside the raw string below is emitted verbatim as shader code.
ss << R"(
{
// Depth set to 1 for PGXP depth buffer.
v_pos = float4(a_pos, 1.0f, 1.0f);
v_tex0 = a_tex0;
// NDC space Y flip in Vulkan.
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
v_pos.y = -v_pos.y;
#endif
}
)";
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading,
bool textured, bool palette, bool page_texture, bool uv_limits,
bool force_round_texcoords, bool pgxp_depth,
@@ -1803,6 +1824,73 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact
return ss.str();
}
/// Generates the fragment shader for the box-downsample pass of adaptive stencil downsampling.
///
/// For each output pixel the shader reads a FACTOR x FACTOR block from samp0 (colour) and the matching
/// block from samp1 (weights, addressed with the line-skip shift applied to Y). The output RGB is the
/// average colour of the block; the output alpha is 1.0 if the summed weight is non-zero, otherwise 0.0.
///
/// @param factor       Block size in pixels along each axis; emitted as the FACTOR macro.
/// @param multisamples Sample count of the weight texture; when greater than 1, samp1 is declared as
///                     multisampled and every sample contributes to the weight sum.
/// @return The complete shader source as a string.
std::string GPU_HW_ShaderGen::GenerateAdaptiveStencilDownsampleBlurFragmentShader(u32 factor, u8 multisamples) const
{
std::stringstream ss;
WriteHeader(ss);
DefineMacro(ss, "MULTISAMPLES", multisamples);
// Uniform layout: two uint2 base coordinates followed by the line skip shift.
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_fb_base_coords", "uint u_line_skip"}, true);
DeclareTexture(ss, "samp0", 0, false);
DeclareTexture(ss, "samp1", 1, (multisamples > 1));
ss << "#define FACTOR " << factor << "u\n";
DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
// Everything inside the raw string below is emitted verbatim as shader code.
ss << R"(
{
float3 color = float3(0.0, 0.0, 0.0);
float weight = 0.0;
uint2 base_coords = u_base_coords + uint2(v_pos.xy) * uint2(FACTOR, FACTOR);
uint2 fb_base_coords = u_fb_base_coords + uint2(uint(v_pos.x) * FACTOR, (uint(v_pos.y) << u_line_skip) * FACTOR);
for (uint offset_x = 0u; offset_x < FACTOR; offset_x++)
{
for (uint offset_y = 0u; offset_y < FACTOR; offset_y++)
{
int2 lcoords = int2(base_coords + uint2(offset_x, offset_y));
color += LOAD_TEXTURE(samp0, lcoords, 0).rgb;
int2 fbcoords = int2(fb_base_coords + uint2(offset_x, offset_y << u_line_skip));
#if MULTISAMPLES > 1
for (int i = 0; i < MULTISAMPLES; i++)
weight += LOAD_TEXTURE_MS(samp1, fbcoords, i).r;
#else
weight += LOAD_TEXTURE(samp1, fbcoords, 0).r;
#endif
}
}
color /= float(FACTOR * FACTOR);
o_col0 = float4(color, float(weight != 0.0));
}
)";
return ss.str();
}
/// Generates the fragment shader for the composite pass of adaptive stencil downsampling.
///
/// The shader samples the downsampled image from samp0 and the native image from samp1 (with the texture
/// coordinate remapped through u_native_rect), then outputs the native colour where the downsampled alpha
/// is non-zero and the downsampled colour otherwise.
///
/// NOTE(review): samp2 is declared but never sampled in the shader body below - confirm whether the
/// declaration is still needed.
///
/// @return The complete shader source as a string.
std::string GPU_HW_ShaderGen::GenerateAdaptiveStencilDownsampleCompositeFragmentShader() const
{
std::stringstream ss;
WriteHeader(ss);
// u_native_rect: xy = offset, zw = scale applied to v_tex0 when sampling the native image.
DeclareUniformBuffer(ss, {"float4 u_native_rect"}, true);
DeclareTexture(ss, "samp0", 0, false);
DeclareTexture(ss, "samp1", 1, false);
DeclareTexture(ss, "samp2", 2, false);
DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
// Everything inside the raw string below is emitted verbatim as shader code.
ss << R"(
{
float4 downsample_color = SAMPLE_TEXTURE(samp0, v_tex0);
float native_weight = float(downsample_color.a != 0.0);
float2 native_coords = u_native_rect.xy + v_tex0 * u_native_rect.zw;
float4 native_color = SAMPLE_TEXTURE(samp1, native_coords);
o_col0 = lerp(downsample_color, native_color, native_weight);
})";
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter) const
{
std::stringstream ss;

View File

@@ -13,6 +13,8 @@ public:
GPU_HW_ShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch);
~GPU_HW_ShaderGen();
std::string GenerateScreenVertexShader() const;
std::string GenerateBatchVertexShader(bool upscaled, bool msaa, bool per_sample_shading, bool textured, bool palette,
bool page_texture, bool uv_limits, bool force_round_texcoords, bool pgxp_depth,
bool disable_color_perspective) const;
@@ -41,6 +43,8 @@ public:
std::string GenerateAdaptiveDownsampleBlurFragmentShader() const;
std::string GenerateAdaptiveDownsampleCompositeFragmentShader() const;
std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor) const;
std::string GenerateAdaptiveStencilDownsampleBlurFragmentShader(u32 factor, u8 msaa) const;
std::string GenerateAdaptiveStencilDownsampleCompositeFragmentShader() const;
std::string GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter) const;

View File

@@ -1565,11 +1565,12 @@ const char* Settings::GetLineDetectModeDisplayName(GPULineDetectMode mode)
"GPULineDetectMode");
}
static constexpr const std::array s_downsample_mode_names = {"Disabled", "Box", "Adaptive"};
static constexpr const std::array s_downsample_mode_names = {"Disabled", "Box", "Adaptive", "AdaptiveStencil"};
static constexpr const std::array s_downsample_mode_display_names = {
TRANSLATE_DISAMBIG_NOOP("Settings", "Disabled", "GPUDownsampleMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Box (Downsample 3D/Smooth All)", "GPUDownsampleMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Adaptive (Preserve 3D/Smooth 2D)", "GPUDownsampleMode")};
TRANSLATE_DISAMBIG_NOOP("Settings", "Adaptive (Preserve 3D/Smooth 2D)", "GPUDownsampleMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Adaptive Sharp (Preserve 3D/Smooth 2D)", "GPUDownsampleMode")};
std::optional<GPUDownsampleMode> Settings::ParseDownsampleModeName(const char* str)
{

View File

@@ -105,6 +105,7 @@ enum class GPUDownsampleMode : u8
Disabled,
Box,
Adaptive,
AdaptiveStencil,
Count
};

View File

@@ -646,6 +646,13 @@ void D3D11Device::ClearDepth(GPUTexture* t, float d)
T->CommitClear(m_context.Get());
}
/// Clears only the stencil plane of the given depth/stencil texture to `value`.
/// The depth plane is left untouched because only D3D11_CLEAR_STENCIL is requested.
void D3D11Device::ClearStencil(GPUTexture* t, u8 value)
{
  // Depth argument is ignored by D3D11 when the depth clear flag is not set.
  constexpr float unused_depth = 0.0f;

  D3D11Texture* const tex = static_cast<D3D11Texture*>(t);
  DebugAssert(tex->HasStencil());

  m_context->ClearDepthStencilView(tex->GetD3DDSV(), D3D11_CLEAR_STENCIL, unused_depth, value);
}
void D3D11Device::InvalidateRenderTarget(GPUTexture* t)
{
D3D11Texture* const T = static_cast<D3D11Texture*>(t);
@@ -1146,6 +1153,16 @@ void D3D11Device::SetScissor(const GSVector4i rc)
m_context->RSSetScissorRects(1, &drc);
}
/// Updates the cached stencil reference value and, when a pipeline is bound, re-applies that pipeline's
/// depth/stencil state with the new reference. Does nothing if the value is unchanged.
void D3D11Device::SetStencilRef(u8 value)
{
  const bool changed = (m_current_stencil_ref != value);
  if (changed)
  {
    m_current_stencil_ref = value;

    // D3D11 sets the reference together with the state object, so the state has to be rebound.
    if (m_current_pipeline)
      m_context->OMSetDepthStencilState(m_current_pipeline->GetDepthStencilState(), m_current_stencil_ref);
  }
}
void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
{
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);

View File

@@ -70,6 +70,7 @@ public:
u32 src_x, u32 src_y, u32 width, u32 height) override;
void ClearRenderTarget(GPUTexture* t, u32 c) override;
void ClearDepth(GPUTexture* t, float d) override;
void ClearStencil(GPUTexture* t, u8 value) override;
void InvalidateRenderTarget(GPUTexture* t) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
@@ -101,6 +102,7 @@ public:
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
void SetViewport(const GSVector4i rc) override;
void SetScissor(const GSVector4i rc) override;
void SetStencilRef(u8 value) override;
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
@@ -131,7 +133,7 @@ private:
size_t operator()(const BlendStateMapKey& key) const;
};
using RasterizationStateMap = std::unordered_map<u8, ComPtr<ID3D11RasterizerState>>;
using DepthStateMap = std::unordered_map<u8, ComPtr<ID3D11DepthStencilState>>;
using DepthStateMap = std::unordered_map<u32, ComPtr<ID3D11DepthStencilState>>;
using BlendStateMap = std::unordered_map<BlendStateMapKey, ComPtr<ID3D11BlendState>, BlendStateMapHash>;
using InputLayoutMap =
std::unordered_map<GPUPipeline::InputLayout, ComPtr<ID3D11InputLayout>, GPUPipeline::InputLayoutHash>;
@@ -198,6 +200,7 @@ private:
D3D_PRIMITIVE_TOPOLOGY m_current_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
u32 m_current_vertex_stride = 0;
u32 m_current_blend_factor = 0;
u8 m_current_stencil_ref = 0;
std::array<ID3D11ShaderResourceView*, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
std::array<ID3D11SamplerState*, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};

View File

@@ -192,8 +192,8 @@ D3D11Device::ComPtr<ID3D11DepthStencilState> D3D11Device::GetDepthState(const GP
return dds;
}
static constexpr std::array<D3D11_COMPARISON_FUNC, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)> func_mapping =
{{
static constexpr std::array<D3D11_COMPARISON_FUNC, static_cast<u32>(GPUPipeline::ComparisonFunc::MaxCount)>
func_mapping = {{
D3D11_COMPARISON_NEVER, // Never
D3D11_COMPARISON_ALWAYS, // Always
D3D11_COMPARISON_LESS, // Less
@@ -203,10 +203,36 @@ D3D11Device::ComPtr<ID3D11DepthStencilState> D3D11Device::GetDepthState(const GP
D3D11_COMPARISON_EQUAL, // Equal
}};
static constexpr std::array<D3D11_STENCIL_OP, static_cast<u32>(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping =
{{
D3D11_STENCIL_OP_KEEP, // Keep
D3D11_STENCIL_OP_ZERO, // Zero
D3D11_STENCIL_OP_REPLACE, // Replace
D3D11_STENCIL_OP_INCR_SAT, // IncrSat
D3D11_STENCIL_OP_DECR_SAT, // DecrSat
D3D11_STENCIL_OP_INVERT, // Invert
D3D11_STENCIL_OP_INCR, // Incr
D3D11_STENCIL_OP_DECR, // Decr
}};
D3D11_DEPTH_STENCIL_DESC desc = {};
desc.DepthEnable = ds.depth_test != GPUPipeline::DepthFunc::Always || ds.depth_write;
desc.DepthEnable = ds.depth_test != GPUPipeline::ComparisonFunc::Always || ds.depth_write;
desc.DepthFunc = func_mapping[static_cast<u8>(ds.depth_test.GetValue())];
desc.DepthWriteMask = ds.depth_write ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
desc.StencilEnable = ds.stencil_enable;
if (ds.stencil_enable)
{
  // All stencil bits participate in both the test and the write.
  desc.StencilReadMask = 0xFF;
  desc.StencilWriteMask = 0xFF;

  // Front-facing primitives.
  desc.FrontFace.StencilFailOp = stencil_op_mapping[static_cast<u8>(ds.front_stencil_fail_op.GetValue())];
  desc.FrontFace.StencilDepthFailOp = stencil_op_mapping[static_cast<u8>(ds.front_stencil_depth_fail_op.GetValue())];
  desc.FrontFace.StencilPassOp = stencil_op_mapping[static_cast<u8>(ds.front_stencil_pass_op.GetValue())];
  // Must read the *front* comparison function; the back one previously leaked in here.
  desc.FrontFace.StencilFunc = func_mapping[static_cast<u8>(ds.front_stencil_func.GetValue())];

  // Back-facing primitives.
  desc.BackFace.StencilFailOp = stencil_op_mapping[static_cast<u8>(ds.back_stencil_fail_op.GetValue())];
  desc.BackFace.StencilDepthFailOp = stencil_op_mapping[static_cast<u8>(ds.back_stencil_depth_fail_op.GetValue())];
  desc.BackFace.StencilPassOp = stencil_op_mapping[static_cast<u8>(ds.back_stencil_pass_op.GetValue())];
  desc.BackFace.StencilFunc = func_mapping[static_cast<u8>(ds.back_stencil_func.GetValue())];
}
HRESULT hr = m_device->CreateDepthStencilState(&desc, dds.GetAddressOf());
if (FAILED(hr)) [[unlikely]]
@@ -449,7 +475,7 @@ void D3D11Device::SetPipeline(GPUPipeline* pipeline)
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
{
m_current_depth_state = ds;
m_context->OMSetDepthStencilState(ds, 0);
m_context->OMSetDepthStencilState(ds, m_current_stencil_ref);
}
if (ID3D11BlendState* bs = PL->GetBlendState();

View File

@@ -1509,6 +1509,16 @@ void D3D12Device::ClearDepth(GPUTexture* t, float d)
EndRenderPass();
}
/// Clears only the stencil plane of the given depth/stencil texture to `value`.
/// If the texture is the depth target of the render pass currently in progress, that pass is ended first.
void D3D12Device::ClearStencil(GPUTexture* t, u8 value)
{
  // Depth argument is ignored because only the stencil clear flag is passed.
  constexpr float unused_depth = 0.0f;

  DebugAssert(t->HasStencil());

  if (InRenderPass() && m_current_depth_target == t)
  {
    EndRenderPass();
  }

  D3D12Texture* const tex = static_cast<D3D12Texture*>(t);
  GetCommandList()->ClearDepthStencilView(tex->GetWriteDescriptor(), D3D12_CLEAR_FLAG_STENCIL, unused_depth, value, 0,
                                          nullptr);
}
void D3D12Device::InvalidateRenderTarget(GPUTexture* t)
{
GPUDevice::InvalidateRenderTarget(t);
@@ -1892,8 +1902,12 @@ void D3D12Device::BeginRenderPass()
ds_desc_p = &ds_desc;
ds_desc.cpuDescriptor = ds->GetWriteDescriptor();
ds_desc.DepthEndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
ds_desc.StencilBeginningAccess = {};
ds_desc.StencilEndingAccess = {};
ds_desc.StencilBeginningAccess = {ds->IsDepthStencil() ? D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE :
D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD,
{}};
ds_desc.StencilEndingAccess = {ds->IsDepthStencil() ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE :
D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_DISCARD,
{}};
switch (ds->GetState())
{
@@ -2073,6 +2087,8 @@ void D3D12Device::SetInitialPipelineState()
m_current_blend_constant = m_current_pipeline->GetBlendConstants();
cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());
cmdlist->OMSetStencilRef(m_current_stencil_ref);
SetViewport(cmdlist);
SetScissor(cmdlist);
}
@@ -2101,6 +2117,15 @@ void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist)
cmdlist->RSSetScissorRects(1, reinterpret_cast<const D3D12_RECT*>(&m_current_scissor));
}
/// Updates the cached stencil reference value and records it on the current command list.
/// Does nothing if the value is unchanged.
void D3D12Device::SetStencilRef(u8 value)
{
  if (m_current_stencil_ref != value)
  {
    m_current_stencil_ref = value;
    GetCommandList()->OMSetStencilRef(value);
  }
}
void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
{
D3D12Texture* T = static_cast<D3D12Texture*>(texture);

View File

@@ -91,6 +91,7 @@ public:
u32 src_x, u32 src_y, u32 width, u32 height) override;
void ClearRenderTarget(GPUTexture* t, u32 c) override;
void ClearDepth(GPUTexture* t, float d) override;
void ClearStencil(GPUTexture* t, u8 value) override;
void InvalidateRenderTarget(GPUTexture* t) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
@@ -122,6 +123,7 @@ public:
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
void SetViewport(const GSVector4i rc) override;
void SetScissor(const GSVector4i rc) override;
void SetStencilRef(u8 value) override;
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
@@ -357,6 +359,7 @@ private:
u32 m_current_vertex_stride = 0;
u32 m_current_blend_constant = 0;
GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
u8 m_current_stencil_ref = 0;
std::array<D3D12Texture*, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
std::array<D3D12DescriptorHandle, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};

View File

@@ -165,7 +165,7 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
D3D12_CULL_MODE_BACK, // Back
}};
static constexpr std::array<D3D12_COMPARISON_FUNC, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)>
static constexpr std::array<D3D12_COMPARISON_FUNC, static_cast<u32>(GPUPipeline::ComparisonFunc::MaxCount)>
compare_mapping = {{
D3D12_COMPARISON_FUNC_NEVER, // Never
D3D12_COMPARISON_FUNC_ALWAYS, // Always
@@ -176,6 +176,18 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
D3D12_COMPARISON_FUNC_EQUAL, // Equal
}};
static constexpr std::array<D3D12_STENCIL_OP, static_cast<u32>(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping =
{{
D3D12_STENCIL_OP_KEEP, // Keep
D3D12_STENCIL_OP_ZERO, // Zero
D3D12_STENCIL_OP_REPLACE, // Replace
D3D12_STENCIL_OP_INCR_SAT, // IncrSat
D3D12_STENCIL_OP_DECR_SAT, // DecrSat
D3D12_STENCIL_OP_INVERT, // Invert
D3D12_STENCIL_OP_INCR, // Incr
D3D12_STENCIL_OP_DECR, // Decr
}};
static constexpr std::array<D3D12_BLEND, static_cast<u32>(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{
D3D12_BLEND_ZERO, // Zero
D3D12_BLEND_ONE, // One
@@ -238,9 +250,28 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
cull_mapping[static_cast<u8>(config.rasterization.cull_mode.GetValue())], false);
if (config.samples > 1)
gpb.SetMultisamples(config.samples);
gpb.SetDepthState(config.depth.depth_test != GPUPipeline::DepthFunc::Always || config.depth.depth_write,
gpb.SetDepthState(config.depth.depth_test != GPUPipeline::ComparisonFunc::Always || config.depth.depth_write,
config.depth.depth_write, compare_mapping[static_cast<u8>(config.depth.depth_test.GetValue())]);
gpb.SetNoStencilState();
if (config.depth.stencil_enable)
{
const D3D12_DEPTH_STENCILOP_DESC front = {
.StencilFailOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_fail_op.GetValue())],
.StencilDepthFailOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_depth_fail_op.GetValue())],
.StencilPassOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_pass_op.GetValue())],
.StencilFunc = compare_mapping[static_cast<u8>(config.depth.front_stencil_func.GetValue())],
};
const D3D12_DEPTH_STENCILOP_DESC back = {
.StencilFailOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_fail_op.GetValue())],
.StencilDepthFailOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_depth_fail_op.GetValue())],
.StencilPassOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_pass_op.GetValue())],
.StencilFunc = compare_mapping[static_cast<u8>(config.depth.back_stencil_func.GetValue())],
};
gpb.SetStencilState(config.depth.stencil_enable, 0xFF, 0xFF, front, back);
}
else
{
gpb.SetNoStencilState();
}
gpb.SetBlendState(0, config.blend.enable, blend_mapping[static_cast<u8>(config.blend.src_blend.GetValue())],
blend_mapping[static_cast<u8>(config.blend.dst_blend.GetValue())],

View File

@@ -129,14 +129,14 @@ GPUPipeline::RasterizationState GPUPipeline::RasterizationState::GetNoCullState(
GPUPipeline::DepthState GPUPipeline::DepthState::GetNoTestsState()
{
DepthState ret = {};
ret.depth_test = DepthFunc::Always;
ret.depth_test = ComparisonFunc::Always;
return ret;
}
GPUPipeline::DepthState GPUPipeline::DepthState::GetAlwaysWriteState()
{
DepthState ret = {};
ret.depth_test = DepthFunc::Always;
ret.depth_test = ComparisonFunc::Always;
ret.depth_write = true;
return ret;
}
@@ -815,6 +815,16 @@ void GPUDevice::SetScissor(s32 x, s32 y, s32 width, s32 height)
SetScissor(GSVector4i(x, y, x + width, y + height));
}
// Placeholder base-class definition: unconditionally panics.
// The "= 0" marker reflects that this member is declared pure virtual, so backends are expected to
// provide their own implementation.
void GPUDevice::SetStencilRef(u8 value) /*= 0*/
{
Panic("FIXME");
}
// Placeholder base-class definition: unconditionally panics.
// The "= 0" marker reflects that this member is declared pure virtual, so backends are expected to
// provide their own implementation.
void GPUDevice::ClearStencil(GPUTexture* t, u8 value) /*= 0*/
{
Panic("FIXME");
}
void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height)
{
SetViewportAndScissor(GSVector4i(x, y, x + width, y + height));
@@ -1222,6 +1232,12 @@ bool GPUDevice::ResizeTexture(std::unique_ptr<GPUTexture>* tex, u32 new_width, u
GPUTexture::Format format, GPUTexture::Flags flags, bool preserve /* = true */)
{
GPUTexture* old_tex = tex->get();
if (old_tex && old_tex->GetWidth() == new_width && old_tex->GetHeight() == new_height && old_tex->GetType() == type &&
old_tex->GetFormat() == format && old_tex->GetFlags() == flags)
{
return true;
}
DebugAssert(!old_tex || (old_tex->GetLayers() == 1 && old_tex->GetLevels() == 1 && old_tex->GetSamples() == 1));
std::unique_ptr<GPUTexture> new_tex = FetchTexture(new_width, new_height, 1, 1, 1, type, format, flags);
if (!new_tex) [[unlikely]]

View File

@@ -289,7 +289,7 @@ public:
MaxCount
};
enum class DepthFunc : u8
enum class ComparisonFunc : u8
{
Never,
Always,
@@ -302,6 +302,20 @@ public:
MaxCount
};
// Operation applied to the stencil buffer for a given stencil/depth test outcome.
// The enumerator order is significant: backends index per-API lookup tables with these values, and the
// values must fit in the 3-bit stencil op fields of DepthState.
enum class StencilOp : u8
{
Keep, // Mapped to the backend's STENCIL_OP_KEEP.
Zero, // Mapped to the backend's STENCIL_OP_ZERO.
Replace, // Mapped to the backend's STENCIL_OP_REPLACE.
IncrSat, // Mapped to the backend's STENCIL_OP_INCR_SAT.
DecrSat, // Mapped to the backend's STENCIL_OP_DECR_SAT.
Invert, // Mapped to the backend's STENCIL_OP_INVERT.
Incr, // Mapped to the backend's STENCIL_OP_INCR.
Decr, // Mapped to the backend's STENCIL_OP_DECR.
MaxCount // Number of operations; used to size the mapping tables.
};
enum class BlendFunc : u8
{
Zero,
@@ -353,9 +367,19 @@ public:
union DepthState
{
BitField<u8, DepthFunc, 0, 3> depth_test;
BitField<u8, bool, 4, 1> depth_write;
u8 key;
BitField<u32, ComparisonFunc, 0, 3> depth_test;
BitField<u32, bool, 4, 1> depth_write;
BitField<u32, bool, 5, 1> stencil_enable;
BitField<u32, StencilOp, 6, 3> front_stencil_fail_op;
BitField<u32, StencilOp, 9, 3> front_stencil_depth_fail_op;
BitField<u32, StencilOp, 12, 3> front_stencil_pass_op;
BitField<u32, ComparisonFunc, 15, 3> front_stencil_func;
BitField<u32, StencilOp, 18, 3> back_stencil_fail_op;
BitField<u32, StencilOp, 21, 3> back_stencil_depth_fail_op;
BitField<u32, StencilOp, 24, 3> back_stencil_pass_op;
BitField<u32, ComparisonFunc, 27, 3> back_stencil_func;
u32 key;
// clang-format off
ALWAYS_INLINE DepthState() = default;
@@ -366,6 +390,13 @@ public:
ALWAYS_INLINE bool operator<(const DepthState& rhs) const { return key < rhs.key; }
// clang-format on
/// Returns true if the depth portion of the key (depth_test in bits 0-2, depth_write in bit 4) is identical.
/// The mask spans bits 0-4 so that a change in depth_write alone is detected.
ALWAYS_INLINE bool DepthMatches(const DepthState& ds) const { return ((key & 0x1Fu) == (ds.key & 0x1Fu)); }
/// Returns true if the stencil portion of the key is identical.
/// The stencil fields span bits 5-29 (stencil_enable at bit 5 through back_stencil_func at bits 27-29),
/// so the mask covers exactly that range and excludes the depth_write bit.
ALWAYS_INLINE bool StencilMatches(const DepthState& ds) const
{
  return ((key & 0x3FFFFFE0u) == (ds.key & 0x3FFFFFE0u));
}
// Compares the 12 front-face stencil bits (starting at bit 6) against the 12 back-face stencil bits
// (starting at bit 18); true when both faces use the same ops and comparison function.
ALWAYS_INLINE bool FrontAndBackStencilAreSame() const { return ((key >> 6) & 0xFFFu) == ((key >> 18) & 0xFFFu); }
static DepthState GetNoTestsState();
static DepthState GetAlwaysWriteState();
};
@@ -417,11 +448,11 @@ public:
struct GraphicsConfig
{
Layout layout;
Primitive primitive;
InputLayout input_layout;
Layout layout;
Primitive primitive;
RasterizationState rasterization;
DepthState depth;
BlendState blend;
@@ -773,6 +804,7 @@ public:
GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) = 0;
virtual void ClearRenderTarget(GPUTexture* t, u32 c);
virtual void ClearDepth(GPUTexture* t, float d);
virtual void ClearStencil(GPUTexture* t, u8 value) = 0;
virtual void InvalidateRenderTarget(GPUTexture* t);
/// Shader abstraction.
@@ -826,6 +858,7 @@ public:
virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0;
virtual void SetViewport(const GSVector4i rc) = 0;
virtual void SetScissor(const GSVector4i rc) = 0;
virtual void SetStencilRef(u8 value) = 0;
void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr,
GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags);
void SetViewport(s32 x, s32 y, s32 width, s32 height);

View File

@@ -151,6 +151,7 @@ public:
{
return (m_type >= Type::RenderTarget && m_type <= Type::DepthStencil);
}
// Returns true when this texture's format is classified as a depth-stencil format by IsDepthStencilFormat().
ALWAYS_INLINE bool HasStencil() const { return IsDepthStencilFormat(m_format); }
ALWAYS_INLINE const ClearValue& GetClearValue() const { return m_clear_value; }
ALWAYS_INLINE u32 GetClearColor() const { return m_clear_value.color; }

View File

@@ -192,6 +192,33 @@ void OpenGLDevice::ClearDepth(GPUTexture* t, float d)
CommitDSClearInFB(static_cast<OpenGLTexture*>(t));
}
/// Clears the stencil plane of the given texture to `value`.
/// When the texture is not the currently bound depth target, it is temporarily attached to the write
/// framebuffer for the clear, then detached, and the current framebuffer is rebound.
/// The scissor test is disabled for the duration of the clear and re-enabled afterwards.
void OpenGLDevice::ClearStencil(GPUTexture* t, u8 value)
{
  OpenGLTexture* const tex = static_cast<OpenGLTexture*>(t);
  DebugAssert(tex->HasStencil());

  const GLint clear_value = value;
  const bool already_bound = (m_current_depth_target == tex);

  glDisable(GL_SCISSOR_TEST);

  if (!already_bound)
  {
    // Borrow the write FBO so the current framebuffer's attachments are not disturbed.
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo);
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, tex->GetGLTarget(), tex->GetGLId(), 0);
  }

  glClearBufferiv(GL_STENCIL, 0, &clear_value);

  if (!already_bound)
  {
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo);
  }

  glEnable(GL_SCISSOR_TEST);
}
void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t)
{
GPUDevice::InvalidateRenderTarget(t);
@@ -708,7 +735,8 @@ GLuint OpenGLDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUT
if (ds)
{
OpenGLTexture* const DS = static_cast<OpenGLTexture*>(ds);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, DS->GetGLTarget(), DS->GetGLId(), 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, DS->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT,
DS->GetGLTarget(), DS->GetGLId(), 0);
}
glDrawBuffers(num_rts, s_draw_buffers.data());
@@ -746,6 +774,7 @@ bool OpenGLDevice::CreateBuffers()
GL_OBJECT_NAME(m_uniform_buffer, "Device Uniform Buffer");
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&m_uniform_buffer_alignment));
m_uniform_buffer_alignment = std::max<GLuint>(m_uniform_buffer_alignment, 16);
if (!m_disable_pbo)
{
@@ -1270,6 +1299,16 @@ void OpenGLDevice::SetScissor(const GSVector4i rc)
UpdateScissor();
}
// Updates the cached stencil reference value, re-issuing the GL stencil function
// only when the value actually changed and stencil testing is currently enabled.
void OpenGLDevice::SetStencilRef(u8 value)
{
  if (m_last_stencil_ref != value)
  {
    // Always remember the new reference; the GL call is skipped while the cached
    // depth state has stencil testing disabled.
    m_last_stencil_ref = value;

    if (m_last_depth_state.stencil_enable)
      UpdateStencilFunc();
  }
}
void OpenGLDevice::UpdateViewport()
{
glViewport(m_last_viewport.left, m_last_viewport.top, m_last_viewport.width(), m_last_viewport.height());

View File

@@ -74,6 +74,7 @@ public:
u32 src_x, u32 src_y, u32 width, u32 height) override;
void ClearRenderTarget(GPUTexture* t, u32 c) override;
void ClearDepth(GPUTexture* t, float d) override;
void ClearStencil(GPUTexture* t, u8 value) override;
void InvalidateRenderTarget(GPUTexture* t) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
@@ -105,6 +106,7 @@ public:
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
void SetViewport(const GSVector4i rc) override;
void SetScissor(const GSVector4i rc) override;
void SetStencilRef(u8 value) override;
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
@@ -173,6 +175,7 @@ private:
void UpdateViewport();
void UpdateScissor();
void UpdateStencilFunc();
void CreateTimestampQueries();
void DestroyTimestampQueries();
@@ -206,8 +209,9 @@ private:
// VAO cache - fixed max as key
OpenGLPipeline::VertexArrayCache::const_iterator m_last_vao = m_vao_cache.cend();
GPUPipeline::BlendState m_last_blend_state = {};
GPUPipeline::RasterizationState m_last_rasterization_state = {};
GPUPipeline::DepthState m_last_depth_state = {};
GPUPipeline::RasterizationState m_last_rasterization_state = {};
u8 m_last_stencil_ref = 0;
GLuint m_uniform_buffer_alignment = 1;
GLuint m_last_program = 0;
u32 m_last_texture_unit = 0;

View File

@@ -686,9 +686,8 @@ ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyRasterizationState(GPUPipeline::Ra
m_last_rasterization_state = rs;
}
ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyDepthState(GPUPipeline::DepthState ds)
{
static constexpr std::array<GLenum, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)> func_mapping = {{
static constexpr std::array<GLenum, static_cast<u32>(GPUPipeline::ComparisonFunc::MaxCount)> s_comparison_func_mapping =
{{
GL_NEVER, // Never
GL_ALWAYS, // Always
GL_LESS, // Less
@@ -698,18 +697,82 @@ ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyDepthState(GPUPipeline::DepthState
GL_EQUAL, // Equal
}};
ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyDepthState(GPUPipeline::DepthState ds)
{
static constexpr std::array<GLenum, static_cast<u32>(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping = {{
GL_KEEP, // Keep
GL_ZERO, // Zero
GL_REPLACE, // Replace
GL_INCR, // IncrSat
GL_DECR, // DecrSat
GL_INVERT, // Invert
GL_INCR_WRAP, // Incr
GL_DECR_WRAP, // Decr
}};
if (m_last_depth_state == ds)
return;
(ds.depth_test != GPUPipeline::DepthFunc::Always || ds.depth_write) ? glEnable(GL_DEPTH_TEST) :
glDisable(GL_DEPTH_TEST);
glDepthFunc(func_mapping[static_cast<u8>(ds.depth_test.GetValue())]);
if (m_last_depth_state.depth_write != ds.depth_write)
glDepthMask(ds.depth_write);
if (!m_last_depth_state.DepthMatches(ds))
{
(ds.depth_test != GPUPipeline::ComparisonFunc::Always || ds.depth_write) ? glEnable(GL_DEPTH_TEST) :
glDisable(GL_DEPTH_TEST);
glDepthFunc(s_comparison_func_mapping[static_cast<u8>(ds.depth_test.GetValue())]);
if (m_last_depth_state.depth_write != ds.depth_write)
glDepthMask(ds.depth_write);
}
if (!m_last_depth_state.StencilMatches(ds))
{
if (m_last_depth_state.stencil_enable != ds.stencil_enable)
ds.stencil_enable ? glEnable(GL_STENCIL_TEST) : glDisable(GL_STENCIL_TEST);
if (ds.FrontAndBackStencilAreSame())
{
glStencilFuncSeparate(GL_FRONT_AND_BACK,
s_comparison_func_mapping[static_cast<u8>(ds.front_stencil_func.GetValue())],
m_last_stencil_ref, 0xFF);
glStencilOpSeparate(GL_FRONT_AND_BACK, stencil_op_mapping[static_cast<u8>(ds.front_stencil_fail_op.GetValue())],
stencil_op_mapping[static_cast<u8>(ds.front_stencil_depth_fail_op.GetValue())],
stencil_op_mapping[static_cast<u8>(ds.front_stencil_pass_op.GetValue())]);
}
else
{
glStencilFuncSeparate(GL_FRONT, s_comparison_func_mapping[static_cast<u8>(ds.front_stencil_func.GetValue())],
m_last_stencil_ref, 0xFF);
glStencilFuncSeparate(GL_BACK, s_comparison_func_mapping[static_cast<u8>(ds.back_stencil_func.GetValue())],
m_last_stencil_ref, 0xFF);
glStencilOpSeparate(GL_FRONT, stencil_op_mapping[static_cast<u8>(ds.front_stencil_fail_op.GetValue())],
stencil_op_mapping[static_cast<u8>(ds.front_stencil_depth_fail_op.GetValue())],
stencil_op_mapping[static_cast<u8>(ds.front_stencil_pass_op.GetValue())]);
glStencilOpSeparate(GL_BACK, stencil_op_mapping[static_cast<u8>(ds.back_stencil_fail_op.GetValue())],
stencil_op_mapping[static_cast<u8>(ds.back_stencil_depth_fail_op.GetValue())],
stencil_op_mapping[static_cast<u8>(ds.back_stencil_pass_op.GetValue())]);
}
}
m_last_depth_state = ds;
}
void OpenGLDevice::UpdateStencilFunc()
{
if (m_last_depth_state.FrontAndBackStencilAreSame())
{
glStencilFuncSeparate(GL_FRONT_AND_BACK,
s_comparison_func_mapping[static_cast<u8>(m_last_depth_state.front_stencil_func.GetValue())],
m_last_stencil_ref, 0xFF);
}
else
{
glStencilFuncSeparate(GL_FRONT,
s_comparison_func_mapping[static_cast<u8>(m_last_depth_state.front_stencil_func.GetValue())],
m_last_stencil_ref, 0xFF);
glStencilFuncSeparate(GL_BACK,
s_comparison_func_mapping[static_cast<u8>(m_last_depth_state.back_stencil_func.GetValue())],
m_last_stencil_ref, 0xFF);
}
}
ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyBlendState(GPUPipeline::BlendState bs)
{
static constexpr std::array<GLenum, static_cast<u32>(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{

View File

@@ -587,7 +587,9 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex)
{
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo);
const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0;
const GLenum attachment = tex->IsDepthStencil() ?
(tex->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT) :
GL_COLOR_ATTACHMENT0;
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0);
glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment);
@@ -612,7 +614,9 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex)
{
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo);
const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0;
const GLenum attachment = tex->IsDepthStencil() ?
(tex->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT) :
GL_COLOR_ATTACHMENT0;
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0);
if (tex->IsDepthStencil())
@@ -701,7 +705,7 @@ void OpenGLDevice::CommitDSClearInFB(OpenGLTexture* tex)
{
case GPUTexture::State::Invalidated:
{
const GLenum attachment = GL_DEPTH_ATTACHMENT;
const GLenum attachment = tex->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
if (glInvalidateFramebuffer)
glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment);
tex->SetState(GPUTexture::State::Dirty);

View File

@@ -582,9 +582,9 @@ bool PostProcessing::Chain::CheckTargets(GPUTexture::Format target_format, u32 t
GL_OBJECT_NAME(vso, "Post-processing rotate blit VS");
GL_OBJECT_NAME(vso, "Post-processing rotate blit FS");
const GPUPipeline::GraphicsConfig config = {.layout = GPUPipeline::Layout::SingleTextureAndPushConstants,
const GPUPipeline::GraphicsConfig config = {.input_layout = {},
.layout = GPUPipeline::Layout::SingleTextureAndPushConstants,
.primitive = GPUPipeline::Primitive::Triangles,
.input_layout = {},
.rasterization = GPUPipeline::RasterizationState::GetNoCullState(),
.depth = GPUPipeline::DepthState::GetNoTestsState(),
.blend = GPUPipeline::BlendState::GetNoBlendingState(),

View File

@@ -1976,7 +1976,7 @@ bool VulkanDevice::CreateDeviceAndMainSwapChain(std::string_view adapter, Featur
// Read device physical memory properties, we need it for allocating buffers
vkGetPhysicalDeviceProperties(m_physical_device, &m_device_properties);
m_device_properties.limits.minUniformBufferOffsetAlignment =
std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(16));
m_device_properties.limits.minTexelBufferOffsetAlignment =
std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
m_device_properties.limits.optimalBufferCopyOffsetAlignment =
@@ -2317,8 +2317,8 @@ void VulkanDevice::EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u
m_current_swap_chain = nullptr;
VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, SC->GetCurrentImage(), GPUTexture::Type::RenderTarget, 0, 1, 0,
1, VulkanTexture::Layout::ColorAttachment,
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, SC->GetCurrentImage(), GPUTexture::Type::RenderTarget,
SC->GetFormat(), 0, 1, 0, 1, VulkanTexture::Layout::ColorAttachment,
VulkanTexture::Layout::PresentSrc);
EndAndSubmitCommandBuffer(SC, explicit_present);
InvalidateCachedState();
@@ -2647,6 +2647,25 @@ void VulkanDevice::ClearDepth(GPUTexture* t, float d)
}
}
// Clears the stencil aspect of depth/stencil texture `t` to `value`.
// `t` must have a stencil component (checked with DebugAssert).
void VulkanDevice::ClearStencil(GPUTexture* t, u8 value)
{
  VulkanTexture* T = static_cast<VulkanTexture*>(t);
  DebugAssert(T->HasStencil());

  // Only the stencil member is consumed, since every clear below targets the stencil aspect alone.
  const VkClearDepthStencilValue clear_value = {0.0f, static_cast<u32>(value)};

  if (InRenderPass() && m_current_depth_target == T)
  {
    // Use an attachment clear so the render pass isn't restarted.
    const VkClearAttachment ca = {VK_IMAGE_ASPECT_STENCIL_BIT, 0, {.depthStencil = clear_value}};
    const VkClearRect rc = {{{0, 0}, {T->GetWidth(), T->GetHeight()}}, 0u, 1u};
    vkCmdClearAttachments(m_current_command_buffer, 1, &ca, 1, &rc);
    return;
  }

  // A render pass targeting a different depth buffer may still be active here. Both the layout
  // transition and vkCmdClearDepthStencilImage() must be recorded outside of a render pass.
  if (InRenderPass())
    EndRenderPass();

  const VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_STENCIL_BIT, 0u, 1u, 0u, 1u};
  T->TransitionToLayout(VulkanTexture::Layout::ClearDst);
  vkCmdClearDepthStencilImage(m_current_command_buffer, T->GetImage(), T->GetVkLayout(), &clear_value, 1, &srr);
}
void VulkanDevice::InvalidateRenderTarget(GPUTexture* t)
{
GPUDevice::InvalidateRenderTarget(t);
@@ -3012,11 +3031,13 @@ void VulkanDevice::RenderBlankFrame(VulkanSwapChain* swap_chain)
const VkImage image = swap_chain->GetCurrentImage();
static constexpr VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
static constexpr VkClearColorValue clear_color = {{0.0f, 0.0f, 0.0f, 1.0f}};
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
VulkanTexture::Layout::Undefined, VulkanTexture::Layout::TransferDst);
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, swap_chain->GetFormat(),
0, 1, 0, 1, VulkanTexture::Layout::Undefined,
VulkanTexture::Layout::TransferDst);
vkCmdClearColorImage(cmdbuf, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &srr);
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
VulkanTexture::Layout::TransferDst, VulkanTexture::Layout::PresentSrc);
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, swap_chain->GetFormat(),
0, 1, 0, 1, VulkanTexture::Layout::TransferDst,
VulkanTexture::Layout::PresentSrc);
EndAndSubmitCommandBuffer(swap_chain, false);
@@ -3205,7 +3226,7 @@ void VulkanDevice::BeginRenderPass()
VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr};
std::array<VkRenderingAttachmentInfoKHR, MAX_RENDER_TARGETS> attachments;
VkRenderingAttachmentInfoKHR depth_attachment;
VkRenderingAttachmentInfoKHR depth_attachment, stencil_attachment;
if (m_num_current_render_targets > 0 || m_current_depth_target)
{
@@ -3276,6 +3297,20 @@ void VulkanDevice::BeginRenderPass()
depth_attachment.clearValue.depthStencil = {ds->GetClearDepth(), 0u};
ds->SetState(GPUTexture::State::Dirty);
if (ds->HasStencil())
{
stencil_attachment.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
stencil_attachment.pNext = nullptr;
stencil_attachment.imageView = ds->GetView();
stencil_attachment.imageLayout = ds->GetVkLayout();
stencil_attachment.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
stencil_attachment.resolveImageView = VK_NULL_HANDLE;
stencil_attachment.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
ri.pStencilAttachment = &stencil_attachment;
}
}
const VulkanTexture* const rt_or_ds =
@@ -3372,7 +3407,15 @@ void VulkanDevice::BeginRenderPass()
// If this is a new command buffer, bind the pipeline and such.
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
{
SetInitialPipelineState();
}
else if (m_current_depth_target && m_current_depth_target->IsDepthStencil())
{
// Stencil reference still needs to be set.
vkCmdSetStencilReference(GetCurrentCommandBuffer(), VK_STENCIL_FACE_FRONT_AND_BACK,
ZeroExtend32(m_current_stencil_ref));
}
}
void VulkanDevice::BeginSwapChainRenderPass(VulkanSwapChain* swap_chain, u32 clear_color)
@@ -3383,8 +3426,8 @@ void VulkanDevice::BeginSwapChainRenderPass(VulkanSwapChain* swap_chain, u32 cle
const VkImage swap_chain_image = swap_chain->GetCurrentImage();
// Swap chain images start in undefined
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, swap_chain_image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
VulkanTexture::Layout::Undefined,
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, swap_chain_image, GPUTexture::Type::RenderTarget,
swap_chain->GetFormat(), 0, 1, 0, 1, VulkanTexture::Layout::Undefined,
VulkanTexture::Layout::ColorAttachment);
// All textures should be in shader read only optimal already, but just in case..
@@ -3563,6 +3606,12 @@ void VulkanDevice::SetInitialPipelineState()
const VkRect2D vrc = {{m_current_scissor.left, m_current_scissor.top},
{static_cast<u32>(m_current_scissor.width()), static_cast<u32>(m_current_scissor.height())}};
vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc);
if (m_current_depth_target && m_current_depth_target->IsDepthStencil())
{
vkCmdSetStencilReference(GetCurrentCommandBuffer(), VK_STENCIL_FACE_FRONT_AND_BACK,
ZeroExtend32(m_current_stencil_ref));
}
}
void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
@@ -3667,6 +3716,20 @@ void VulkanDevice::SetViewport(const GSVector4i rc)
vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp);
}
// Caches the stencil reference value and, when it can take effect immediately,
// records it into the current command buffer for both faces.
void VulkanDevice::SetStencilRef(u8 value)
{
  if (m_current_stencil_ref == value)
    return;

  m_current_stencil_ref = value;

  // if current DS does not have a stencil component, then dynamic stencil state will not be enabled.
  // HasStencil() checks the format; IsDepthStencil() only checks the texture type, which would be
  // true for depth-only targets as well.
  if (!InRenderPass() || !m_current_depth_target || !m_current_depth_target->HasStencil())
    return;

  vkCmdSetStencilReference(m_current_command_buffer, VK_STENCIL_FACE_FRONT_AND_BACK, ZeroExtend32(value));
}
void VulkanDevice::SetScissor(const GSVector4i rc)
{
if (m_current_scissor.eq(rc))

View File

@@ -108,6 +108,7 @@ public:
u32 src_x, u32 src_y, u32 width, u32 height) override;
void ClearRenderTarget(GPUTexture* t, u32 c) override;
void ClearDepth(GPUTexture* t, float d) override;
void ClearStencil(GPUTexture* t, u8 value) override;
void InvalidateRenderTarget(GPUTexture* t) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
@@ -139,6 +140,7 @@ public:
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
void SetViewport(const GSVector4i rc) override;
void SetScissor(const GSVector4i rc) override;
void SetStencilRef(u8 value) override;
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
@@ -471,6 +473,7 @@ private:
VulkanPipeline* m_current_pipeline = nullptr;
GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
u8 m_current_stencil_ref = 0;
std::array<VulkanTexture*, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
std::array<VkSampler, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};

View File

@@ -150,7 +150,7 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
VK_CULL_MODE_BACK_BIT, // Back
}};
static constexpr std::array<VkCompareOp, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)> compare_mapping = {{
static constexpr std::array<VkCompareOp, static_cast<u32>(GPUPipeline::ComparisonFunc::MaxCount)> compare_mapping = {{
VK_COMPARE_OP_NEVER, // Never
VK_COMPARE_OP_ALWAYS, // Always
VK_COMPARE_OP_LESS, // Less
@@ -160,6 +160,17 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
VK_COMPARE_OP_EQUAL, // Equal
}};
static constexpr std::array<VkStencilOp, static_cast<u32>(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping = {{
VK_STENCIL_OP_KEEP, // Keep
VK_STENCIL_OP_ZERO, // Zero
VK_STENCIL_OP_REPLACE, // Replace
VK_STENCIL_OP_INCREMENT_AND_CLAMP, // IncrSat
VK_STENCIL_OP_DECREMENT_AND_CLAMP, // DecrSat
VK_STENCIL_OP_INVERT, // Invert
VK_STENCIL_OP_INCREMENT_AND_WRAP, // Incr
VK_STENCIL_OP_DECREMENT_AND_WRAP, // Decr
}};
static constexpr std::array<VkBlendFactor, static_cast<u32>(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{
VK_BLEND_FACTOR_ZERO, // Zero
VK_BLEND_FACTOR_ONE, // One
@@ -215,9 +226,35 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
VK_FRONT_FACE_CLOCKWISE);
if (config.samples > 1)
gpb.SetMultisamples(config.samples, config.per_sample_shading);
gpb.SetDepthState(config.depth.depth_test != GPUPipeline::DepthFunc::Always || config.depth.depth_write,
gpb.SetDepthState(config.depth.depth_test != GPUPipeline::ComparisonFunc::Always || config.depth.depth_write,
config.depth.depth_write, compare_mapping[static_cast<u8>(config.depth.depth_test.GetValue())]);
gpb.SetNoStencilState();
if (config.depth.stencil_enable)
{
const VkStencilOpState front = {
.failOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_fail_op.GetValue())],
.passOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_pass_op.GetValue())],
.depthFailOp = stencil_op_mapping[static_cast<u8>(config.depth.front_stencil_depth_fail_op.GetValue())],
.compareOp = compare_mapping[static_cast<u8>(config.depth.front_stencil_func.GetValue())],
.compareMask = 0xFFu,
.writeMask = 0xFFu,
.reference = 0x00u,
};
const VkStencilOpState back = {
.failOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_fail_op.GetValue())],
.passOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_pass_op.GetValue())],
.depthFailOp = stencil_op_mapping[static_cast<u8>(config.depth.back_stencil_depth_fail_op.GetValue())],
.compareOp = compare_mapping[static_cast<u8>(config.depth.back_stencil_func.GetValue())],
.compareMask = 0xFFu,
.writeMask = 0xFFu,
.reference = 0x00u,
};
gpb.SetStencilState(true, front, back);
}
else
{
gpb.SetNoStencilState();
}
for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
{
@@ -239,6 +276,9 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
gpb.AddDynamicState(VK_DYNAMIC_STATE_VIEWPORT);
gpb.AddDynamicState(VK_DYNAMIC_STATE_SCISSOR);
if (GPUTexture::IsDepthStencilFormat(config.depth_format))
gpb.AddDynamicState(VK_DYNAMIC_STATE_STENCIL_REFERENCE);
gpb.SetPipelineLayout(m_pipeline_layouts[static_cast<size_t>(GetPipelineLayoutType(config.render_pass_flags))]
[static_cast<size_t>(config.layout)]);
@@ -258,8 +298,9 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
if (config.depth_format != GPUTexture::Format::Unknown)
{
gpb.SetDynamicRenderingDepthAttachment(VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.depth_format)],
VK_FORMAT_UNDEFINED);
const VkFormat vk_format = VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.depth_format)];
gpb.SetDynamicRenderingDepthAttachment(
vk_format, GPUTexture::IsDepthStencilFormat(config.depth_format) ? vk_format : VK_FORMAT_UNDEFINED);
}
if (config.render_pass_flags & GPUPipeline::ColorFeedbackLoop)

View File

@@ -116,7 +116,9 @@ std::unique_ptr<VulkanTexture> VulkanTexture::Create(u32 width, u32 height, u32
DebugAssert(levels == 1);
ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
// Depth-only formats have no stencil aspect, so the view falls back to the depth aspect alone.
vci.subresourceRange.aspectMask = IsDepthStencilFormat(format) ?
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) :
VK_IMAGE_ASPECT_DEPTH_BIT;
}
break;
@@ -490,19 +492,19 @@ void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffe
u32 start_level, u32 num_levels, Layout old_layout,
Layout new_layout)
{
TransitionSubresourcesToLayout(command_buffer, m_image, m_type, start_layer, num_layers, start_level, num_levels,
old_layout, new_layout);
TransitionSubresourcesToLayout(command_buffer, m_image, m_type, m_format, start_layer, num_layers, start_level,
num_levels, old_layout, new_layout);
}
void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type,
u32 start_layer, u32 num_layers, u32 start_level, u32 num_levels,
Layout old_layout, Layout new_layout)
Format format, u32 start_layer, u32 num_layers, u32 start_level,
u32 num_levels, Layout old_layout, Layout new_layout)
{
VkImageAspectFlags aspect;
if (type == Type::DepthStencil)
{
// TODO: detect stencil
aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
aspect = IsDepthStencilFormat(format) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) :
VK_IMAGE_ASPECT_DEPTH_BIT;
}
else
{

View File

@@ -73,9 +73,9 @@ public:
void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, u32 start_layer, u32 num_layers, u32 start_level,
u32 num_levels, Layout old_layout, Layout new_layout);
static void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type, u32 start_layer,
u32 num_layers, u32 start_level, u32 num_levels, Layout old_layout,
Layout new_layout);
static void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type, Format format,
u32 start_layer, u32 num_layers, u32 start_level, u32 num_levels,
Layout old_layout, Layout new_layout);
// Call when the texture is bound to the pipeline, or read from in a copy.
ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }