GPU: Move draw helper functions to cpp file

No derived classes that need this inlined anymore.
This commit is contained in:
Stenzek
2025-12-13 16:03:33 +10:00
parent ed531da03b
commit 69784d10a3
3 changed files with 112 additions and 107 deletions

View File

@@ -1931,7 +1931,7 @@ void GPU::ClearDisplay()
void GPU::UpdateDisplay(bool submit_frame)
{
const bool interlaced = IsInterlacedDisplayEnabled();
const u8 interlaced_field = GetInterlacedDisplayField();
const u8 interlaced_field = m_crtc_state.interlaced_field;
const bool line_skip = (interlaced && m_GPUSTAT.vertical_resolution);
// NOTE: Must be split out, since this can push commands itself (e.g. media capture).

View File

@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
@@ -274,12 +274,6 @@ private:
void UpdateDMARequest();
void UpdateGPUIdle();
/// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...).
ALWAYS_INLINE u8 GetInterlacedDisplayField() const { return m_crtc_state.interlaced_field; }
/// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1.
ALWAYS_INLINE u8 GetActiveLineLSB() const { return m_crtc_state.active_line_lsb; }
/// Updates drawing area that's suitablef or clamping.
void SetClampedDrawingArea();
@@ -296,11 +290,7 @@ private:
void FinishVRAMWrite();
/// Returns the number of vertices in the buffered poly-line.
ALWAYS_INLINE u32 GetPolyLineVertexCount() const
{
return (static_cast<u32>(m_polyline_buffer.size()) + BoolToUInt32(m_render_command.shading_enable)) >>
BoolToUInt8(m_render_command.shading_enable);
}
u32 GetPolyLineVertexCount() const;
void AddCommandTicks(TickCount ticks);
@@ -320,99 +310,10 @@ private:
void FinishPolyline();
void FillDrawCommand(GPUBackendDrawCommand* RESTRICT cmd, GPURenderCommand rc) const;
ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(GSVector2i v1, GSVector2i v2, GSVector2i v3, bool shaded,
bool textured, bool semitransparent)
{
// This will not produce the correct results for triangles which are partially outside the clip area.
// However, usually it'll undershoot not overshoot. If we wanted to make this more accurate, we'd need to intersect
// the edges with the clip rectangle.
// TODO: Coordinates are exclusive, so off by one here...
const GSVector2i clamp_min = GSVector2i::load<true>(&m_clamped_drawing_area.x);
const GSVector2i clamp_max = GSVector2i::load<true>(&m_clamped_drawing_area.z);
v1 = v1.sat_s32(clamp_min, clamp_max);
v2 = v2.sat_s32(clamp_min, clamp_max);
v3 = v3.sat_s32(clamp_min, clamp_max);
TickCount pixels =
std::abs((v1.x * v2.y + v2.x * v3.y + v3.x * v1.y - v1.x * v3.y - v2.x * v1.y - v3.x * v2.y) / 2);
if (textured)
pixels += pixels;
if (semitransparent || m_GPUSTAT.check_mask_before_draw)
pixels += (pixels + 1) / 2;
if (m_GPUSTAT.SkipDrawingToActiveField())
pixels /= 2;
AddCommandTicks(pixels);
}
ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i rect, bool textured, bool semitransparent)
{
const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);
u32 drawn_width = clamped_rect.width();
u32 drawn_height = clamped_rect.height();
u32 ticks_per_row = drawn_width;
if (textured)
{
switch (m_draw_mode.mode_reg.texture_mode)
{
case GPUTextureMode::Palette4Bit:
ticks_per_row += drawn_width;
break;
case GPUTextureMode::Palette8Bit:
{
// Texture cache reload every 2 pixels, reads in 8 bytes (assuming 4x2). Cache only reloads if the
// draw width is greater than 128, otherwise the cache hits between rows.
if (drawn_width > 128)
ticks_per_row += (drawn_width / 4) * 8;
else if ((drawn_width * drawn_height) > 2048)
ticks_per_row += ((drawn_width / 4) * (4 * (128 / drawn_width)));
else
ticks_per_row += drawn_width;
}
break;
case GPUTextureMode::Direct16Bit:
case GPUTextureMode::Reserved_Direct16Bit:
{
// Same as above, except with 2x2 blocks instead of 4x2.
if (drawn_width > 128)
ticks_per_row += (drawn_width / 2) * 8;
else if ((drawn_width * drawn_height) > 1024)
ticks_per_row += ((drawn_width / 4) * (8 * (128 / drawn_width)));
else
ticks_per_row += drawn_width;
}
break;
DefaultCaseIsUnreachable()
}
}
if (semitransparent || m_GPUSTAT.check_mask_before_draw)
ticks_per_row += (drawn_width + 1u) / 2u;
if (m_GPUSTAT.SkipDrawingToActiveField())
drawn_height = std::max<u32>(drawn_height / 2, 1u);
AddCommandTicks(ticks_per_row * drawn_height);
}
ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i rect, bool shaded)
{
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
// Needed because we're not multiplying either dimension.
if (clamped_rect.rempty())
return;
const u32 drawn_width = clamped_rect.width();
u32 drawn_height = clamped_rect.height();
if (m_GPUSTAT.SkipDrawingToActiveField())
drawn_height = std::max<u32>(drawn_height / 2, 1u);
AddCommandTicks(std::max(drawn_width, drawn_height));
}
void AddDrawTriangleTicks(GSVector2i v1, GSVector2i v2, GSVector2i v3, bool shaded, bool textured,
bool semitransparent);
void AddDrawRectangleTicks(const GSVector4i rect, bool textured, bool semitransparent);
void AddDrawLineTicks(const GSVector4i rect, bool shaded);
GPUSTAT m_GPUSTAT = {};
@@ -514,8 +415,11 @@ private:
bool in_hblank;
bool in_vblank;
u8 interlaced_field; // 0 = odd, 1 = even
/// 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...)
u8 interlaced_field;
u8 interlaced_display_field;
/// 0 if the currently-displayed field is on an even line in VRAM, otherwise 1.
u8 active_line_lsb;
ALWAYS_INLINE void UpdateHBlankFlag()

View File

@@ -344,6 +344,107 @@ void GPU::FillDrawCommand(GPUBackendDrawCommand* RESTRICT cmd, GPURenderCommand
cmd->window = m_draw_mode.texture_window;
}
ALWAYS_INLINE u32 GPU::GetPolyLineVertexCount() const
{
return (static_cast<u32>(m_polyline_buffer.size()) + BoolToUInt32(m_render_command.shading_enable)) >>
BoolToUInt8(m_render_command.shading_enable);
}
ALWAYS_INLINE_RELEASE void GPU::AddDrawTriangleTicks(GSVector2i v1, GSVector2i v2, GSVector2i v3, bool shaded,
bool textured, bool semitransparent)
{
// This will not produce the correct results for triangles which are partially outside the clip area.
// However, usually it'll undershoot not overshoot. If we wanted to make this more accurate, we'd need to intersect
// the edges with the clip rectangle.
// TODO: Coordinates are exclusive, so off by one here...
const GSVector2i clamp_min = GSVector2i::load<true>(&m_clamped_drawing_area.x);
const GSVector2i clamp_max = GSVector2i::load<true>(&m_clamped_drawing_area.z);
v1 = v1.sat_s32(clamp_min, clamp_max);
v2 = v2.sat_s32(clamp_min, clamp_max);
v3 = v3.sat_s32(clamp_min, clamp_max);
TickCount pixels = std::abs((v1.x * v2.y + v2.x * v3.y + v3.x * v1.y - v1.x * v3.y - v2.x * v1.y - v3.x * v2.y) / 2);
if (textured)
pixels += pixels;
if (semitransparent || m_GPUSTAT.check_mask_before_draw)
pixels += (pixels + 1) / 2;
if (m_GPUSTAT.SkipDrawingToActiveField())
pixels /= 2;
AddCommandTicks(pixels);
}
ALWAYS_INLINE_RELEASE void GPU::AddDrawRectangleTicks(const GSVector4i rect, bool textured, bool semitransparent)
{
const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);
u32 drawn_width = clamped_rect.width();
u32 drawn_height = clamped_rect.height();
u32 ticks_per_row = drawn_width;
if (textured)
{
switch (m_draw_mode.mode_reg.texture_mode)
{
case GPUTextureMode::Palette4Bit:
ticks_per_row += drawn_width;
break;
case GPUTextureMode::Palette8Bit:
{
// Texture cache reload every 2 pixels, reads in 8 bytes (assuming 4x2). Cache only reloads if the
// draw width is greater than 128, otherwise the cache hits between rows.
if (drawn_width > 128)
ticks_per_row += (drawn_width / 4) * 8;
else if ((drawn_width * drawn_height) > 2048)
ticks_per_row += ((drawn_width / 4) * (4 * (128 / drawn_width)));
else
ticks_per_row += drawn_width;
}
break;
case GPUTextureMode::Direct16Bit:
case GPUTextureMode::Reserved_Direct16Bit:
{
// Same as above, except with 2x2 blocks instead of 4x2.
if (drawn_width > 128)
ticks_per_row += (drawn_width / 2) * 8;
else if ((drawn_width * drawn_height) > 1024)
ticks_per_row += ((drawn_width / 4) * (8 * (128 / drawn_width)));
else
ticks_per_row += drawn_width;
}
break;
DefaultCaseIsUnreachable()
}
}
if (semitransparent || m_GPUSTAT.check_mask_before_draw)
ticks_per_row += (drawn_width + 1u) / 2u;
if (m_GPUSTAT.SkipDrawingToActiveField())
drawn_height = std::max<u32>(drawn_height / 2, 1u);
AddCommandTicks(ticks_per_row * drawn_height);
}
ALWAYS_INLINE_RELEASE void GPU::AddDrawLineTicks(const GSVector4i rect, bool shaded)
{
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
// Needed because we're not multiplying either dimension.
if (clamped_rect.rempty())
return;
const u32 drawn_width = clamped_rect.width();
u32 drawn_height = clamped_rect.height();
if (m_GPUSTAT.SkipDrawingToActiveField())
drawn_height = std::max<u32>(drawn_height / 2, 1u);
AddCommandTicks(std::max(drawn_width, drawn_height));
}
bool GPU::HandleRenderPolygonCommand()
{
const GPURenderCommand rc{FifoPeek(0)};