System: Serialize PGXP state for runahead

This commit is contained in:
Stenzek
2025-10-04 17:48:30 +10:00
parent 10f9039dea
commit 3966a9eea7
4 changed files with 57 additions and 5 deletions

View File

@@ -14,6 +14,7 @@
#include "settings.h"
#include "util/gpu_device.h"
#include "util/state_wrapper.h"
#include "common/assert.h"
#include "common/log.h"
@@ -60,6 +61,8 @@ enum : u32
#define SET_LOWORD(val, loword) ((static_cast<u32>(val) & 0xFFFF0000u) | static_cast<u32>(static_cast<u16>(loword)))
#define SET_HIWORD(val, hiword) ((static_cast<u32>(val) & 0x0000FFFFu) | (static_cast<u32>(hiword) << 16))
static bool ShouldSavePGXPState();
static double f16Sign(double val);
static double f16Unsign(double val);
static double f16Overflow(double val);
@@ -186,6 +189,42 @@ void CPU::PGXP::Shutdown()
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
}
/// Returns true when the extra PGXP state should be included in memory save states.
/// That is only the case when PGXP is enabled and runahead is in use; rewind is
/// deliberately excluded because the cost is too high and its glitches are far
/// less noticeable.
bool CPU::PGXP::ShouldSavePGXPState()
{
  // Without PGXP there is no precise-geometry state to preserve.
  if (!g_settings.gpu_pgxp_enable)
    return false;

  // Runahead is the only consumer of the serialized PGXP state.
  return g_settings.IsRunaheadEnabled();
}
/// Computes how many bytes DoState() needs for the additional PGXP state.
/// Returns zero when PGXP state is not being serialized.
size_t CPU::PGXP::GetStateSize()
{
  if (!ShouldSavePGXPState())
    return 0;

  // Register shadows for the CPU GPRs, COP0 and the GTE.
  size_t total_bytes = sizeof(g_state.pgxp_gpr);
  total_bytes += sizeof(g_state.pgxp_cop0);
  total_bytes += sizeof(g_state.pgxp_gte);

  // Per-address value table.
  total_bytes += sizeof(PGXPValue) * PGXP_MEM_SIZE;

  // The vertex cache is optional, so only account for it when the setting is on.
  if (g_settings.gpu_pgxp_vertex_cache)
    total_bytes += sizeof(PGXPValue) * VERTEX_CACHE_SIZE;

  return total_bytes;
}
/// Saves or loads the additional PGXP state through the given state wrapper.
/// Does nothing when PGXP state is not being serialized; in that case (e.g. rewind)
/// the value checks will fail and rendering falls back to imprecise geometry.
void CPU::PGXP::DoState(StateWrapper& sw)
{
  if (ShouldSavePGXPState())
  {
    const size_t mem_table_bytes = sizeof(PGXPValue) * PGXP_MEM_SIZE;
    const size_t vertex_cache_bytes = sizeof(PGXPValue) * VERTEX_CACHE_SIZE;

    // Register shadows first, in the same order the size is accounted for.
    sw.DoBytes(g_state.pgxp_gpr, sizeof(g_state.pgxp_gpr));
    sw.DoBytes(g_state.pgxp_cop0, sizeof(g_state.pgxp_cop0));
    sw.DoBytes(g_state.pgxp_gte, sizeof(g_state.pgxp_gte));

    // Then the memory value table, followed by the vertex cache if one is allocated.
    sw.DoBytes(s_mem, mem_table_bytes);
    if (s_vertex_cache)
      sw.DoBytes(s_vertex_cache, vertex_cache_bytes);
  }
}
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val)
{
const s32 s = static_cast<s32>(static_cast<s64>(val * (USHRT_MAX + 1)));

View File

@@ -11,6 +11,12 @@ void Initialize();
void Reset();
void Shutdown();
/// Returns the number of bytes needed to serialize additional PGXP state, or zero when it is not being saved.
size_t GetStateSize();
/// Save/load additional PGXP state.
void DoState(StateWrapper& sw);
/// Vertex lookup from GPU side.
bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
float* out_w);

View File

@@ -2629,7 +2629,7 @@ bool System::AllocateMemoryStates(size_t state_count, bool recycle_old_textures)
// Allocate CPU buffers.
// TODO: Maybe look at host memory limits here...
const size_t size = GetMaxSaveStateSize();
const size_t size = GetMaxMemorySaveStateSize();
for (MemorySaveState& mss : s_state.memory_save_states)
{
mss.state_size = 0;
@@ -2781,10 +2781,8 @@ void System::DoMemoryState(StateWrapper& sw, MemorySaveState& mss, bool update_d
sw.Do(&s_state.internal_frame_number);
SAVE_COMPONENT("CPU", CPU::DoState(sw));
CPU::PGXP::DoState(sw);
// don't need to reset pgxp because the value checks will save us from broken rendering, and it
// saves using imprecise values for a frame in 30fps games.
// TODO: Save PGXP state to memory state instead. It'll be 8MB, but potentially worth it.
if (sw.IsReading())
CPU::CodeCache::InvalidateAllRAMBlocks();
@@ -2908,6 +2906,11 @@ size_t System::GetMaxSaveStateSize()
return is_8mb_ram ? MAX_8MB_SAVE_STATE_SIZE : MAX_2MB_SAVE_STATE_SIZE;
}
/// Maximum size of an in-memory save state: the regular save state maximum
/// plus whatever the additional PGXP state currently requires.
size_t System::GetMaxMemorySaveStateSize()
{
  const size_t regular_state_size = GetMaxSaveStateSize();
  const size_t pgxp_state_size = CPU::PGXP::GetStateSize();
  return regular_state_size + pgxp_state_size;
}
std::string System::GetMediaPathFromSaveState(const char* path)
{
SaveStateBuffer buffer;
@@ -4487,6 +4490,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
g_settings.gpu_pgxp_texture_correction != old_settings.gpu_pgxp_texture_correction ||
g_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction ||
g_settings.gpu_pgxp_depth_buffer != old_settings.gpu_pgxp_depth_buffer ||
g_settings.gpu_pgxp_vertex_cache != old_settings.gpu_pgxp_vertex_cache ||
g_settings.display_active_start_offset != old_settings.display_active_start_offset ||
g_settings.display_active_end_offset != old_settings.display_active_end_offset ||
g_settings.display_line_start_offset != old_settings.display_line_start_offset ||
@@ -4938,7 +4942,7 @@ void System::LogUnsafeSettingsToConsole(const SmallStringBase& messages)
/// Estimates the memory consumed by num_saves rewind states, writing the RAM total to
/// *ram_usage and the VRAM total to *vram_usage.
void System::CalculateRewindMemoryUsage(u32 num_saves, u32 resolution_scale, u64* ram_usage, u64* vram_usage)
{
// NOTE(review): the resolution_scale parameter is not read here; the scale comes from
// g_settings.gpu_resolution_scale, clamped to at least 1 - confirm this is intended.
const u64 real_resolution_scale = std::max<u64>(g_settings.gpu_resolution_scale, 1u);
// NOTE(review): *ram_usage is assigned twice in a row, so only the second value (which adds the
// PGXP state size) is kept. The first assignment looks like the pre-change line - confirm.
*ram_usage = GetMaxSaveStateSize() * static_cast<u64>(num_saves);
*ram_usage = GetMaxMemorySaveStateSize() * static_cast<u64>(num_saves);
// Scaled VRAM width * scaled VRAM height * 4 (presumably bytes per pixel - TODO confirm),
// multiplied by the multisample count and the number of saves.
*vram_usage = ((VRAM_WIDTH * real_resolution_scale) * (VRAM_HEIGHT * real_resolution_scale) * 4) *
static_cast<u64>(g_settings.gpu_multisamples) * static_cast<u64>(num_saves);
}

View File

@@ -269,6 +269,9 @@ bool CanPauseSystem(bool display_message);
/// Returns the maximum size of a save state, considering the current configuration.
size_t GetMaxSaveStateSize();
/// Returns the maximum size of a save state that is not expected to be serialized to file.
size_t GetMaxMemorySaveStateSize();
/// Loads state from the specified path.
bool LoadState(const char* path, Error* error, bool save_undo_state, bool force_update_display);
bool SaveState(std::string path, Error* error, bool backup_existing_save, bool ignore_memcard_busy);