Compare commits

...

3 Commits

Author SHA1 Message Date
Stenzek
b18c61f4b2 testing 2025-03-02 21:18:20 +10:00
Stenzek
fc90d84788 tailcall 2025-03-02 15:48:28 +10:00
Stenzek
872a48d616 testing 2025-03-02 15:17:09 +10:00
4 changed files with 2200 additions and 1009 deletions

View File

@@ -73,10 +73,9 @@ static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write);
static void AddBlockToPageList(Block* block);
static void RemoveBlockFromPageList(Block* block);
static Block* CreateCachedInterpreterBlock(u32 pc);
static void SetCachedInterpreterHandlers();
static void CompileCachedInterpreterBlock(const u32);
[[noreturn]] static void ExecuteCachedInterpreter();
template<PGXPMode pgxp_mode>
[[noreturn]] static void ExecuteCachedInterpreterImpl();
// Fast map provides lookup from PC to function
// Function pointers are offset so that you don't need to subtract
@@ -216,6 +215,12 @@ void CPU::CodeCache::Reset()
CompileASMFunctions();
ResetCodeLUT();
}
else
{
SetCachedInterpreterHandlers();
ResetCodeBuffer();
ResetCodeLUT();
}
}
void CPU::CodeCache::Shutdown()
@@ -708,15 +713,104 @@ PageFaultHandler::HandlerResult PageFaultHandler::HandlePageFault(void* exceptio
// MARK: - Cached Interpreter
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
CPU::CodeCache::Block* CPU::CodeCache::CreateCachedInterpreterBlock(u32 pc)
// Forward declarations of the two built-in cached-interpreter handlers.
// Both are declared through DEFINE_CACHED_INTERPRETER_HANDLER, so they share the common handler
// signature, and both are `static` (internal linkage).
namespace CPU::CodeCache::CachedInterpreterFunctions {
static DEFINE_CACHED_INTERPRETER_HANDLER(CompileOrRevalidateBlock);
static DEFINE_CACHED_INTERPRETER_HANDLER(LookupAndExecuteBlock);
} // namespace CPU::CodeCache::CachedInterpreterFunctions
DEFINE_CACHED_INTERPRETER_HANDLER(CPU::CodeCache::CachedInterpreterFunctions::LookupAndExecuteBlock)
{
BlockMetadata metadata = {};
ReadBlockInstructions(pc, &s_block_instructions, &metadata);
return CreateBlock(pc, s_block_instructions, metadata);
const u32 pc = g_state.pc;
const u32 table = pc >> LUT_TABLE_SHIFT;
const u32 idx = (pc & 0xFFFF) >> 2;
const CachedInterpreterInstruction* cinst =
reinterpret_cast<const CachedInterpreterInstruction*>(g_code_lut[table][idx]);
#ifdef HAS_MUSTTAIL
RETURN_MUSTTAIL(cinst->handler(cinst));
#else
do
{
cinst = cinst->handler(cinst);
} while (cinst);
return nullptr;
#endif
}
template<PGXPMode pgxp_mode>
[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreterImpl()
// Handler that makes the block starting at g_state.pc executable, then continues into it.
// If a block is already known for that PC it is revalidated when possible; otherwise the guest
// instructions are re-read and compiled into a fresh CachedInterpreterInstruction sequence.
// The handler argument is not used in this body; the start address always comes from g_state.pc.
// Every path that continues execution calls MemMap::EndCodeWrite() before handing control to
// block->host_code via CACHED_INTERPRETER_HANDLER_RETURN.
DEFINE_CACHED_INTERPRETER_HANDLER(CPU::CodeCache::CachedInterpreterFunctions::CompileOrRevalidateBlock)
{
const u32 start_pc = g_state.pc;
MemMap::BeginCodeWrite();
// Revalidation
Block* block = LookupBlock(start_pc);
if (block)
{
// we should only be here if the block got invalidated
DebugAssert(block->state != BlockState::Valid);
if (RevalidateBlock(block))
{
// Existing code is still usable: republish it in the code LUT and re-point incoming links at it.
DebugAssert(block->host_code);
SetCodeLUT(start_pc, block->host_code);
BacklinkBlocks(start_pc, block->host_code);
MemMap::EndCodeWrite();
CACHED_INTERPRETER_HANDLER_RETURN(static_cast<const CachedInterpreterInstruction*>(block->host_code));
}
// remove outward links from this block, since we're recompiling it
UnlinkBlockExits(block);
}
// Re-read the guest instructions for this block into the shared s_block_instructions buffer.
BlockMetadata metadata = {};
if (!ReadBlockInstructions(start_pc, &s_block_instructions, &metadata))
{
// NOTE(review): the log text announces a fallback to the uncached interpreter, but the next
// statement is Panic() - no fallback is implemented on this path yet.
ERROR_LOG("Failed to read block at 0x{:08X}, falling back to uncached interpreter", start_pc);
Panic("Fixme");
}
// TODO: size calc is wrong, should use max insn size
// The estimate is (instruction count + 3) entries of sizeof(CachedInterpreterInstruction) each.
const u32 required_space = sizeof(CachedInterpreterInstruction) * (static_cast<u32>(s_block_instructions.size()) + 3);
if (GetFreeCodeSpace() < required_space)
{
// Not enough room left: reset the whole code cache. The `block` pointer obtained above is not
// used after this point; it is overwritten by CreateBlock() below.
ERROR_LOG("Out of code space while compiling {:08X}. Resetting code cache.", start_pc);
CodeCache::Reset();
}
block = CreateBlock(start_pc, s_block_instructions, metadata);
if (!block)
{
Panic("Fixme");
}
// Emit the instruction sequence at the current free position of the code buffer.
CachedInterpreterCompiler compiler(block, reinterpret_cast<CachedInterpreterInstruction*>(GetFreeCodePointer()));
if (!compiler.CompileBlock())
Panic("Fixme");
// Record where the emitted code lives, commit that many bytes, then publish the block.
block->host_code = compiler.GetCodeStart();
block->host_code_size = compiler.GetCodeSize();
CommitCode(block->host_code_size);
SetCodeLUT(start_pc, block->host_code);
BacklinkBlocks(start_pc, block->host_code);
MemMap::EndCodeWrite();
// TODO: Block linking!
// Continue execution at the first instruction of the freshly compiled block.
CACHED_INTERPRETER_HANDLER_RETURN(static_cast<const CachedInterpreterInstruction*>(block->host_code));
}
void CPU::CodeCache::SetCachedInterpreterHandlers()
{
static constexpr const CachedInterpreterInstruction compile_or_revalidate_block_seq = {
&CachedInterpreterFunctions::CompileOrRevalidateBlock};
static constexpr const CachedInterpreterInstruction lookup_and_execute_block_seq = {
&CachedInterpreterFunctions::LookupAndExecuteBlock};
g_compile_or_revalidate_block = &compile_or_revalidate_block_seq;
g_dispatcher = &lookup_and_execute_block_seq;
}
[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreter()
{
#define CHECK_DOWNCOUNT() \
if (g_state.pending_ticks >= g_state.downcount) \
@@ -733,98 +827,45 @@ template<PGXPMode pgxp_mode>
LogCurrentState();
#endif
#if 0
if ((g_state.pending_ticks + TimingEvents::GetGlobalTickCounter()) == 3301006214)
if ((g_state.pending_ticks + TimingEvents::GetGlobalTickCounter()) == 108345628)
__debugbreak();
#endif
// Manually done because we don't want to compile blocks without a LUT.
const u32 pc = g_state.pc;
const u32 table = pc >> LUT_TABLE_SHIFT;
Block* block;
if (s_block_lut[table])
{
const u32 idx = (pc & 0xFFFF) >> 2;
block = s_block_lut[table][idx];
}
else
{
// Likely invalid code...
goto interpret_block;
}
const u32 idx = (pc & 0xFFFF) >> 2;
const CachedInterpreterInstruction* cinst =
reinterpret_cast<const CachedInterpreterInstruction*>(g_code_lut[table][idx]);
reexecute_block:
if (!block)
{
if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
goto interpret_block;
}
else
{
if (block->state == BlockState::FallbackToInterpreter) [[unlikely]]
goto interpret_block;
if ((block->state != BlockState::Valid && !RevalidateBlock(block)) ||
(block->protection == PageProtectionMode::ManualCheck && !IsBlockCodeCurrent(block)))
{
if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
goto interpret_block;
}
}
// Execute block.
DebugAssert(!(HasPendingInterrupt()));
if (block->HasFlag(BlockFlags::IsUsingICache))
{
CheckAndUpdateICacheTags(block->icache_line_count);
}
else if (block->HasFlag(BlockFlags::NeedsDynamicFetchTicks))
{
AddPendingTicks(
static_cast<TickCount>(block->size * static_cast<u32>(*Bus::GetMemoryAccessTimePtr(
block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word))));
}
else
{
AddPendingTicks(block->uncached_fetch_ticks);
}
InterpretCachedBlock<pgxp_mode>(block);
#ifdef HAS_MUSTTAIL
cinst->handler(cinst);
#else
do
{
cinst = cinst->handler(cinst);
} while (cinst);
#endif
CHECK_DOWNCOUNT();
// Handle self-looping blocks
if (g_state.pc == block->pc)
goto reexecute_block;
else
continue;
interpret_block:
InterpretUncachedBlock<pgxp_mode>();
CHECK_DOWNCOUNT();
continue;
// if (g_state.pc == pc)
// goto reexecute_block;
}
TimingEvents::RunEvents();
}
}
// Picks the ExecuteCachedInterpreterImpl instantiation that matches the current PGXP settings
// and transfers control to it; the selected implementation never returns.
[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreter()
{
  // PGXP off: plain interpreter loop.
  if (!g_settings.gpu_pgxp_enable)
    ExecuteCachedInterpreterImpl<PGXPMode::Disabled>();
  // PGXP on: CPU mode when requested, memory mode otherwise.
  else if (g_settings.gpu_pgxp_cpu)
    ExecuteCachedInterpreterImpl<PGXPMode::CPU>();
  else
    ExecuteCachedInterpreterImpl<PGXPMode::Memory>();
}
void CPU::CodeCache::LogCurrentState()
{
#if 0
if (System::GetGlobalTickCounter() == 2546728915)
if (System::GetGlobalTickCounter() == 9953322268)
__debugbreak();
#endif
#if 0
@@ -1414,11 +1455,25 @@ void CPU::CodeCache::BacklinkBlocks(u32 pc, const void* dst)
return;
const auto link_range = s_block_links.equal_range(pc);
for (auto it = link_range.first; it != link_range.second; ++it)
if (IsUsingRecompiler())
{
DEBUG_LOG("Backlinking {} with dst pc {:08X} to {}{}", it->second, pc, dst,
(dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
EmitJump(it->second, dst, true);
for (auto it = link_range.first; it != link_range.second; ++it)
{
DEBUG_LOG("Backlinking {} with dst pc {:08X} to {}{}", it->second, pc, dst,
(dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
EmitJump(it->second, dst, true);
}
}
else
{
// TODO: maybe move this up to the compiler function
for (auto it = link_range.first; it != link_range.second; ++it)
{
DEBUG_LOG("Backlinking {} with dst pc {:08X} to {}{}", it->second, pc, dst,
(dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
std::memcpy(it->second, &dst, sizeof(void*));
}
}
}

View File

@@ -13,6 +13,11 @@
#include <array>
#include <unordered_map>
// Guaranteed tail calls for the cached interpreter's handler chain.
// HAS_MUSTTAIL is only defined when the compiler is Clang AND actually implements the `musttail`
// statement attribute. Testing __clang__ alone would also enable it on Clang releases that predate
// the attribute, where the handler chain would no longer be a guaranteed tail call.
// The __has_attribute probe is nested in its own #if so that preprocessors without
// __has_attribute never have to parse the function-like invocation.
// RETURN_MUSTTAIL(val): `val` must be a call expression whose signature is compatible with the
// enclosing function, as required by the attribute.
#if defined(__clang__) && defined(__has_attribute)
#if __has_attribute(musttail)
#define HAS_MUSTTAIL 1
#define RETURN_MUSTTAIL(val) __attribute__((musttail)) return val
#endif
#endif
namespace CPU::CodeCache {
enum : u32
@@ -205,8 +210,109 @@ struct PageProtectionInfo
};
static_assert(sizeof(PageProtectionInfo) == (sizeof(Block*) * 2 + 8));
template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const Block* block);
struct CachedInterpreterInstruction;
// Handler calling convention for the cached interpreter.
//  - With HAS_MUSTTAIL a handler returns void and finishes by tail-calling the handler of the
//    next instruction.
//  - Without it a handler returns the next instruction to run and the caller loops on that value.
//
// DEFINE_CACHED_INTERPRETER_HANDLER(name) declares/defines a handler; its parameter is `cbaseinst`.
// CACHED_INTERPRETER_HANDLER_RETURN(value) continues execution at `value`. The argument is
// parenthesized so compound expressions (e.g. a conditional) expand correctly. In the tail-call
// build `value` is evaluated twice, so it must be free of side effects.
#ifdef HAS_MUSTTAIL
using CachedInterpreterHandler = void (*)(const CachedInterpreterInstruction*);
#define DEFINE_CACHED_INTERPRETER_HANDLER(name) void name(const CPU::CodeCache::CachedInterpreterInstruction* cbaseinst)
#define CACHED_INTERPRETER_HANDLER_RETURN(value) RETURN_MUSTTAIL((value)->handler(value))
#else
using CachedInterpreterHandler = const CachedInterpreterInstruction* (*)(const CachedInterpreterInstruction*);
#define DEFINE_CACHED_INTERPRETER_HANDLER(name)                                                                       \
  const CPU::CodeCache::CachedInterpreterInstruction* name(                                                           \
    const CPU::CodeCache::CachedInterpreterInstruction* cbaseinst)
#define CACHED_INTERPRETER_HANDLER_RETURN(value) return (value)
#endif

// Identical under both conventions, so defined once.
// CACHED_INTERPRETER_INSTRUCTION_TYPE(type) declares `cinst` as the handler argument downcast to `type`.
#define CACHED_INTERPRETER_INSTRUCTION_TYPE(type) const type* cinst = static_cast<const type*>(cbaseinst)
// END_CACHED_INTERPRETER_INSTRUCTION() continues at the record immediately following `cinst`
// (pointer arithmetic on the downcast type, so the step is sizeof that type).
#define END_CACHED_INTERPRETER_INSTRUCTION() CACHED_INTERPRETER_HANDLER_RETURN((cinst + 1))
// Base record of a cached-interpreter instruction stream: just the handler to invoke.
// The other Cached*Instruction structs in this header derive from it and append their operands.
struct CachedInterpreterInstruction
{
CachedInterpreterHandler handler;
};
// The base record must be exactly one handler pointer in size (no padding or extra members).
static_assert(sizeof(CachedInterpreterInstruction) == sizeof(CachedInterpreterHandler));
// Instruction record carrying one u32 operand in addition to the handler.
// The meaning of `arg` is defined by whichever handler is paired with the record.
struct CachedInterpreterIntArgInstruction : CachedInterpreterInstruction
{
u32 arg;
};
// Instruction record that carries an Instruction value and a PC alongside the handler.
// Presumably `inst` is the guest instruction being executed and `pc` its guest address - TODO confirm
// against the compiler implementation.
struct CachedInterpreterMIPSInstruction : CachedInterpreterInstruction
{
Instruction inst;
u32 pc;
};
// Instruction record holding a pointer to another instruction plus a u32 `target_pc`.
// Presumably `target` is the entry of the block at guest address `target_pc`, and is the pointer
// that block backlinking rewrites - TODO confirm against the compiler implementation.
struct CachedInterpreterBlockLinkInstruction : CachedInterpreterInstruction
{
const CachedInterpreterInstruction* target;
u32 target_pc;
};
// CachedInterpreterMIPSInstruction extended with a second continuation pointer.
// Presumably execution continues at `not_taken_target` when the branch condition is false -
// TODO confirm against the branch handlers.
struct CachedInterpreterConditionalBranchInstruction : CachedInterpreterMIPSInstruction
{
const CachedInterpreterInstruction* not_taken_target;
};
// Emits the CachedInterpreterInstruction sequence for one Block into caller-provided storage.
// Only the inline members are defined here; the constructor and Compile* functions are
// implemented elsewhere.
class CachedInterpreterCompiler
{
public:
// block: the block to compile. cinst: where emitted instructions are written
// (presumably stored as the initial m_code_start/m_code_ptr - TODO confirm in the constructor).
CachedInterpreterCompiler(Block* block, CachedInterpreterInstruction* cinst);
// First emitted instruction.
CachedInterpreterInstruction* GetCodeStart() const { return m_code_start; }
// Number of bytes between the code start and the current write pointer.
u32 GetCodeSize() const
{
return static_cast<u32>(reinterpret_cast<u8*>(m_code_ptr) - reinterpret_cast<u8*>(m_code_start));
}
// Compiles the whole block; returns false on failure.
bool CompileBlock();
private:
bool CompileInstruction();
bool CompileBranchDelaySlot();
bool CompileUnconditionalBranch();
bool CompileConditionalBranch();
bool CompileIndirectBranch();
// Presumably save/restore the cursor fields mirrored in StateBackup - TODO confirm.
void BackupState();
void RestoreState();
void AddBlockLinkInstruction(u32 target_pc);
// Reserves the next record in the output stream as a T and advances the write pointer by one T.
// The returned storage is NOT initialized here (the caller fills in handler and operands), and
// no bounds check is made against the code buffer. T must derive from CachedInterpreterInstruction
// for the static_cast to be valid.
template<typename T>
T* AddInstruction()
{
T* ret = static_cast<T*>(m_code_ptr);
m_code_ptr = (ret + 1);
return ret;
}
Block* m_block = nullptr;
// Start of the emitted sequence and current write position within it.
CachedInterpreterInstruction* m_code_start = nullptr;
CachedInterpreterInstruction* m_code_ptr = nullptr;
// Instruction / instruction-info cursors (note: these two members carry no m_ prefix).
const Instruction* inst = nullptr;
const InstructionInfo* iinfo = nullptr;
u32 m_compiler_pc = 0;
u32 m_current_instruction_pc = 0;
bool m_block_ended = false;
bool m_has_load_delay = false;
// Snapshot with one field per cursor/flag member declared above.
struct StateBackup
{
const Instruction* inst;
const InstructionInfo* iinfo;
u32 compiler_pc;
u32 current_instruction_pc;
bool block_ended;
bool has_load_delay;
};
StateBackup m_state_backup = {};
};
template<PGXPMode pgxp_mode>
void InterpretUncachedBlock();

File diff suppressed because it is too large Load Diff

View File

@@ -168,4 +168,79 @@ void UncheckedWriteMemoryWord(u32 address, u32 value);
#endif
// clang-format off
// X-macro over the CPU instruction mnemonics: expands to X(name) or eX(name) once per entry, in
// the order listed. Callers supply both macros.
// The criterion separating the two groups is not visible here - presumably eX marks instructions
// that can raise a CPU exception (trapping add/sub, memory and coprocessor accesses); TODO confirm.
// NOTE(review): some entries (and, or, xor, break) are C++ keywords/alternative tokens, so X/eX
// are presumably expected to paste or stringize the name rather than use it bare - confirm at
// the use sites.
// Do not add // comments inside the list: they would swallow the line-continuation backslashes.
#define CPU_FOR_EACH_INSTRUCTION(X, eX) \
X(b) \
X(j) \
X(jal) \
X(beq) \
X(bne) \
X(blez) \
X(bgtz) \
eX(addi) \
X(addiu) \
X(slti) \
X(sltiu) \
X(andi) \
X(ori) \
X(xori) \
X(lui) \
eX(lb) \
eX(lh) \
eX(lwl) \
eX(lw) \
eX(lbu) \
eX(lhu) \
eX(lwr) \
eX(sb) \
eX(sh) \
eX(swl) \
eX(sw) \
eX(swr) \
eX(mfc0) \
eX(mtc0) \
X(rfe) \
X(mfc2) \
X(mtc2) \
X(cfc2) \
X(ctc2) \
X(cop2) \
eX(lwc0) \
eX(lwc1) \
eX(lwc2) \
eX(lwc3) \
eX(swc0) \
eX(swc1) \
eX(swc2) \
eX(swc3) \
X(sll) \
X(srl) \
X(sra) \
X(sllv) \
X(srlv) \
X(srav) \
X(jr) \
X(jalr) \
X(syscall) \
X(break)\
X(mfhi) \
X(mthi) \
X(mflo) \
X(mtlo) \
X(mult) \
X(multu) \
X(div) \
X(divu) \
eX(add) \
X(addu) \
eX(sub) \
X(subu) \
X(and) \
X(or) \
X(xor) \
X(nor) \
X(slt) \
X(sltu)
// clang-format on
} // namespace CPU