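Optimize NDR uop_CALL_INSTRUCTION_FUNC by passing fetchdat as an immediate of the call uOP itself, instead of emitting a separate uop_LOAD_FUNC_ARG_IMM uOP before it (one uOP instead of two)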
As a result, a further 3-5% NDR performance improvement was observed in the WOLF3D MAPEDIT idle loop.
This commit is contained in:
@@ -746,8 +746,7 @@ codegen_skip:
|
||||
uop_MOV_PTR(ir, IREG_ea_seg, (void *) op_ea_seg);
|
||||
if (op_ssegs != last_op_ssegs)
|
||||
uop_MOV_IMM(ir, IREG_ssegs, op_ssegs);
|
||||
uop_LOAD_FUNC_ARG_IMM(ir, 0, fetchdat);
|
||||
uop_CALL_INSTRUCTION_FUNC(ir, op);
|
||||
uop_CALL_INSTRUCTION_FUNC(ir, op, fetchdat);
|
||||
codegen_flags_changed = 0;
|
||||
codegen_mark_code_present(block, cs + cpu_state.pc, 8);
|
||||
|
||||
|
||||
@@ -218,6 +218,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop)
|
||||
static int
|
||||
codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop)
|
||||
{
|
||||
host_arm64_mov_imm(block, REG_ARG0, uop->imm_data);
|
||||
host_arm64_call(block, uop->p);
|
||||
host_arm64_CBNZ(block, REG_X0, (uintptr_t) codegen_exit_rout);
|
||||
|
||||
|
||||
@@ -286,6 +286,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop)
|
||||
static int
|
||||
codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop)
|
||||
{
|
||||
host_arm_MOV_IMM(block, REG_ARG0, uop->imm_data);
|
||||
host_arm_call(block, uop->p);
|
||||
host_arm_TST_REG(block, REG_R0, REG_R0);
|
||||
host_arm_BNE(block, (uintptr_t) codegen_exit_rout);
|
||||
|
||||
@@ -219,6 +219,11 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop)
|
||||
static int
|
||||
codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop)
|
||||
{
|
||||
# if _WIN64
|
||||
host_x86_MOV32_REG_IMM(block, REG_ECX, uop->imm_data);
|
||||
# else
|
||||
host_x86_MOV32_REG_IMM(block, REG_EDI, uop->imm_data);
|
||||
# endif
|
||||
host_x86_CALL(block, uop->p);
|
||||
host_x86_TEST32_REG(block, REG_EAX, REG_EAX);
|
||||
host_x86_JNZ(block, codegen_exit_rout);
|
||||
|
||||
@@ -221,6 +221,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop)
|
||||
static int
|
||||
codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop)
|
||||
{
|
||||
host_x86_MOV32_STACK_IMM(block, STACK_ARG0, uop->imm_data);
|
||||
host_x86_CALL(block, uop->p);
|
||||
host_x86_TEST32_REG(block, REG_EAX, REG_EAX);
|
||||
host_x86_JNZ(block, codegen_exit_rout);
|
||||
|
||||
@@ -41,8 +41,8 @@
|
||||
#define UOP_LOAD_FUNC_ARG_2_IMM (UOP_TYPE_PARAMS_IMM | 0x0a | UOP_TYPE_BARRIER)
|
||||
#define UOP_LOAD_FUNC_ARG_3_IMM (UOP_TYPE_PARAMS_IMM | 0x0b | UOP_TYPE_BARRIER)
|
||||
#define UOP_CALL_FUNC (UOP_TYPE_PARAMS_POINTER | 0x10 | UOP_TYPE_BARRIER)
|
||||
/*UOP_CALL_INSTRUCTION_FUNC - call instruction handler at p, check return value and exit block if non-zero*/
|
||||
#define UOP_CALL_INSTRUCTION_FUNC (UOP_TYPE_PARAMS_POINTER | 0x11 | UOP_TYPE_BARRIER)
|
||||
/*UOP_CALL_INSTRUCTION_FUNC - call instruction handler at p with fetchdat, check return value and exit block if non-zero*/
|
||||
#define UOP_CALL_INSTRUCTION_FUNC (UOP_TYPE_PARAMS_POINTER | UOP_TYPE_PARAMS_IMM | 0x11 | UOP_TYPE_BARRIER)
|
||||
#define UOP_STORE_P_IMM (UOP_TYPE_PARAMS_IMM | 0x12)
|
||||
#define UOP_STORE_P_IMM_8 (UOP_TYPE_PARAMS_IMM | 0x13)
|
||||
/*UOP_LOAD_SEG - load segment in src_reg_a to segment p via loadseg(), check return value and exit block if non-zero*/
|
||||
@@ -662,7 +662,7 @@ uop_gen_reg_src2_pointer(uint32_t uop_type, ir_data_t *ir, int src_reg_a, int sr
|
||||
|
||||
#define uop_CALL_FUNC(ir, p) uop_gen_pointer(UOP_CALL_FUNC, ir, p)
|
||||
#define uop_CALL_FUNC_RESULT(ir, dst_reg, p) uop_gen_reg_dst_pointer(UOP_CALL_FUNC_RESULT, ir, dst_reg, p)
|
||||
#define uop_CALL_INSTRUCTION_FUNC(ir, p) uop_gen_pointer(UOP_CALL_INSTRUCTION_FUNC, ir, p)
|
||||
#define uop_CALL_INSTRUCTION_FUNC(ir, p, imm) uop_gen_pointer_imm(UOP_CALL_INSTRUCTION_FUNC, ir, p, imm)
|
||||
|
||||
#define uop_CMP_IMM_JZ(ir, src_reg, imm, p) uop_gen_reg_src_pointer_imm(UOP_CMP_IMM_JZ, ir, src_reg, p, imm)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user