Optimize NDR uop_CALL_INSTRUCTION_FUNC by loading fetchdat and calling the instruction handler in one uOP instead of two

As a result, another 3-5% NDR improvement was observed on the WOLF3D MAPEDIT idle loop.
This commit is contained in:
Cacodemon345
2025-05-05 13:01:49 +06:00
parent 5584eb31a4
commit 0446e3c3f1
6 changed files with 12 additions and 5 deletions

View File

@@ -746,8 +746,7 @@ codegen_skip:
uop_MOV_PTR(ir, IREG_ea_seg, (void *) op_ea_seg); uop_MOV_PTR(ir, IREG_ea_seg, (void *) op_ea_seg);
if (op_ssegs != last_op_ssegs) if (op_ssegs != last_op_ssegs)
uop_MOV_IMM(ir, IREG_ssegs, op_ssegs); uop_MOV_IMM(ir, IREG_ssegs, op_ssegs);
uop_LOAD_FUNC_ARG_IMM(ir, 0, fetchdat); uop_CALL_INSTRUCTION_FUNC(ir, op, fetchdat);
uop_CALL_INSTRUCTION_FUNC(ir, op);
codegen_flags_changed = 0; codegen_flags_changed = 0;
codegen_mark_code_present(block, cs + cpu_state.pc, 8); codegen_mark_code_present(block, cs + cpu_state.pc, 8);

View File

@@ -218,6 +218,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop)
static int static int
codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop)
{ {
host_arm64_mov_imm(block, REG_ARG0, uop->imm_data);
host_arm64_call(block, uop->p); host_arm64_call(block, uop->p);
host_arm64_CBNZ(block, REG_X0, (uintptr_t) codegen_exit_rout); host_arm64_CBNZ(block, REG_X0, (uintptr_t) codegen_exit_rout);

View File

@@ -286,6 +286,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop)
static int static int
codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop)
{ {
host_arm_MOV_IMM(block, REG_ARG0, uop->imm_data);
host_arm_call(block, uop->p); host_arm_call(block, uop->p);
host_arm_TST_REG(block, REG_R0, REG_R0); host_arm_TST_REG(block, REG_R0, REG_R0);
host_arm_BNE(block, (uintptr_t) codegen_exit_rout); host_arm_BNE(block, (uintptr_t) codegen_exit_rout);

View File

@@ -219,6 +219,11 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop)
static int static int
codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop)
{ {
# if _WIN64
host_x86_MOV32_REG_IMM(block, REG_ECX, uop->imm_data);
# else
host_x86_MOV32_REG_IMM(block, REG_EDI, uop->imm_data);
# endif
host_x86_CALL(block, uop->p); host_x86_CALL(block, uop->p);
host_x86_TEST32_REG(block, REG_EAX, REG_EAX); host_x86_TEST32_REG(block, REG_EAX, REG_EAX);
host_x86_JNZ(block, codegen_exit_rout); host_x86_JNZ(block, codegen_exit_rout);

View File

@@ -221,6 +221,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop)
static int static int
codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop)
{ {
host_x86_MOV32_STACK_IMM(block, STACK_ARG0, uop->imm_data);
host_x86_CALL(block, uop->p); host_x86_CALL(block, uop->p);
host_x86_TEST32_REG(block, REG_EAX, REG_EAX); host_x86_TEST32_REG(block, REG_EAX, REG_EAX);
host_x86_JNZ(block, codegen_exit_rout); host_x86_JNZ(block, codegen_exit_rout);

View File

@@ -41,8 +41,8 @@
#define UOP_LOAD_FUNC_ARG_2_IMM (UOP_TYPE_PARAMS_IMM | 0x0a | UOP_TYPE_BARRIER) #define UOP_LOAD_FUNC_ARG_2_IMM (UOP_TYPE_PARAMS_IMM | 0x0a | UOP_TYPE_BARRIER)
#define UOP_LOAD_FUNC_ARG_3_IMM (UOP_TYPE_PARAMS_IMM | 0x0b | UOP_TYPE_BARRIER) #define UOP_LOAD_FUNC_ARG_3_IMM (UOP_TYPE_PARAMS_IMM | 0x0b | UOP_TYPE_BARRIER)
#define UOP_CALL_FUNC (UOP_TYPE_PARAMS_POINTER | 0x10 | UOP_TYPE_BARRIER) #define UOP_CALL_FUNC (UOP_TYPE_PARAMS_POINTER | 0x10 | UOP_TYPE_BARRIER)
/*UOP_CALL_INSTRUCTION_FUNC - call instruction handler at p, check return value and exit block if non-zero*/ /*UOP_CALL_INSTRUCTION_FUNC - call instruction handler at p with fetchdat, check return value and exit block if non-zero*/
#define UOP_CALL_INSTRUCTION_FUNC (UOP_TYPE_PARAMS_POINTER | 0x11 | UOP_TYPE_BARRIER) #define UOP_CALL_INSTRUCTION_FUNC (UOP_TYPE_PARAMS_POINTER | UOP_TYPE_PARAMS_IMM | 0x11 | UOP_TYPE_BARRIER)
#define UOP_STORE_P_IMM (UOP_TYPE_PARAMS_IMM | 0x12) #define UOP_STORE_P_IMM (UOP_TYPE_PARAMS_IMM | 0x12)
#define UOP_STORE_P_IMM_8 (UOP_TYPE_PARAMS_IMM | 0x13) #define UOP_STORE_P_IMM_8 (UOP_TYPE_PARAMS_IMM | 0x13)
/*UOP_LOAD_SEG - load segment in src_reg_a to segment p via loadseg(), check return value and exit block if non-zero*/ /*UOP_LOAD_SEG - load segment in src_reg_a to segment p via loadseg(), check return value and exit block if non-zero*/
@@ -662,7 +662,7 @@ uop_gen_reg_src2_pointer(uint32_t uop_type, ir_data_t *ir, int src_reg_a, int sr
#define uop_CALL_FUNC(ir, p) uop_gen_pointer(UOP_CALL_FUNC, ir, p) #define uop_CALL_FUNC(ir, p) uop_gen_pointer(UOP_CALL_FUNC, ir, p)
#define uop_CALL_FUNC_RESULT(ir, dst_reg, p) uop_gen_reg_dst_pointer(UOP_CALL_FUNC_RESULT, ir, dst_reg, p) #define uop_CALL_FUNC_RESULT(ir, dst_reg, p) uop_gen_reg_dst_pointer(UOP_CALL_FUNC_RESULT, ir, dst_reg, p)
#define uop_CALL_INSTRUCTION_FUNC(ir, p) uop_gen_pointer(UOP_CALL_INSTRUCTION_FUNC, ir, p) #define uop_CALL_INSTRUCTION_FUNC(ir, p, imm) uop_gen_pointer_imm(UOP_CALL_INSTRUCTION_FUNC, ir, p, imm)
#define uop_CMP_IMM_JZ(ir, src_reg, imm, p) uop_gen_reg_src_pointer_imm(UOP_CMP_IMM_JZ, ir, src_reg, p, imm) #define uop_CMP_IMM_JZ(ir, src_reg, imm, p) uop_gen_reg_src_pointer_imm(UOP_CMP_IMM_JZ, ir, src_reg, p, imm)