From 0446e3c3f1bd908c8083cafe3d4469954c342297 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Mon, 5 May 2025 13:01:49 +0600 Subject: [PATCH] Optimize NDR `uop_CALL_INSTRUCTION_FUNC` by loading the fetchdat in one uOP instead of two Another 3-5% NDR improvement noticed on the WOLF3D MAPEDIT idle loop as a result. --- src/codegen_new/codegen.c | 3 +-- src/codegen_new/codegen_backend_arm64_uops.c | 1 + src/codegen_new/codegen_backend_arm_uops.c | 1 + src/codegen_new/codegen_backend_x86-64_uops.c | 5 +++++ src/codegen_new/codegen_backend_x86_uops.c | 1 + src/codegen_new/codegen_ir_defs.h | 6 +++--- 6 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/codegen_new/codegen.c b/src/codegen_new/codegen.c index 26a74016a..875dd72ca 100644 --- a/src/codegen_new/codegen.c +++ b/src/codegen_new/codegen.c @@ -746,8 +746,7 @@ codegen_skip: uop_MOV_PTR(ir, IREG_ea_seg, (void *) op_ea_seg); if (op_ssegs != last_op_ssegs) uop_MOV_IMM(ir, IREG_ssegs, op_ssegs); - uop_LOAD_FUNC_ARG_IMM(ir, 0, fetchdat); - uop_CALL_INSTRUCTION_FUNC(ir, op); + uop_CALL_INSTRUCTION_FUNC(ir, op, fetchdat); codegen_flags_changed = 0; codegen_mark_code_present(block, cs + cpu_state.pc, 8); diff --git a/src/codegen_new/codegen_backend_arm64_uops.c b/src/codegen_new/codegen_backend_arm64_uops.c index 82cc79cfd..2bb6281ff 100644 --- a/src/codegen_new/codegen_backend_arm64_uops.c +++ b/src/codegen_new/codegen_backend_arm64_uops.c @@ -218,6 +218,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop) static int codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) { + host_arm64_mov_imm(block, REG_ARG0, uop->imm_data); host_arm64_call(block, uop->p); host_arm64_CBNZ(block, REG_X0, (uintptr_t) codegen_exit_rout); diff --git a/src/codegen_new/codegen_backend_arm_uops.c b/src/codegen_new/codegen_backend_arm_uops.c index b6963562c..b186e0e3b 100644 --- a/src/codegen_new/codegen_backend_arm_uops.c +++ b/src/codegen_new/codegen_backend_arm_uops.c @@ -286,6 +286,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop) static int codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) { + host_arm_MOV_IMM(block, REG_ARG0, uop->imm_data); host_arm_call(block, uop->p); host_arm_TST_REG(block, REG_R0, REG_R0); host_arm_BNE(block, (uintptr_t) codegen_exit_rout); diff --git a/src/codegen_new/codegen_backend_x86-64_uops.c b/src/codegen_new/codegen_backend_x86-64_uops.c index 655896b54..6b68434a0 100644 --- a/src/codegen_new/codegen_backend_x86-64_uops.c +++ b/src/codegen_new/codegen_backend_x86-64_uops.c @@ -219,6 +219,11 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop) static int codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) { +# if _WIN64 + host_x86_MOV32_REG_IMM(block, REG_ECX, uop->imm_data); +# else + host_x86_MOV32_REG_IMM(block, REG_EDI, uop->imm_data); +# endif host_x86_CALL(block, uop->p); host_x86_TEST32_REG(block, REG_EAX, REG_EAX); host_x86_JNZ(block, codegen_exit_rout); diff --git a/src/codegen_new/codegen_backend_x86_uops.c b/src/codegen_new/codegen_backend_x86_uops.c index 02c441234..fad088822 100644 --- a/src/codegen_new/codegen_backend_x86_uops.c +++ b/src/codegen_new/codegen_backend_x86_uops.c @@ -221,6 +221,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop) static int codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) { + host_x86_MOV32_STACK_IMM(block, STACK_ARG0, uop->imm_data); host_x86_CALL(block, uop->p); host_x86_TEST32_REG(block, REG_EAX, REG_EAX); host_x86_JNZ(block, codegen_exit_rout); diff --git a/src/codegen_new/codegen_ir_defs.h b/src/codegen_new/codegen_ir_defs.h index d55e57f3d..8c66b11f3 100644 --- a/src/codegen_new/codegen_ir_defs.h +++ b/src/codegen_new/codegen_ir_defs.h @@ -41,8 +41,8 @@ #define UOP_LOAD_FUNC_ARG_2_IMM (UOP_TYPE_PARAMS_IMM | 0x0a | UOP_TYPE_BARRIER) #define UOP_LOAD_FUNC_ARG_3_IMM (UOP_TYPE_PARAMS_IMM | 0x0b | UOP_TYPE_BARRIER) #define UOP_CALL_FUNC (UOP_TYPE_PARAMS_POINTER | 0x10 | UOP_TYPE_BARRIER) -/*UOP_CALL_INSTRUCTION_FUNC - call instruction handler at p, check return value and exit block if non-zero*/ -#define UOP_CALL_INSTRUCTION_FUNC (UOP_TYPE_PARAMS_POINTER | 0x11 | UOP_TYPE_BARRIER) +/*UOP_CALL_INSTRUCTION_FUNC - call instruction handler at p with fetchdat, check return value and exit block if non-zero*/ +#define UOP_CALL_INSTRUCTION_FUNC (UOP_TYPE_PARAMS_POINTER | UOP_TYPE_PARAMS_IMM | 0x11 | UOP_TYPE_BARRIER) #define UOP_STORE_P_IMM (UOP_TYPE_PARAMS_IMM | 0x12) #define UOP_STORE_P_IMM_8 (UOP_TYPE_PARAMS_IMM | 0x13) /*UOP_LOAD_SEG - load segment in src_reg_a to segment p via loadseg(), check return value and exit block if non-zero*/ @@ -662,7 +662,7 @@ uop_gen_reg_src2_pointer(uint32_t uop_type, ir_data_t *ir, int src_reg_a, int sr #define uop_CALL_FUNC(ir, p) uop_gen_pointer(UOP_CALL_FUNC, ir, p) #define uop_CALL_FUNC_RESULT(ir, dst_reg, p) uop_gen_reg_dst_pointer(UOP_CALL_FUNC_RESULT, ir, dst_reg, p) -#define uop_CALL_INSTRUCTION_FUNC(ir, p) uop_gen_pointer(UOP_CALL_INSTRUCTION_FUNC, ir, p) +#define uop_CALL_INSTRUCTION_FUNC(ir, p, imm) uop_gen_pointer_imm(UOP_CALL_INSTRUCTION_FUNC, ir, p, imm) #define uop_CMP_IMM_JZ(ir, src_reg, imm, p) uop_gen_reg_src_pointer_imm(UOP_CMP_IMM_JZ, ir, src_reg, p, imm)