diff --git a/src/codegen_new/codegen.c b/src/codegen_new/codegen.c index 26a74016a..875dd72ca 100644 --- a/src/codegen_new/codegen.c +++ b/src/codegen_new/codegen.c @@ -746,8 +746,7 @@ codegen_skip: uop_MOV_PTR(ir, IREG_ea_seg, (void *) op_ea_seg); if (op_ssegs != last_op_ssegs) uop_MOV_IMM(ir, IREG_ssegs, op_ssegs); - uop_LOAD_FUNC_ARG_IMM(ir, 0, fetchdat); - uop_CALL_INSTRUCTION_FUNC(ir, op); + uop_CALL_INSTRUCTION_FUNC(ir, op, fetchdat); codegen_flags_changed = 0; codegen_mark_code_present(block, cs + cpu_state.pc, 8); diff --git a/src/codegen_new/codegen_backend_arm64_uops.c b/src/codegen_new/codegen_backend_arm64_uops.c index 82cc79cfd..2bb6281ff 100644 --- a/src/codegen_new/codegen_backend_arm64_uops.c +++ b/src/codegen_new/codegen_backend_arm64_uops.c @@ -218,6 +218,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop) static int codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) { + host_arm64_mov_imm(block, REG_ARG0, uop->imm_data); host_arm64_call(block, uop->p); host_arm64_CBNZ(block, REG_X0, (uintptr_t) codegen_exit_rout); diff --git a/src/codegen_new/codegen_backend_arm_uops.c b/src/codegen_new/codegen_backend_arm_uops.c index b6963562c..b186e0e3b 100644 --- a/src/codegen_new/codegen_backend_arm_uops.c +++ b/src/codegen_new/codegen_backend_arm_uops.c @@ -286,6 +286,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop) static int codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) { + host_arm_MOV_IMM(block, REG_ARG0, uop->imm_data); host_arm_call(block, uop->p); host_arm_TST_REG(block, REG_R0, REG_R0); host_arm_BNE(block, (uintptr_t) codegen_exit_rout); diff --git a/src/codegen_new/codegen_backend_x86-64_ops.c b/src/codegen_new/codegen_backend_x86-64_ops.c index 9ac8d2474..9f89012c6 100644 --- a/src/codegen_new/codegen_backend_x86-64_ops.c +++ b/src/codegen_new/codegen_backend_x86-64_ops.c @@ -68,8 +68,10 @@ jmp(codeblock_t *block, uintptr_t func) void host_x86_ADD8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_ADD8_REG_IMM - dst_reg & 8\n"); +#endif if (dst_reg == REG_EAX) { codegen_alloc_bytes(block, 2); @@ -82,8 +84,10 @@ host_x86_ADD8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) void host_x86_ADD16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_ADD16_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 4); @@ -101,8 +105,10 @@ host_x86_ADD16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) void host_x86_ADD32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_ADD32_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 3); @@ -120,8 +126,10 @@ host_x86_ADD32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) void host_x86_ADD64_REG_IMM(codeblock_t *block, int dst_reg, uint64_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_ADD64_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 4); @@ -132,8 +140,10 @@ host_x86_ADD64_REG_IMM(codeblock_t *block, int dst_reg, uint64_t imm_data) void host_x86_ADD8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_ADD8_REG_REG - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x00, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); 
/*ADD dst_reg, src_reg*/ @@ -141,8 +151,10 @@ host_x86_ADD8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_ADD16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_ADD16_REG_REG - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0x01, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*ADD dst_reg, src_reg*/ @@ -150,8 +162,10 @@ host_x86_ADD16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_ADD32_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_ADD32_REG_REG - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x01, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*ADD dst_reg, src_reg*/ @@ -160,8 +174,10 @@ host_x86_ADD32_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_AND8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_AND8_REG_IMM - dst_reg & 8\n"); +#endif if (dst_reg == REG_EAX) { codegen_alloc_bytes(block, 2); @@ -174,8 +190,10 @@ host_x86_AND8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) void host_x86_AND16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_AND16_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 4); @@ -193,8 +211,10 @@ host_x86_AND16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) void host_x86_AND32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_AND32_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 3); @@ -212,8 +232,10 @@ host_x86_AND32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) void host_x86_AND8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_AND8_REG_REG - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x20, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*AND dst_reg, src_reg*/ @@ -221,8 +243,10 @@ host_x86_AND8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_AND16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_AND16_REG_REG - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0x21, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*AND dst_reg, src_reg*/ @@ -230,8 +254,10 @@ host_x86_AND16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_AND32_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_AND32_REG_REG - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x21, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*AND dst_reg, src_reg*/ @@ -482,8 +508,10 @@ host_x86_LEA_REG_IMM(codeblock_t *block, int dst_reg, int src_reg, uint32_t offs void host_x86_LEA_REG_REG(codeblock_t *block, int dst_reg, int src_reg_a, int src_reg_b) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg_a & 8) || (src_reg_b & 8)) fatal("host_x86_LEA_REG_REG - bad reg\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x8d, 0x04 | ((dst_reg & 7) << 3), /*LEA dst_reg, [Rsrc_reg_a + 
Rsrc_reg_b]*/ @@ -492,8 +520,10 @@ host_x86_LEA_REG_REG(codeblock_t *block, int dst_reg, int src_reg_a, int src_reg void host_x86_LEA_REG_REG_SHIFT(codeblock_t *block, int dst_reg, int src_reg_a, int src_reg_b, int shift) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg_a & 8) || (src_reg_b & 8)) fatal("host_x86_LEA_REG_REG_SHIFT - bad reg\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x8d, 0x04 | ((dst_reg & 7) << 3), /*LEA dst_reg, [Rsrc_reg_a + Rsrc_reg_b * (1 << shift)]*/ @@ -575,8 +605,10 @@ host_x86_MOV8_ABS_REG(codeblock_t *block, void *p, int src_reg) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); +#ifdef RECOMPILER_DEBUG if (src_reg & 8) fatal("host_x86_MOV8_ABS_REG - bad reg\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 3); @@ -599,8 +631,10 @@ host_x86_MOV16_ABS_REG(codeblock_t *block, void *p, int src_reg) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); +#ifdef RECOMPILER_DEBUG if (src_reg & 8) fatal("host_x86_MOV16_ABS_REG - bad reg\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); @@ -619,8 +653,10 @@ host_x86_MOV32_ABS_REG(codeblock_t *block, void *p, int src_reg) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); +#ifdef RECOMPILER_DEBUG if (src_reg & 8) fatal("host_x86_MOV32_ABS_REG - bad reg\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 3); @@ -643,8 +679,10 @@ host_x86_MOV64_ABS_REG(codeblock_t *block, void *p, int src_reg) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); +#ifdef RECOMPILER_DEBUG if (src_reg & 8) fatal("host_x86_MOV64_ABS_REG - bad reg\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); @@ -665,8 +703,11 @@ host_x86_MOV64_ABS_REG(codeblock_t *block, void *p, int src_reg) void host_x86_MOV8_ABS_REG_REG_SHIFT_REG(codeblock_t *block, uint32_t addr, int base_reg, int index_reg, int shift, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((src_reg & 8) || (base_reg & 8) | (index_reg & 8)) fatal("host_x86_MOV8_BASE_INDEX_REG reg & 8\n"); +#endif + if (addr < 0x80 || addr >= 0xffffff80) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x88, 0x44 | (src_reg << 3), base_reg | (index_reg << 3) | (shift << 6), addr & 0xff); /*MOV addr[base_reg + idx_reg << shift], src_reg*/ @@ -680,24 +721,30 @@ host_x86_MOV8_ABS_REG_REG_SHIFT_REG(codeblock_t *block, uint32_t addr, int base_ void host_x86_MOV8_BASE_INDEX_REG(codeblock_t *block, int base_reg, int index_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((src_reg & 8) || (base_reg & 8) | (index_reg & 8)) fatal("host_x86_MOV8_BASE_INDEX_REG reg & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x88, 0x04 | (src_reg << 3), (index_reg << 3) | base_reg); /*MOV B[base_reg + index_reg], src_reg*/ } void host_x86_MOV16_BASE_INDEX_REG(codeblock_t *block, int base_reg, int index_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((src_reg & 8) || (base_reg & 8) | (index_reg & 8)) fatal("host_x86_MOV8_BASE_INDEX_REG reg & 8\n"); +#endif codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x66, 0x89, 0x04 | (src_reg << 3), (index_reg << 3) | base_reg); /*MOV W[base_reg + index_reg], src_reg*/ } void host_x86_MOV32_BASE_INDEX_REG(codeblock_t *block, int base_reg, int index_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((src_reg & 8) || (base_reg & 8) | (index_reg & 8)) fatal("host_x86_MOV8_BASE_INDEX_REG reg & 8\n"); +#endif codegen_alloc_bytes(block, 3); 
codegen_addbyte3(block, 0x89, 0x04 | (src_reg << 3), (index_reg << 3) | base_reg); /*MOV L[base_reg + index_reg], src_reg*/ } @@ -708,8 +755,10 @@ host_x86_MOV8_REG_ABS(codeblock_t *block, int dst_reg, void *p) int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_MOV8_REG_ABS reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 3); @@ -736,8 +785,10 @@ host_x86_MOV16_REG_ABS(codeblock_t *block, int dst_reg, void *p) int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_MOV16_REG_ABS reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); @@ -766,8 +817,10 @@ host_x86_MOV32_REG_ABS(codeblock_t *block, int dst_reg, void *p) int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_MOV32_REG_ABS reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 3); @@ -798,8 +851,10 @@ host_x86_MOV64_REG_ABS(codeblock_t *block, int dst_reg, void *p) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_MOV64_REG_ABS reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); @@ -815,8 +870,10 @@ host_x86_MOV64_REG_ABS(codeblock_t *block, int dst_reg, void *p) void host_x86_MOV8_REG_ABS_REG_REG_SHIFT(codeblock_t *block, int dst_reg, uint32_t addr, int base_reg, int index_reg, int shift) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (base_reg & 8) | (index_reg & 8)) fatal("host_x86_MOV8_REG_ABS_REG_REG_SHIFT reg & 8\n"); +#endif if (addr < 0x80 || addr >= 0xffffff80) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x8a, 0x44 | (dst_reg << 3), base_reg | (index_reg << 3) | (shift << 6), addr & 0xff); /*MOV addr[base_reg + idx_reg << shift], src_reg*/ @@ -830,8 +887,10 @@ host_x86_MOV8_REG_ABS_REG_REG_SHIFT(codeblock_t *block, int dst_reg, uint32_t ad void host_x86_MOV32_REG_BASE_INDEX(codeblock_t *block, int dst_reg, int base_reg, int index_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (base_reg & 8) | (index_reg & 8)) fatal("host_x86_MOV32_REG_BASE_INDEX reg & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x8b, 0x04 | (dst_reg << 3), (index_reg << 3) | base_reg); /*MOV dst_reg, Q[base_reg + index_reg]*/ } @@ -839,8 +898,10 @@ host_x86_MOV32_REG_BASE_INDEX(codeblock_t *block, int dst_reg, int base_reg, int void host_x86_MOV64_REG_BASE_INDEX_SHIFT(codeblock_t *block, int dst_reg, int base_reg, int index_reg, int scale) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (index_reg & 8)) fatal("host_x86_MOV64_REG_BASE_INDEX_SHIFT reg & 8\n"); +#endif codegen_alloc_bytes(block, 4); if (base_reg & 8) codegen_addbyte4(block, 0x49, 0x8b, 0x04 | ((dst_reg & 7) << 3), (scale << 6) | ((index_reg & 7) << 3) | (base_reg & 7)); /*MOV dst_reg, Q[base_reg + index_reg << scale]*/ @@ -851,8 +912,10 @@ host_x86_MOV64_REG_BASE_INDEX_SHIFT(codeblock_t *block, int dst_reg, int base_re void host_x86_MOV16_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, int offset) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (base_reg & 8)) 
fatal("host_x86_MOV16_REG_BASE_OFFSET reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { @@ -869,8 +932,10 @@ host_x86_MOV16_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, in void host_x86_MOV32_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, int offset) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (base_reg & 8)) fatal("host_x86_MOV32_REG_BASE_OFFSET reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { @@ -886,8 +951,10 @@ host_x86_MOV32_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, in void host_x86_MOV64_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, int offset) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (base_reg & 8)) fatal("host_x86_MOV64_REG_BASE_OFFSET reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { @@ -905,8 +972,10 @@ host_x86_MOV64_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, in void host_x86_MOV32_BASE_OFFSET_REG(codeblock_t *block, int base_reg, int offset, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((src_reg & 8) || (base_reg & 8)) fatal("host_x86_MOV32_BASE_OFFSET_REG reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { @@ -922,8 +991,10 @@ host_x86_MOV32_BASE_OFFSET_REG(codeblock_t *block, int base_reg, int offset, int void host_x86_MOV64_BASE_OFFSET_REG(codeblock_t *block, int base_reg, int offset, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((src_reg & 8) || (base_reg & 8)) fatal("host_x86_MOV64_BASE_OFFSET_REG reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { @@ -941,8 +1012,10 @@ host_x86_MOV64_BASE_OFFSET_REG(codeblock_t *block, int base_reg, int offset, int void host_x86_MOV32_BASE_OFFSET_IMM(codeblock_t *block, int base_reg, int offset, uint32_t imm_data) { +#ifdef RECOMPILER_DEBUG if (base_reg & 8) fatal("host_x86_MOV32_BASE_OFFSET_IMM reg & 8\n"); +#endif if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { @@ -961,16 +1034,21 @@ host_x86_MOV32_BASE_OFFSET_IMM(codeblock_t *block, int base_reg, int offset, uin void host_x86_MOV8_REG_IMM(codeblock_t *block, int reg, uint16_t imm_data) { +#ifdef RECOMPILER_DEBUG if (reg >= 8) fatal("host_x86_MOV8_REG_IMM reg >= 4\n"); +#endif + codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xb0 | reg, imm_data); /*MOV reg, imm_data*/ } void host_x86_MOV16_REG_IMM(codeblock_t *block, int reg, uint16_t imm_data) { +#ifdef RECOMPILER_DEBUG if (reg & 8) fatal("host_x86_MOV16_REG_IMM reg & 8\n"); +#endif codegen_alloc_bytes(block, 6); codegen_addbyte2(block, 0x66, 0xb8 | (reg & 7)); /*MOV reg, imm_data*/ codegen_addword(block, imm_data); @@ -978,8 +1056,10 @@ host_x86_MOV16_REG_IMM(codeblock_t *block, int reg, uint16_t imm_data) void host_x86_MOV32_REG_IMM(codeblock_t *block, int reg, uint32_t imm_data) { +#ifdef RECOMPILER_DEBUG if (reg & 8) fatal("host_x86_MOV32_REG_IMM reg & 8\n"); +#endif codegen_alloc_bytes(block, 5); codegen_addbyte(block, 0xb8 | (reg & 7)); /*MOV reg, imm_data*/ codegen_addlong(block, imm_data); @@ -1002,8 +1082,10 @@ host_x86_MOV64_REG_IMM(codeblock_t *block, int reg, uint64_t imm_data) void host_x86_MOV8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_MOV8_REG_REG - bad reg\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x88, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); @@ -1011,8 +1093,10 @@ 
host_x86_MOV8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_MOV16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_MOV16_REG_REG - bad reg\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0x89, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); @@ -1020,8 +1104,10 @@ host_x86_MOV16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_MOV32_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_MOV32_REG_REG - bad reg\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x89, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); @@ -1068,8 +1154,10 @@ host_x86_MOVSX_REG_32_16(codeblock_t *block, int dst_reg, int src_reg) void host_x86_MOVZX_BASE_INDEX_32_8(codeblock_t *block, int dst_reg, int base_reg, int index_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (base_reg & 8) | (index_reg & 8)) fatal("host_x86_MOVZX_BASE_INDEX_32_8 reg & 8\n"); +#endif codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x0f, 0xb6, 0x04 | (dst_reg << 3), (index_reg << 3) | base_reg); @@ -1077,8 +1165,10 @@ host_x86_MOVZX_BASE_INDEX_32_8(codeblock_t *block, int dst_reg, int base_reg, in void host_x86_MOVZX_BASE_INDEX_32_16(codeblock_t *block, int dst_reg, int base_reg, int index_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (base_reg & 8) | (index_reg & 8)) fatal("host_x86_MOVZX_BASE_INDEX_32_16 reg & 8\n"); +#endif codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x0f, 0xb7, 0x04 | (dst_reg << 3), (index_reg << 3) | base_reg); @@ -1087,8 +1177,10 @@ host_x86_MOVZX_BASE_INDEX_32_16(codeblock_t *block, int dst_reg, int base_reg, i void host_x86_MOVZX_REG_16_8(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_MOVZX_REG_16_8 - bad reg\n"); +#endif codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x66, 0x0f, 0xb6, 0xc0 | (dst_reg << 3) | src_reg); /*MOVZX dst_reg, src_reg*/ @@ -1096,8 +1188,10 @@ host_x86_MOVZX_REG_16_8(codeblock_t *block, int dst_reg, int src_reg) void host_x86_MOVZX_REG_32_8(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_MOVZX_REG_32_8 - bad reg\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x0f, 0xb6, 0xc0 | (dst_reg << 3) | src_reg); /*MOVZX dst_reg, src_reg*/ @@ -1105,8 +1199,10 @@ host_x86_MOVZX_REG_32_8(codeblock_t *block, int dst_reg, int src_reg) void host_x86_MOVZX_REG_32_16(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_MOVZX_REG_16_8 - bad reg\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x0f, 0xb7, 0xc0 | (dst_reg << 3) | src_reg); /*MOVZX dst_reg, src_reg*/ @@ -1118,8 +1214,10 @@ host_x86_MOVZX_REG_ABS_16_8(codeblock_t *block, int dst_reg, void *p) int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_MOVZX_REG_ABS_16_8 - bad reg\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 5); @@ -1145,7 +1243,7 @@ host_x86_MOVZX_REG_ABS_32_8(codeblock_t *block, int dst_reg, void *p) int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) 
+ 2147483648ULL); -#if 0 +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_MOVZX_REG_ABS_32_8 - bad reg\n"); #endif @@ -1184,8 +1282,10 @@ host_x86_MOVZX_REG_ABS_32_16(codeblock_t *block, int dst_reg, void *p) int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_MOVZX_REG_ABS_32_16 - bad reg\n"); +#endif if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); @@ -1214,8 +1314,10 @@ host_x86_NOP(codeblock_t *block) void host_x86_OR8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_OR8_REG_IMM - dst_reg & 8\n"); +#endif if (dst_reg == REG_EAX) { codegen_alloc_bytes(block, 2); @@ -1228,8 +1330,10 @@ host_x86_OR8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) void host_x86_OR16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_OR16_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 4); @@ -1247,8 +1351,10 @@ host_x86_OR16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) void host_x86_OR32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_OR32_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 3); @@ -1266,8 +1372,10 @@ host_x86_OR32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) void host_x86_OR8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_OR8_REG_IMM - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x08, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*OR dst_reg, src_reg*/ @@ -1275,8 +1383,10 @@ host_x86_OR8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_OR16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_OR16_REG_IMM - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0x09, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*OR dst_reg, src_reg*/ @@ -1284,8 +1394,10 @@ host_x86_OR16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_OR32_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_OR32_REG_IMM - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x09, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*OR dst_reg, src_reg*/ @@ -1325,24 +1437,30 @@ host_x86_RET(codeblock_t *block) void host_x86_ROL8_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROL8 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd2, 0xc0 | RM_OP_ROL | dst_reg); /*SHL dst_reg, CL*/ } void host_x86_ROL16_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROL16 CL & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0xd3, 0xc0 | RM_OP_ROL | dst_reg); /*SHL dst_reg, CL*/ } void host_x86_ROL32_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROL32 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd3, 0xc0 | RM_OP_ROL | dst_reg); /*SHL dst_reg, CL*/ } @@ -1350,24 +1468,30 @@ 
host_x86_ROL32_CL(codeblock_t *block, int dst_reg) void host_x86_ROL8_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROL8 imm & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc0, 0xc0 | RM_OP_ROL | dst_reg, shift); /*SHL dst_reg, shift*/ } void host_x86_ROL16_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROL16 imm & 8\n"); +#endif codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x66, 0xc1, 0xc0 | RM_OP_ROL | dst_reg, shift); /*SHL dst_reg, shift*/ } void host_x86_ROL32_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROL32 imm & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc1, 0xc0 | RM_OP_ROL | dst_reg, shift); /*SHL dst_reg, shift*/ } @@ -1375,24 +1499,30 @@ host_x86_ROL32_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_ROR8_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROR8 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd2, 0xc0 | RM_OP_ROR | dst_reg); /*SHR dst_reg, CL*/ } void host_x86_ROR16_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROR16 CL & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0xd3, 0xc0 | RM_OP_ROR | dst_reg); /*SHR dst_reg, CL*/ } void host_x86_ROR32_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROR32 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd3, 0xc0 | RM_OP_ROR | dst_reg); /*SHR dst_reg, CL*/ } @@ -1400,24 +1530,30 @@ host_x86_ROR32_CL(codeblock_t *block, int dst_reg) void host_x86_ROR8_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROR8 imm & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc0, 0xc0 | RM_OP_ROR | dst_reg, shift); /*SHR dst_reg, shift*/ } void host_x86_ROR16_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROR16 imm & 8\n"); +#endif codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x66, 0xc1, 0xc0 | RM_OP_ROR | dst_reg, shift); /*SHR dst_reg, shift*/ } void host_x86_ROR32_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("ROR32 im & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc1, 0xc0 | RM_OP_ROR | dst_reg, shift); /*SHR dst_reg, shift*/ } @@ -1425,8 +1561,10 @@ host_x86_ROR32_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SAR8_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SAR8 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd2, 0xc0 | RM_OP_SAR | dst_reg); /*SAR dst_reg, CL*/ @@ -1434,8 +1572,10 @@ host_x86_SAR8_CL(codeblock_t *block, int dst_reg) void host_x86_SAR16_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SAR16 CL & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0xd3, 0xc0 | RM_OP_SAR | dst_reg); /*SAR dst_reg, CL*/ @@ -1443,8 +1583,10 @@ host_x86_SAR16_CL(codeblock_t *block, int dst_reg) void host_x86_SAR32_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SAR32 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd3, 0xc0 | RM_OP_SAR | dst_reg); /*SAR dst_reg, CL*/ @@ -1453,8 +1595,10 
@@ host_x86_SAR32_CL(codeblock_t *block, int dst_reg) void host_x86_SAR8_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SAR8 imm & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc0, 0xc0 | RM_OP_SAR | dst_reg, shift); /*SAR dst_reg, shift*/ @@ -1462,8 +1606,10 @@ host_x86_SAR8_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SAR16_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SAR16 imm & 8\n"); +#endif codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x66, 0xc1, 0xc0 | RM_OP_SAR | dst_reg, shift); /*SAR dst_reg, shift*/ @@ -1471,8 +1617,10 @@ host_x86_SAR16_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SAR32_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SAR32 imm & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc1, 0xc0 | RM_OP_SAR | dst_reg, shift); /*SAR dst_reg, shift*/ } @@ -1480,8 +1628,10 @@ host_x86_SAR32_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SHL8_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHL8 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd2, 0xc0 | RM_OP_SHL | dst_reg); /*SHL dst_reg, CL*/ @@ -1489,8 +1639,10 @@ host_x86_SHL8_CL(codeblock_t *block, int dst_reg) void host_x86_SHL16_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHL16 CL & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0xd3, 0xc0 | RM_OP_SHL | dst_reg); /*SHL dst_reg, CL*/ @@ -1498,8 +1650,10 @@ host_x86_SHL16_CL(codeblock_t *block, int dst_reg) void host_x86_SHL32_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHL32 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd3, 0xc0 | RM_OP_SHL | dst_reg); /*SHL dst_reg, CL*/ @@ -1508,8 +1662,10 @@ host_x86_SHL32_CL(codeblock_t *block, int dst_reg) void host_x86_SHL8_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHL8 imm & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc0, 0xc0 | RM_OP_SHL | dst_reg, shift); /*SHL dst_reg, shift*/ @@ -1517,8 +1673,10 @@ host_x86_SHL8_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SHL16_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHL16 imm & 8\n"); +#endif codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x66, 0xc1, 0xc0 | RM_OP_SHL | dst_reg, shift); /*SHL dst_reg, shift*/ @@ -1526,8 +1684,10 @@ host_x86_SHL16_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SHL32_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHL32 imm & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc1, 0xc0 | RM_OP_SHL | dst_reg, shift); /*SHL dst_reg, shift*/ @@ -1536,8 +1696,10 @@ host_x86_SHL32_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SHR8_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHR8 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd2, 0xc0 | RM_OP_SHR | dst_reg); /*SHR dst_reg, CL*/ @@ -1545,8 +1707,10 @@ host_x86_SHR8_CL(codeblock_t *block, int dst_reg) void host_x86_SHR16_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHR16 
CL & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0xd3, 0xc0 | RM_OP_SHR | dst_reg); /*SHR dst_reg, CL*/ @@ -1554,8 +1718,10 @@ host_x86_SHR16_CL(codeblock_t *block, int dst_reg) void host_x86_SHR32_CL(codeblock_t *block, int dst_reg) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHR32 CL & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0xd3, 0xc0 | RM_OP_SHR | dst_reg); /*SHR dst_reg, CL*/ @@ -1564,8 +1730,10 @@ host_x86_SHR32_CL(codeblock_t *block, int dst_reg) void host_x86_SHR8_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHR8 imm & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc0, 0xc0 | RM_OP_SHR | dst_reg, shift); /*SHR dst_reg, shift*/ @@ -1573,8 +1741,10 @@ host_x86_SHR8_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SHR16_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHR16 imm & 8\n"); +#endif codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x66, 0xc1, 0xc0 | RM_OP_SHR | dst_reg, shift); /*SHR dst_reg, shift*/ @@ -1582,8 +1752,10 @@ host_x86_SHR16_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SHR32_IMM(codeblock_t *block, int dst_reg, int shift) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("SHR32 imm & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0xc1, 0xc0 | RM_OP_SHR | dst_reg, shift); /*SHR dst_reg, shift*/ } @@ -1591,8 +1763,10 @@ host_x86_SHR32_IMM(codeblock_t *block, int dst_reg, int shift) void host_x86_SUB8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_SUB8_REG_IMM - dst_reg & 8\n"); +#endif if (dst_reg == REG_EAX) { codegen_alloc_bytes(block, 2); @@ -1605,8 +1779,10 @@ host_x86_SUB8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) void host_x86_SUB16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_SUB16_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 4); @@ -1624,8 +1800,10 @@ host_x86_SUB16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) void host_x86_SUB32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_SUB32_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 3); @@ -1643,8 +1821,10 @@ host_x86_SUB32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) void host_x86_SUB64_REG_IMM(codeblock_t *block, int dst_reg, uint64_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_SUB64_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 4); @@ -1655,8 +1835,10 @@ host_x86_SUB64_REG_IMM(codeblock_t *block, int dst_reg, uint64_t imm_data) void host_x86_SUB8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_SUB8_REG_REG - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x28, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*SUB dst_reg, src_reg*/ @@ -1664,8 +1846,10 @@ host_x86_SUB8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_SUB16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_SUB16_REG_REG - dst_reg & 8\n"); +#endif 
codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0x29, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*SUB dst_reg, src_reg*/ @@ -1673,8 +1857,10 @@ host_x86_SUB16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_SUB32_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_SUB32_REG_REG - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x29, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*SUB dst_reg, src_reg*/ @@ -1697,8 +1883,10 @@ host_x86_TEST16_REG(codeblock_t *block, int src_host_reg, int dst_host_reg) void host_x86_TEST32_REG(codeblock_t *block, int src_reg, int dst_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_TEST32_REG - bad reg\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x85, MODRM_MOD_REG(dst_reg, src_reg)); /*TEST dst_host_reg, src_host_reg*/ @@ -1706,8 +1894,10 @@ host_x86_TEST32_REG(codeblock_t *block, int src_reg, int dst_reg) void host_x86_TEST32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("TEST32_REG_IMM reg & 8\n"); +#endif if (dst_reg == REG_EAX) { codegen_alloc_bytes(block, 5); codegen_addbyte(block, 0xa9); /*TEST EAX, imm_data*/ @@ -1722,8 +1912,10 @@ host_x86_TEST32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) void host_x86_XOR8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_XOR8_REG_IMM - dst_reg & 8\n"); +#endif if (dst_reg == REG_EAX) { codegen_alloc_bytes(block, 2); @@ -1736,8 +1928,10 @@ host_x86_XOR8_REG_IMM(codeblock_t *block, int dst_reg, uint8_t imm_data) void host_x86_XOR16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_XOR16_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 4); @@ -1755,8 +1949,10 @@ host_x86_XOR16_REG_IMM(codeblock_t *block, int dst_reg, uint16_t imm_data) void host_x86_XOR32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) { +#ifdef RECOMPILER_DEBUG if (dst_reg & 8) fatal("host_x86_XOR32_REG_IMM - dst_reg & 8\n"); +#endif if (is_imm8(imm_data)) { codegen_alloc_bytes(block, 3); @@ -1774,8 +1970,10 @@ host_x86_XOR32_REG_IMM(codeblock_t *block, int dst_reg, uint32_t imm_data) void host_x86_XOR8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_XOR8_REG_IMM - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x30, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*XOR dst_reg, src_reg*/ @@ -1783,8 +1981,10 @@ host_x86_XOR8_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_XOR16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_XOR16_REG_IMM - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x66, 0x31, 0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); /*XOR dst_reg, src_reg*/ @@ -1792,8 +1992,10 @@ host_x86_XOR16_REG_REG(codeblock_t *block, int dst_reg, int src_reg) void host_x86_XOR32_REG_REG(codeblock_t *block, int dst_reg, int src_reg) { +#ifdef RECOMPILER_DEBUG if ((dst_reg & 8) || (src_reg & 8)) fatal("host_x86_XOR32_REG_IMM - dst_reg & 8\n"); +#endif codegen_alloc_bytes(block, 2); codegen_addbyte2(block, 0x31, 0xc0 | (dst_reg & 7) | 
((src_reg & 7) << 3)); /*XOR dst_reg, src_reg*/ diff --git a/src/codegen_new/codegen_backend_x86-64_uops.c b/src/codegen_new/codegen_backend_x86-64_uops.c index 655896b54..6b68434a0 100644 --- a/src/codegen_new/codegen_backend_x86-64_uops.c +++ b/src/codegen_new/codegen_backend_x86-64_uops.c @@ -219,6 +219,11 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop) static int codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) { +# if _WIN64 + host_x86_MOV32_REG_IMM(block, REG_ECX, uop->imm_data); +# else + host_x86_MOV32_REG_IMM(block, REG_EDI, uop->imm_data); +# endif host_x86_CALL(block, uop->p); host_x86_TEST32_REG(block, REG_EAX, REG_EAX); host_x86_JNZ(block, codegen_exit_rout); diff --git a/src/codegen_new/codegen_backend_x86_uops.c b/src/codegen_new/codegen_backend_x86_uops.c index 02c441234..fad088822 100644 --- a/src/codegen_new/codegen_backend_x86_uops.c +++ b/src/codegen_new/codegen_backend_x86_uops.c @@ -221,6 +221,7 @@ codegen_CALL_FUNC_RESULT(codeblock_t *block, uop_t *uop) static int codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) { + host_x86_MOV32_STACK_IMM(block, STACK_ARG0, uop->imm_data); host_x86_CALL(block, uop->p); host_x86_TEST32_REG(block, REG_EAX, REG_EAX); host_x86_JNZ(block, codegen_exit_rout); diff --git a/src/codegen_new/codegen_ir.c b/src/codegen_new/codegen_ir.c index ed8ae051f..d14fa0f23 100644 --- a/src/codegen_new/codegen_ir.c +++ b/src/codegen_new/codegen_ir.c @@ -38,7 +38,7 @@ codegen_ir_set_unroll(int count, int start, int first_instruction) static void duplicate_uop(ir_data_t *ir, uop_t *uop, int offset) { - uop_t *new_uop = uop_alloc(ir, uop->type); + uop_t *new_uop = uop_alloc_unroll(ir, uop->type); if (!ir_reg_is_invalid(uop->src_reg_a)) new_uop->src_reg_a = codegen_reg_read(uop->src_reg_a.reg); diff --git a/src/codegen_new/codegen_ir_defs.h b/src/codegen_new/codegen_ir_defs.h index d55e57f3d..60f7badea 100644 --- a/src/codegen_new/codegen_ir_defs.h +++ b/src/codegen_new/codegen_ir_defs.h @@ -41,8 +41,8 @@ #define UOP_LOAD_FUNC_ARG_2_IMM (UOP_TYPE_PARAMS_IMM | 0x0a | UOP_TYPE_BARRIER) #define UOP_LOAD_FUNC_ARG_3_IMM (UOP_TYPE_PARAMS_IMM | 0x0b | UOP_TYPE_BARRIER) #define UOP_CALL_FUNC (UOP_TYPE_PARAMS_POINTER | 0x10 | UOP_TYPE_BARRIER) -/*UOP_CALL_INSTRUCTION_FUNC - call instruction handler at p, check return value and exit block if non-zero*/ -#define UOP_CALL_INSTRUCTION_FUNC (UOP_TYPE_PARAMS_POINTER | 0x11 | UOP_TYPE_BARRIER) +/*UOP_CALL_INSTRUCTION_FUNC - call instruction handler at p with fetchdat, check return value and exit block if non-zero*/ +#define UOP_CALL_INSTRUCTION_FUNC (UOP_TYPE_PARAMS_POINTER | UOP_TYPE_PARAMS_IMM | 0x11 | UOP_TYPE_BARRIER) #define UOP_STORE_P_IMM (UOP_TYPE_PARAMS_IMM | 0x12) #define UOP_STORE_P_IMM_8 (UOP_TYPE_PARAMS_IMM | 0x13) /*UOP_LOAD_SEG - load segment in src_reg_a to segment p via loadseg(), check return value and exit block if non-zero*/ @@ -377,6 +377,34 @@ uop_alloc(ir_data_t *ir, uint32_t uop_type) uop->jump_dest_uop = -1; uop->jump_list_next = -1; + if (uop_type & (UOP_TYPE_BARRIER | UOP_TYPE_ORDER_BARRIER)) + dirty_ir_regs[0] = dirty_ir_regs[1] = ~0ULL; + + return uop; +} + +static inline uop_t * +uop_alloc_unroll(ir_data_t *ir, uint32_t uop_type) +{ + uop_t *uop; + + if (ir->wr_pos >= UOP_NR_MAX) + fatal("Exceeded uOP max\n"); + + uop = &ir->uops[ir->wr_pos++]; + + uop->is_a16 = 0; + + uop->dest_reg_a = invalid_ir_reg; + uop->src_reg_a = invalid_ir_reg; + uop->src_reg_b = invalid_ir_reg; + uop->src_reg_c = invalid_ir_reg; + + uop->pc = cpu_state.oldpc; + + 
uop->jump_dest_uop = -1; + uop->jump_list_next = -1; + if (uop_type & (UOP_TYPE_BARRIER | UOP_TYPE_ORDER_BARRIER)) codegen_reg_mark_as_required(); @@ -662,7 +690,7 @@ uop_gen_reg_src2_pointer(uint32_t uop_type, ir_data_t *ir, int src_reg_a, int sr #define uop_CALL_FUNC(ir, p) uop_gen_pointer(UOP_CALL_FUNC, ir, p) #define uop_CALL_FUNC_RESULT(ir, dst_reg, p) uop_gen_reg_dst_pointer(UOP_CALL_FUNC_RESULT, ir, dst_reg, p) -#define uop_CALL_INSTRUCTION_FUNC(ir, p) uop_gen_pointer(UOP_CALL_INSTRUCTION_FUNC, ir, p) +#define uop_CALL_INSTRUCTION_FUNC(ir, p, imm) uop_gen_pointer_imm(UOP_CALL_INSTRUCTION_FUNC, ir, p, imm) #define uop_CMP_IMM_JZ(ir, src_reg, imm, p) uop_gen_reg_src_pointer_imm(UOP_CMP_IMM_JZ, ir, src_reg, p, imm) diff --git a/src/codegen_new/codegen_reg.c b/src/codegen_new/codegen_reg.c index 75cf25ded..f91377df8 100644 --- a/src/codegen_new/codegen_reg.c +++ b/src/codegen_new/codegen_reg.c @@ -34,6 +34,8 @@ typedef struct host_reg_set_t { static host_reg_set_t host_reg_set; static host_reg_set_t host_fp_reg_set; +uint64_t dirty_ir_regs[2] = { 0, 0 }; + enum { REG_BYTE, REG_WORD, @@ -184,15 +186,36 @@ struct [IREG_temp1d] = { REG_DOUBLE, (void *) 48, REG_FP, REG_VOLATILE }, }; +static const uint8_t native_requested_sizes[9][8] = +{ + [REG_BYTE][IREG_SIZE_B >> IREG_SIZE_SHIFT] = 1, + [REG_FPU_ST_BYTE][IREG_SIZE_B >> IREG_SIZE_SHIFT] = 1, + [REG_WORD][IREG_SIZE_W >> IREG_SIZE_SHIFT] = 1, + [REG_DWORD][IREG_SIZE_L >> IREG_SIZE_SHIFT] = 1, + [REG_QWORD][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1, + [REG_FPU_ST_QWORD][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1, + [REG_DOUBLE][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1, + [REG_FPU_ST_DOUBLE][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1, + [REG_QWORD][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1, + [REG_FPU_ST_QWORD][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1, + [REG_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1, + [REG_FPU_ST_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1, + + [REG_POINTER][(sizeof(void *) == 4) ? (IREG_SIZE_L >> IREG_SIZE_SHIFT) : (IREG_SIZE_Q >> IREG_SIZE_SHIFT)] = 1 +}; + void codegen_reg_mark_as_required(void) { - for (uint8_t reg = 0; reg < IREG_COUNT; reg++) { + /* This used to start from IREG_EAX, now only starts from IREG_ESP since the first 4 registers are never optimized out. */ + /* It also no longer iterates through volatile registers unnecessarily. 
*/ + for (uint8_t reg = IREG_ESP; reg < IREG_temp0; reg++) { int last_version = reg_last_version[reg]; - if (last_version > 0 && ireg_data[reg].is_volatile == REG_PERMANENT) + if (last_version > 0) reg_version[reg][last_version].flags |= REG_FLAGS_REQUIRED; } + dirty_ir_regs[0] = dirty_ir_regs[1] = 0; } int @@ -201,29 +224,7 @@ reg_is_native_size(ir_reg_t ir_reg) int native_size = ireg_data[IREG_GET_REG(ir_reg.reg)].native_size; int requested_size = IREG_GET_SIZE(ir_reg.reg); - switch (native_size) { - case REG_BYTE: - case REG_FPU_ST_BYTE: - return (requested_size == IREG_SIZE_B); - case REG_WORD: - return (requested_size == IREG_SIZE_W); - case REG_DWORD: - return (requested_size == IREG_SIZE_L); - case REG_QWORD: - case REG_FPU_ST_QWORD: - case REG_DOUBLE: - case REG_FPU_ST_DOUBLE: - return ((requested_size == IREG_SIZE_D) || (requested_size == IREG_SIZE_Q)); - case REG_POINTER: - if (sizeof(void *) == 4) - return (requested_size == IREG_SIZE_L); - return (requested_size == IREG_SIZE_Q); - - default: - fatal("get_reg_is_native_size: unknown native size %i\n", native_size); - } - - return 0; + return native_requested_sizes[native_size][requested_size >> IREG_SIZE_SHIFT]; } void @@ -256,6 +257,8 @@ codegen_reg_reset(void) host_fp_reg_set.locked = 0; host_fp_reg_set.nr_regs = CODEGEN_HOST_FP_REGS; + dirty_ir_regs[0] = dirty_ir_regs[1] = 0; + for (c = 0; c < IREG_COUNT; c++) { reg_last_version[c] = 0; reg_version[c][0].refcount = 0; diff --git a/src/codegen_new/codegen_reg.h b/src/codegen_new/codegen_reg.h index 2185fde45..a86bcd1cf 100644 --- a/src/codegen_new/codegen_reg.h +++ b/src/codegen_new/codegen_reg.h @@ -16,59 +16,45 @@ #define IREG_SIZE_Q (5 << IREG_SIZE_SHIFT) enum { - IREG_EAX = 0, - IREG_ECX = 1, - IREG_EDX = 2, - IREG_EBX = 3, - IREG_ESP = 4, - IREG_EBP = 5, - IREG_ESI = 6, - IREG_EDI = 7, + IREG_EAX, + IREG_ECX, + IREG_EDX, + IREG_EBX, + IREG_ESP, + IREG_EBP, + IREG_ESI, + IREG_EDI, - IREG_flags_op = 8, - IREG_flags_res = 9, - IREG_flags_op1 = 10, - IREG_flags_op2 = 11, + IREG_flags_op, + IREG_flags_res, + IREG_flags_op1, + IREG_flags_op2, - IREG_pc = 12, - IREG_oldpc = 13, + IREG_pc, + IREG_oldpc, - IREG_eaaddr = 14, - IREG_ea_seg = 15, - IREG_op32 = 16, - IREG_ssegsx = 17, + IREG_eaaddr, + IREG_ea_seg, + IREG_op32, + IREG_ssegsx, - IREG_rm_mod_reg = 18, + IREG_rm_mod_reg, - IREG_acycs = 19, - IREG_cycles = 20, + IREG_cycles, - IREG_CS_base = 21, - IREG_DS_base = 22, - IREG_ES_base = 23, - IREG_FS_base = 24, - IREG_GS_base = 25, - IREG_SS_base = 26, + IREG_CS_base, + IREG_DS_base, + IREG_ES_base, + IREG_FS_base, + IREG_GS_base, + IREG_SS_base, - IREG_CS_seg = 27, - IREG_DS_seg = 28, - IREG_ES_seg = 29, - IREG_FS_seg = 30, - IREG_GS_seg = 31, - IREG_SS_seg = 32, - - /*Temporary registers are stored on the stack, and are not guaranteed to - be preserved across uOPs. They will not be written back if they will - not be read again.*/ - IREG_temp0 = 33, - IREG_temp1 = 34, - IREG_temp2 = 35, - IREG_temp3 = 36, - - IREG_FPU_TOP = 37, - - IREG_temp0d = 38, - IREG_temp1d = 39, + IREG_CS_seg, + IREG_DS_seg, + IREG_ES_seg, + IREG_FS_seg, + IREG_GS_seg, + IREG_SS_seg, /*FPU stack registers are physical registers. Use IREG_ST() / IREG_tag() to access. @@ -76,66 +62,79 @@ enum { used directly to index the stack. 
When it is clear, the difference between the current value of TOP and the value when the block was first compiled will be added to adjust for any changes in TOP.*/ - IREG_ST0 = 40, - IREG_ST1 = 41, - IREG_ST2 = 42, - IREG_ST3 = 43, - IREG_ST4 = 44, - IREG_ST5 = 45, - IREG_ST6 = 46, - IREG_ST7 = 47, + IREG_ST0, + IREG_ST1, + IREG_ST2, + IREG_ST3, + IREG_ST4, + IREG_ST5, + IREG_ST6, + IREG_ST7, - IREG_tag0 = 48, - IREG_tag1 = 49, - IREG_tag2 = 50, - IREG_tag3 = 51, - IREG_tag4 = 52, - IREG_tag5 = 53, - IREG_tag6 = 54, - IREG_tag7 = 55, + IREG_tag0, + IREG_tag1, + IREG_tag2, + IREG_tag3, + IREG_tag4, + IREG_tag5, + IREG_tag6, + IREG_tag7, - IREG_ST0_i64 = 56, - IREG_ST1_i64 = 57, - IREG_ST2_i64 = 58, - IREG_ST3_i64 = 59, - IREG_ST4_i64 = 60, - IREG_ST5_i64 = 61, - IREG_ST6_i64 = 62, - IREG_ST7_i64 = 63, + IREG_ST0_i64, + IREG_ST1_i64, + IREG_ST2_i64, + IREG_ST3_i64, + IREG_ST4_i64, + IREG_ST5_i64, + IREG_ST6_i64, + IREG_ST7_i64, - IREG_MM0x = 64, - IREG_MM1x = 65, - IREG_MM2x = 66, - IREG_MM3x = 67, - IREG_MM4x = 68, - IREG_MM5x = 69, - IREG_MM6x = 70, - IREG_MM7x = 71, + IREG_MM0x, + IREG_MM1x, + IREG_MM2x, + IREG_MM3x, + IREG_MM4x, + IREG_MM5x, + IREG_MM6x, + IREG_MM7x, - IREG_NPXCx = 72, - IREG_NPXSx = 73, + IREG_NPXCx, + IREG_NPXSx, - IREG_flagsx = 74, - IREG_eflagsx = 75, + IREG_flagsx, + IREG_eflagsx, - IREG_CS_limit_low = 76, - IREG_DS_limit_low = 77, - IREG_ES_limit_low = 78, - IREG_FS_limit_low = 79, - IREG_GS_limit_low = 80, - IREG_SS_limit_low = 81, + IREG_CS_limit_low, + IREG_DS_limit_low, + IREG_ES_limit_low, + IREG_FS_limit_low, + IREG_GS_limit_low, + IREG_SS_limit_low, - IREG_CS_limit_high = 82, - IREG_DS_limit_high = 83, - IREG_ES_limit_high = 84, - IREG_FS_limit_high = 85, - IREG_GS_limit_high = 86, - IREG_SS_limit_high = 87, + IREG_CS_limit_high, + IREG_DS_limit_high, + IREG_ES_limit_high, + IREG_FS_limit_high, + IREG_GS_limit_high, + IREG_SS_limit_high, - IREG_eaa16 = 88, - IREG_x87_op = 89, + IREG_eaa16, + IREG_x87_op, - IREG_COUNT = 90, + IREG_FPU_TOP, + + /*Temporary registers are stored on the stack, and are not guaranteed to + be preserved across uOPs. They will not be written back if they will + not be read again.*/ + IREG_temp0, + IREG_temp1, + IREG_temp2, + IREG_temp3, + + IREG_temp0d, + IREG_temp1d, + + IREG_COUNT, IREG_INVALID = 255, @@ -279,6 +278,7 @@ ireg_seg_limit_high(x86seg *seg) } extern uint8_t reg_last_version[IREG_COUNT]; +extern uint64_t dirty_ir_regs[2]; /*This version of the register must be calculated, regardless of whether it is apparently required or not. 
Do not optimise out.*/ @@ -363,10 +363,12 @@ codegen_reg_write(int reg, int uop_nr) int last_version = reg_last_version[IREG_GET_REG(reg)]; reg_version_t *version; -#ifndef RELEASE_BUILD - if (IREG_GET_REG(reg) == IREG_INVALID) - fatal("codegen_reg_write - IREG_INVALID\n"); -#endif + if (dirty_ir_regs[(IREG_GET_REG(reg) >> 6) & 3] & (1ull << ((uint64_t)IREG_GET_REG(reg) & 0x3full))) { + dirty_ir_regs[(IREG_GET_REG(reg) >> 6) & 3] &= ~(1ull << ((uint64_t)IREG_GET_REG(reg) & 0x3full)); + if ((IREG_GET_REG(reg) > IREG_EBX && IREG_GET_REG(reg) < IREG_temp0) && last_version > 0) { + reg_version[IREG_GET_REG(reg)][last_version].flags |= REG_FLAGS_REQUIRED; + } + } ireg.reg = reg; ireg.version = last_version + 1; @@ -376,12 +378,8 @@ codegen_reg_write(int reg, int uop_nr) } reg_last_version[IREG_GET_REG(reg)]++; -#ifndef RELEASE_BUILD - if (!reg_last_version[IREG_GET_REG(reg)]) - fatal("codegen_reg_write - version overflow\n"); - else -#endif - if (reg_last_version[IREG_GET_REG(reg)] > REG_VERSION_MAX) + + if (reg_last_version[IREG_GET_REG(reg)] > REG_VERSION_MAX) CPU_BLOCK_END(); if (reg_last_version[IREG_GET_REG(reg)] > max_version_refcount) max_version_refcount = reg_last_version[IREG_GET_REG(reg)];
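Note, not part of the patch: the central mechanical change above is that UOP_CALL_INSTRUCTION_FUNC now carries fetchdat in its immediate field, so the front end in codegen.c no longer emits a separate UOP_LOAD_FUNC_ARG_IMM uop before every instruction call. Each backend instead materializes uop->imm_data into its first-argument location right before the call: REG_ARG0 on ARM64 and ARM32, ECX on Win64, EDI on System V x86-64, and the first stack argument slot on 32-bit x86. A minimal sketch of the resulting emitter shape, using the System V x86-64 helper names that appear in this diff:

    /* Sketch only: conceptual shape of the updated CALL_INSTRUCTION_FUNC emitter.
       fetchdat rides in the uop itself rather than in a preceding LOAD_FUNC_ARG uop. */
    static int
    codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop)
    {
        host_x86_MOV32_REG_IMM(block, REG_EDI, uop->imm_data); /* first integer argument = fetchdat */
        host_x86_CALL(block, uop->p);                          /* call the instruction handler */
        host_x86_TEST32_REG(block, REG_EAX, REG_EAX);          /* a non-zero return value... */
        host_x86_JNZ(block, codegen_exit_rout);                /* ...exits the generated block */
        return 0;
    }

Folding the argument into the call uop saves one uop slot per emulated instruction and leaves the choice of argument register (or stack slot) entirely to the backend, which is why UOP_CALL_INSTRUCTION_FUNC now sets both UOP_TYPE_PARAMS_POINTER and UOP_TYPE_PARAMS_IMM.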