From 395537070b029b39f1f98170db2073652c1ae39c Mon Sep 17 00:00:00 2001 From: OBattler Date: Sat, 13 Jun 2020 10:53:11 +0200 Subject: [PATCH] Renamed the three CPU folders to their final names. --- src/codegen/codegen.c | 36 + src/codegen/codegen.h | 413 ++ src/codegen/codegen_accumulate.h | 13 + src/codegen/codegen_accumulate_x86-64.c | 50 + src/codegen/codegen_accumulate_x86.c | 49 + src/codegen/codegen_ops.c | 598 +++ src/codegen/codegen_ops.h | 46 + src/codegen/codegen_ops_arith.h | 1028 ++++ src/codegen/codegen_ops_fpu.h | 645 +++ src/codegen/codegen_ops_jump.h | 270 + src/codegen/codegen_ops_logic.h | 564 +++ src/codegen/codegen_ops_misc.h | 269 + src/codegen/codegen_ops_mmx.h | 277 + src/codegen/codegen_ops_mov.h | 656 +++ src/codegen/codegen_ops_shift.h | 125 + src/codegen/codegen_ops_stack.h | 269 + src/codegen/codegen_ops_x86-64.h | 6180 +++++++++++++++++++++++ src/codegen/codegen_ops_x86.h | 3988 +++++++++++++++ src/codegen/codegen_ops_xchg.h | 93 + src/codegen/codegen_timing_486.c | 421 ++ src/codegen/codegen_timing_686.c | 1057 ++++ src/codegen/codegen_timing_common.c | 850 ++++ src/codegen/codegen_timing_common.h | 232 + src/codegen/codegen_timing_k6.c | 2353 +++++++++ src/codegen/codegen_timing_p6.c | 2108 ++++++++ src/codegen/codegen_timing_pentium.c | 1323 +++++ src/codegen/codegen_timing_winchip.c | 421 ++ src/codegen/codegen_timing_winchip2.c | 743 +++ src/codegen/codegen_x86-64.c | 1178 +++++ src/codegen/codegen_x86-64.h | 23 + src/codegen/codegen_x86.c | 2156 ++++++++ src/codegen/codegen_x86.h | 42 + src/codegen/x86_flags.h | 526 ++ src/codegen/x86_ops_call.h | 457 ++ src/codegen/x86_ops_shift.h | 619 +++ src/codegen/x86seg.c | 2607 ++++++++++ 36 files changed, 32685 insertions(+) create mode 100644 src/codegen/codegen.c create mode 100644 src/codegen/codegen.h create mode 100644 src/codegen/codegen_accumulate.h create mode 100644 src/codegen/codegen_accumulate_x86-64.c create mode 100644 src/codegen/codegen_accumulate_x86.c create mode 100644 src/codegen/codegen_ops.c create mode 100644 src/codegen/codegen_ops.h create mode 100644 src/codegen/codegen_ops_arith.h create mode 100644 src/codegen/codegen_ops_fpu.h create mode 100644 src/codegen/codegen_ops_jump.h create mode 100644 src/codegen/codegen_ops_logic.h create mode 100644 src/codegen/codegen_ops_misc.h create mode 100644 src/codegen/codegen_ops_mmx.h create mode 100644 src/codegen/codegen_ops_mov.h create mode 100644 src/codegen/codegen_ops_shift.h create mode 100644 src/codegen/codegen_ops_stack.h create mode 100644 src/codegen/codegen_ops_x86-64.h create mode 100644 src/codegen/codegen_ops_x86.h create mode 100644 src/codegen/codegen_ops_xchg.h create mode 100644 src/codegen/codegen_timing_486.c create mode 100644 src/codegen/codegen_timing_686.c create mode 100644 src/codegen/codegen_timing_common.c create mode 100644 src/codegen/codegen_timing_common.h create mode 100644 src/codegen/codegen_timing_k6.c create mode 100644 src/codegen/codegen_timing_p6.c create mode 100644 src/codegen/codegen_timing_pentium.c create mode 100644 src/codegen/codegen_timing_winchip.c create mode 100644 src/codegen/codegen_timing_winchip2.c create mode 100644 src/codegen/codegen_x86-64.c create mode 100644 src/codegen/codegen_x86-64.h create mode 100644 src/codegen/codegen_x86.c create mode 100644 src/codegen/codegen_x86.h create mode 100644 src/codegen/x86_flags.h create mode 100644 src/codegen/x86_ops_call.h create mode 100644 src/codegen/x86_ops_shift.h create mode 100644 src/codegen/x86seg.c diff --git a/src/codegen/codegen.c b/src/codegen/codegen.c new file mode 100644 index 000000000..f633243ca --- /dev/null +++ b/src/codegen/codegen.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include + +#include <86box/86box.h> +#include <86box/mem.h> +#include "cpu.h" +#include "x86_ops.h" +#include "codegen.h" + +void (*codegen_timing_start)(); +void (*codegen_timing_prefix)(uint8_t prefix, uint32_t fetchdat); +void (*codegen_timing_opcode)(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc); +void (*codegen_timing_block_start)(); +void (*codegen_timing_block_end)(); +int (*codegen_timing_jump_cycles)(); + +void codegen_timing_set(codegen_timing_t *timing) +{ + codegen_timing_start = timing->start; + codegen_timing_prefix = timing->prefix; + codegen_timing_opcode = timing->opcode; + codegen_timing_block_start = timing->block_start; + codegen_timing_block_end = timing->block_end; + codegen_timing_jump_cycles = timing->jump_cycles; +} + +int codegen_in_recompile; + +/* This is for compatibility with new x87 code. */ +void codegen_set_rounding_mode(int mode) +{ + /* cpu_state.new_npxc = (cpu_state.old_npxc & ~0xc00) | (cpu_state.npxc & 0xc00); */ + cpu_state.new_npxc = (cpu_state.old_npxc & ~0xc00) | (mode << 10); +} diff --git a/src/codegen/codegen.h b/src/codegen/codegen.h new file mode 100644 index 000000000..086d00347 --- /dev/null +++ b/src/codegen/codegen.h @@ -0,0 +1,413 @@ +/* + * VARCem Virtual ARchaeological Computer EMulator. + * An emulator of (mostly) x86-based PC systems and devices, + * using the ISA,EISA,VLB,MCA and PCI system buses, roughly + * spanning the era between 1981 and 1995. + * + * This file is part of the VARCem Project. + * + * Definitions for the code generator. + * + * + * + * Authors: Sarah Walker, + * Miran Grca, + * + * Copyright 2008-2018 Sarah Walker. + * Copyright 2016-2018 Miran Grca. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the: + * + * Free Software Foundation, Inc. + * 59 Temple Place - Suite 330 + * Boston, MA 02111-1307 + * USA. + */ +#ifndef _CODEGEN_H_ +#define _CODEGEN_H_ + +#include <86box/mem.h> +#include "x86_ops.h" + +#ifdef __amd64__ +#include "codegen_x86-64.h" +#elif defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 +#include "codegen_x86.h" +#else +#error Dynamic recompiler not implemented on your platform +#endif + +/*Handling self-modifying code (of which there is a lot on x86) : + + PCem tracks a 'dirty mask' for each physical page, in which each bit + represents 64 bytes. This is only tracked for pages that have code in - when a + page first has a codeblock generated, it is evicted from the writelookup and + added to the page_lookup for this purpose. When in the page_lookup, each write + will go through the mem_write_ram*_page() functions and set the dirty mask + appropriately. + + Each codeblock also contains a code mask (actually two masks, one for each + page the block is/may be in), again with each bit representing 64 bytes. + + Each page has a list of codeblocks present in it. As each codeblock can span + up to two pages, two lists are present. + + When a codeblock is about to be executed, the code masks are compared with the + dirty masks for the relevant pages. If either intersect, then + codegen_check_flush() is called on the affected page(s), and all affected + blocks are evicted. + + The 64 byte granularity appears to work reasonably well for most cases, + avoiding most unnecessary evictions (eg when code & data are stored in the + same page). +*/ + +typedef struct codeblock_t +{ + uint64_t page_mask, page_mask2; + uint64_t *dirty_mask, *dirty_mask2; + uint64_t cmp; + + /*Previous and next pointers, for the codeblock list associated with + each physical page. Two sets of pointers, as a codeblock can be + present in two pages.*/ + struct codeblock_t *prev, *next; + struct codeblock_t *prev_2, *next_2; + + /*Pointers for codeblock tree, used to search for blocks when hash lookup + fails.*/ + struct codeblock_t *parent, *left, *right; + + int pnt; + int ins; + + int valid; + + int was_recompiled; + int TOP; + + uint32_t pc; + uint32_t _cs; + uint32_t endpc; + uint32_t phys, phys_2; + uint32_t status; + uint32_t flags; + + uint8_t data[2048]; +} codeblock_t; + +/*Code block uses FPU*/ +#define CODEBLOCK_HAS_FPU 1 +/*Code block is always entered with the same FPU top-of-stack*/ +#define CODEBLOCK_STATIC_TOP 2 + +static inline codeblock_t *codeblock_tree_find(uint32_t phys, uint32_t _cs) +{ + codeblock_t *block = pages[phys >> 12].head; + uint64_t a = _cs | ((uint64_t)phys << 32); + + while (block) + { + if (a == block->cmp) + { + if (!((block->status ^ cpu_cur_status) & CPU_STATUS_FLAGS) && + ((block->status & cpu_cur_status & CPU_STATUS_MASK) == (cpu_cur_status & CPU_STATUS_MASK))) + break; + } + if (a < block->cmp) + block = block->left; + else + block = block->right; + } + + return block; +} + +static inline void codeblock_tree_add(codeblock_t *new_block) +{ + codeblock_t *block = pages[new_block->phys >> 12].head; + uint64_t a = new_block->_cs | ((uint64_t)new_block->phys << 32); + new_block->cmp = a; + + if (!block) + { + pages[new_block->phys >> 12].head = new_block; + new_block->parent = new_block->left = new_block->right = NULL; + } + else + { + codeblock_t *old_block = NULL; + + while (block) + { + old_block = block; + if (a < old_block->cmp) + block = block->left; + else + block = block->right; + } + + if (a < old_block->cmp) + old_block->left = new_block; + else + old_block->right = new_block; + + new_block->parent = old_block; + new_block->left = new_block->right = NULL; + } +} + +static inline void codeblock_tree_delete(codeblock_t *block) +{ + codeblock_t *parent = block->parent; + + if (!block->left && !block->right) + { + /*Easy case - remove from parent*/ + if (!parent) + pages[block->phys >> 12].head = NULL; + else + { + if (parent->left == block) + parent->left = NULL; + if (parent->right == block) + parent->right = NULL; + } + return; + } + else if (!block->left) + { + /*Only right node*/ + if (!parent) + { + pages[block->phys >> 12].head = block->right; + pages[block->phys >> 12].head->parent = NULL; + } + else + { + if (parent->left == block) + { + parent->left = block->right; + parent->left->parent = parent; + } + if (parent->right == block) + { + parent->right = block->right; + parent->right->parent = parent; + } + } + return; + } + else if (!block->right) + { + /*Only left node*/ + if (!parent) + { + pages[block->phys >> 12].head = block->left; + pages[block->phys >> 12].head->parent = NULL; + } + else + { + if (parent->left == block) + { + parent->left = block->left; + parent->left->parent = parent; + } + if (parent->right == block) + { + parent->right = block->left; + parent->right->parent = parent; + } + } + return; + } + else + { + /*Difficult case - node has two children. Walk right child to find lowest node*/ + codeblock_t *lowest = block->right, *highest; + codeblock_t *old_parent; + + while (lowest->left) + lowest = lowest->left; + + old_parent = lowest->parent; + + /*Replace deleted node with lowest node*/ + if (!parent) + pages[block->phys >> 12].head = lowest; + else + { + if (parent->left == block) + parent->left = lowest; + if (parent->right == block) + parent->right = lowest; + } + + lowest->parent = parent; + lowest->left = block->left; + if (lowest->left) + lowest->left->parent = lowest; + + old_parent->left = NULL; + + highest = lowest->right; + if (!highest) + { + if (lowest != block->right) + { + lowest->right = block->right; + block->right->parent = lowest; + } + return; + } + + while (highest->right) + highest = highest->right; + + if (block->right && block->right != lowest) + { + highest->right = block->right; + block->right->parent = highest; + } + } +} + +#define PAGE_MASK_INDEX_MASK 3 +#define PAGE_MASK_INDEX_SHIFT 10 +#define PAGE_MASK_MASK 63 +#define PAGE_MASK_SHIFT 4 + +extern codeblock_t *codeblock; + +extern codeblock_t **codeblock_hash; + +void codegen_init(); +void codegen_reset(); +void codegen_block_init(uint32_t phys_addr); +void codegen_block_remove(); +void codegen_block_start_recompile(codeblock_t *block); +void codegen_block_end_recompile(codeblock_t *block); +void codegen_block_end(); +void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t new_pc, uint32_t old_pc); +void codegen_generate_seg_restore(); +void codegen_set_op32(); +void codegen_flush(); +void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr); + +extern int cpu_block_end; +extern uint32_t codegen_endpc; + +extern int cpu_recomp_blocks, cpu_recomp_full_ins, cpu_new_blocks; +extern int cpu_recomp_blocks_latched, cpu_recomp_ins_latched, cpu_recomp_full_ins_latched, cpu_new_blocks_latched; +extern int cpu_recomp_flushes, cpu_recomp_flushes_latched; +extern int cpu_recomp_evicted, cpu_recomp_evicted_latched; +extern int cpu_recomp_reuse, cpu_recomp_reuse_latched; +extern int cpu_recomp_removed, cpu_recomp_removed_latched; + +extern int codegen_block_cycles; + +extern void (*codegen_timing_start)(); +extern void (*codegen_timing_prefix)(uint8_t prefix, uint32_t fetchdat); +extern void (*codegen_timing_opcode)(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc); +extern void (*codegen_timing_block_start)(); +extern void (*codegen_timing_block_end)(); +extern int (*codegen_timing_jump_cycles)(); + +typedef struct codegen_timing_t +{ + void (*start)(); + void (*prefix)(uint8_t prefix, uint32_t fetchdat); + void (*opcode)(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc); + void (*block_start)(); + void (*block_end)(); + int (*jump_cycles)(); +} codegen_timing_t; + +extern codegen_timing_t codegen_timing_pentium; +extern codegen_timing_t codegen_timing_686; +extern codegen_timing_t codegen_timing_486; +extern codegen_timing_t codegen_timing_winchip; +extern codegen_timing_t codegen_timing_winchip2; +extern codegen_timing_t codegen_timing_k6; +extern codegen_timing_t codegen_timing_p6; + +void codegen_timing_set(codegen_timing_t *timing); + +extern int block_current; +extern int block_pos; + +#define CPU_BLOCK_END() cpu_block_end = 1 + +static inline void addbyte(uint8_t val) +{ + codeblock[block_current].data[block_pos++] = val; + if (block_pos >= BLOCK_MAX) + { + CPU_BLOCK_END(); + } +} + +static inline void addword(uint16_t val) +{ + uint16_t *p = (uint16_t *) &codeblock[block_current].data[block_pos]; + *p = val; + block_pos += 2; + if (block_pos >= BLOCK_MAX) + { + CPU_BLOCK_END(); + } +} + +static inline void addlong(uint32_t val) +{ + uint32_t *p = (uint32_t *) &codeblock[block_current].data[block_pos]; + *p = val; + block_pos += 4; + if (block_pos >= BLOCK_MAX) + { + CPU_BLOCK_END(); + } +} + +static inline void addquad(uint64_t val) +{ + uint64_t *p = (uint64_t *) &codeblock[block_current].data[block_pos]; + *p = val; + block_pos += 8; + if (block_pos >= BLOCK_MAX) + { + CPU_BLOCK_END(); + } +} + +/*Current physical page of block being recompiled. -1 if no recompilation taking place */ +extern uint32_t recomp_page; + +extern x86seg *op_ea_seg; +extern int op_ssegs; +extern uint32_t op_old_pc; + +/*Set to 1 if flags have been changed in the block being recompiled, and hence + flags_op is known and can be relied on */ +extern int codegen_flags_changed; + +extern int codegen_fpu_entered; +extern int codegen_mmx_entered; + +extern int codegen_fpu_loaded_iq[8]; +extern int codegen_reg_loaded[8]; + +extern int codegen_in_recompile; + +#endif diff --git a/src/codegen/codegen_accumulate.h b/src/codegen/codegen_accumulate.h new file mode 100644 index 000000000..8f9f6c95b --- /dev/null +++ b/src/codegen/codegen_accumulate.h @@ -0,0 +1,13 @@ +enum +{ + ACCREG_ins = 0, + ACCREG_cycles = 1, + + ACCREG_COUNT +}; + +struct ir_data_t; + +void codegen_accumulate(int acc_reg, int delta); +void codegen_accumulate_flush(void); +void codegen_accumulate_reset(); diff --git a/src/codegen/codegen_accumulate_x86-64.c b/src/codegen/codegen_accumulate_x86-64.c new file mode 100644 index 000000000..9b089df02 --- /dev/null +++ b/src/codegen/codegen_accumulate_x86-64.c @@ -0,0 +1,50 @@ +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/mem.h> + +#include "codegen.h" +#include "codegen_accumulate.h" + +static struct +{ + int count; + uintptr_t dest_reg; +} acc_regs[] = +{ + [ACCREG_ins] = {0, (uintptr_t) &(ins)}, + [ACCREG_cycles] = {0, (uintptr_t) &(cycles)}, +}; + +void codegen_accumulate(int acc_reg, int delta) +{ + acc_regs[acc_reg].count += delta; +} + +void codegen_accumulate_flush(void) +{ + int c; + + for (c = 0; c < ACCREG_COUNT; c++) + { + if (acc_regs[c].count) + { + addbyte(0x81); /*ADD $acc_regs[c].count,acc_regs[c].dest*/ + addbyte(0x04); + addbyte(0x25); + addlong((uint32_t) acc_regs[c].dest_reg); + addlong(acc_regs[c].count); + } + + acc_regs[c].count = 0; + } +} + +void codegen_accumulate_reset() +{ + int c; + + for (c = 0; c < ACCREG_COUNT; c++) + acc_regs[c].count = 0; +} diff --git a/src/codegen/codegen_accumulate_x86.c b/src/codegen/codegen_accumulate_x86.c new file mode 100644 index 000000000..2b99d4c66 --- /dev/null +++ b/src/codegen/codegen_accumulate_x86.c @@ -0,0 +1,49 @@ +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/mem.h> + +#include "codegen.h" +#include "codegen_accumulate.h" + +static struct +{ + int count; + uintptr_t dest_reg; +} acc_regs[] = +{ + [ACCREG_ins] = {0, (uintptr_t) &(ins)}, + [ACCREG_cycles] = {0, (uintptr_t) &(cycles)}, +}; + +void codegen_accumulate(int acc_reg, int delta) +{ + acc_regs[acc_reg].count += delta; +} + +void codegen_accumulate_flush(void) +{ + int c; + + for (c = 0; c < ACCREG_COUNT; c++) + { + if (acc_regs[c].count) + { + addbyte(0x81); /*ADD $acc_regs[c].count,acc_regs[c].dest*/ + addbyte(0x05); + addlong((uint32_t) acc_regs[c].dest_reg); + addlong(acc_regs[c].count); + } + + acc_regs[c].count = 0; + } +} + +void codegen_accumulate_reset() +{ + int c; + + for (c = 0; c < ACCREG_COUNT; c++) + acc_regs[c].count = 0; +} diff --git a/src/codegen/codegen_ops.c b/src/codegen/codegen_ops.c new file mode 100644 index 000000000..d1410346f --- /dev/null +++ b/src/codegen/codegen_ops.c @@ -0,0 +1,598 @@ +#include +#include +#include +#include + +#include <86box/86box.h> +#include <86box/mem.h> +#include "cpu.h" +#include "x86.h" +#include "x86_ops.h" +#include "x86_flags.h" +#include "x87.h" +#include "386_common.h" +#include "cpu.h" +#include "codegen.h" +#include "codegen_ops.h" + +#ifdef __amd64__ +#include "codegen_ops_x86-64.h" +#elif defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 +#include "codegen_ops_x86.h" +#endif + +#include "codegen_ops_arith.h" +#include "codegen_ops_fpu.h" +#include "codegen_ops_jump.h" +#include "codegen_ops_logic.h" +#include "codegen_ops_misc.h" +#include "codegen_ops_mmx.h" +#include "codegen_ops_mov.h" +#include "codegen_ops_shift.h" +#include "codegen_ops_stack.h" +#include "codegen_ops_xchg.h" + +RecompOpFn recomp_opcodes[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropADD_b_rmw, ropADD_w_rmw, ropADD_b_rm, ropADD_w_rm, ropADD_AL_imm, ropADD_AX_imm, ropPUSH_ES_16, ropPOP_ES_16, ropOR_b_rmw, ropOR_w_rmw, ropOR_b_rm, ropOR_w_rm, ropOR_AL_imm, ropOR_AX_imm, ropPUSH_CS_16, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, ropPUSH_SS_16, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropPUSH_DS_16, ropPOP_DS_16, +/*20*/ ropAND_b_rmw, ropAND_w_rmw, ropAND_b_rm, ropAND_w_rm, ropAND_AL_imm, ropAND_AX_imm, NULL, NULL, ropSUB_b_rmw, ropSUB_w_rmw, ropSUB_b_rm, ropSUB_w_rm, ropSUB_AL_imm, ropSUB_AX_imm, NULL, NULL, +/*30*/ ropXOR_b_rmw, ropXOR_w_rmw, ropXOR_b_rm, ropXOR_w_rm, ropXOR_AL_imm, ropXOR_AX_imm, NULL, NULL, ropCMP_b_rmw, ropCMP_w_rmw, ropCMP_b_rm, ropCMP_w_rm, ropCMP_AL_imm, ropCMP_AX_imm, NULL, NULL, + +/*40*/ ropINC_rw, ropINC_rw, ropINC_rw, ropINC_rw, ropINC_rw, ropINC_rw, ropINC_rw, ropINC_rw, ropDEC_rw, ropDEC_rw, ropDEC_rw, ropDEC_rw, ropDEC_rw, ropDEC_rw, ropDEC_rw, ropDEC_rw, +/*50*/ ropPUSH_16, ropPUSH_16, ropPUSH_16, ropPUSH_16, ropPUSH_16, ropPUSH_16, ropPUSH_16, ropPUSH_16, ropPOP_16, ropPOP_16, ropPOP_16, ropPOP_16, ropPOP_16, ropPOP_16, ropPOP_16, ropPOP_16, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropPUSH_imm_16, NULL, ropPUSH_imm_b16,NULL, NULL, NULL, NULL, NULL, +/*70*/ ropJO, ropJNO, ropJB, ropJNB, ropJE, ropJNE, ropJBE, ropJNBE, ropJS, ropJNS, ropJP, ropJNP, ropJL, ropJNL, ropJLE, ropJNLE, + +/*80*/ rop80, rop81_w, rop80, rop83_w, ropTEST_b_rm, ropTEST_w_rm, ropXCHG_b, ropXCHG_w, ropMOV_b_r, ropMOV_w_r, ropMOV_r_b, ropMOV_r_w, ropMOV_w_seg, ropLEA_w, ropMOV_seg_w, NULL, +/*90*/ ropNOP, ropXCHG_AX_CX, ropXCHG_AX_DX, ropXCHG_AX_BX, ropXCHG_AX_SP, ropXCHG_AX_BP, ropXCHG_AX_SI, ropXCHG_AX_DI, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*a0*/ ropMOV_AL_a, ropMOV_AX_a, ropMOV_a_AL, ropMOV_a_AX, NULL, NULL, NULL, NULL, ropTEST_AL_imm, ropTEST_AX_imm, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rw_imm, ropMOV_rw_imm, ropMOV_rw_imm, ropMOV_rw_imm, ropMOV_rw_imm, ropMOV_rw_imm, ropMOV_rw_imm, ropMOV_rw_imm, + +/*c0*/ ropC0, ropC1_w, ropRET_imm_16, ropRET_16, ropLES, ropLDS, ropMOV_b_imm, ropMOV_w_imm, NULL, ropLEAVE_16, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ ropD0, ropD1_w, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, ropLOOP, ropJCXZ, NULL, NULL, NULL, NULL, ropCALL_r16, ropJMP_r16, NULL, ropJMP_r8, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, ropF6, ropF7_w, NULL, NULL, ropCLI, ropSTI, ropCLD, ropSTD, ropFE, ropFF_16, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropADD_b_rmw, ropADD_l_rmw, ropADD_b_rm, ropADD_l_rm, ropADD_AL_imm, ropADD_EAX_imm, ropPUSH_ES_32, ropPOP_ES_32, ropOR_b_rmw, ropOR_l_rmw, ropOR_b_rm, ropOR_l_rm, ropOR_AL_imm, ropOR_EAX_imm, ropPUSH_CS_32, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, ropPUSH_SS_32, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropPUSH_DS_32, ropPOP_DS_32, +/*20*/ ropAND_b_rmw, ropAND_l_rmw, ropAND_b_rm, ropAND_l_rm, ropAND_AL_imm, ropAND_EAX_imm, NULL, NULL, ropSUB_b_rmw, ropSUB_l_rmw, ropSUB_b_rm, ropSUB_l_rm, ropSUB_AL_imm, ropSUB_EAX_imm, NULL, NULL, +/*30*/ ropXOR_b_rmw, ropXOR_l_rmw, ropXOR_b_rm, ropXOR_l_rm, ropXOR_AL_imm, ropXOR_EAX_imm, NULL, NULL, ropCMP_b_rmw, ropCMP_l_rmw, ropCMP_b_rm, ropCMP_l_rm, ropCMP_AL_imm, ropCMP_EAX_imm, NULL, NULL, + +/*40*/ ropINC_rl, ropINC_rl, ropINC_rl, ropINC_rl, ropINC_rl, ropINC_rl, ropINC_rl, ropINC_rl, ropDEC_rl, ropDEC_rl, ropDEC_rl, ropDEC_rl, ropDEC_rl, ropDEC_rl, ropDEC_rl, ropDEC_rl, +/*50*/ ropPUSH_32, ropPUSH_32, ropPUSH_32, ropPUSH_32, ropPUSH_32, ropPUSH_32, ropPUSH_32, ropPUSH_32, ropPOP_32, ropPOP_32, ropPOP_32, ropPOP_32, ropPOP_32, ropPOP_32, ropPOP_32, ropPOP_32, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropPUSH_imm_32, NULL, ropPUSH_imm_b32,NULL, NULL, NULL, NULL, NULL, +/*70*/ ropJO, ropJNO, ropJB, ropJNB, ropJE, ropJNE, ropJBE, ropJNBE, ropJS, ropJNS, ropJP, ropJNP, ropJL, ropJNL, ropJLE, ropJNLE, + +/*80*/ rop80, rop81_l, rop80, rop83_l, ropTEST_b_rm, ropTEST_l_rm, ropXCHG_b, ropXCHG_l, ropMOV_b_r, ropMOV_l_r, ropMOV_r_b, ropMOV_r_l, ropMOV_w_seg, ropLEA_l, ropMOV_seg_w, NULL, +/*90*/ ropNOP, ropXCHG_EAX_ECX,ropXCHG_EAX_EDX,ropXCHG_EAX_EBX,ropXCHG_EAX_ESP,ropXCHG_EAX_EBP,ropXCHG_EAX_ESI,ropXCHG_EAX_EDI,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*a0*/ ropMOV_AL_a, ropMOV_EAX_a, ropMOV_a_AL, ropMOV_a_EAX, NULL, NULL, NULL, NULL, ropTEST_AL_imm, ropTEST_EAX_imm,NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rb_imm, ropMOV_rl_imm, ropMOV_rl_imm, ropMOV_rl_imm, ropMOV_rl_imm, ropMOV_rl_imm, ropMOV_rl_imm, ropMOV_rl_imm, ropMOV_rl_imm, + +/*c0*/ ropC0, ropC1_l, ropRET_imm_32, ropRET_32, ropLES, ropLDS, ropMOV_b_imm, ropMOV_l_imm, NULL, ropLEAVE_32, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ ropD0, ropD1_l, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, ropLOOP, ropJCXZ, NULL, NULL, NULL, NULL, ropCALL_r32, ropJMP_r32, NULL, ropJMP_r8, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, ropF6, ropF7_l, NULL, NULL, ropCLI, ropSTI, ropCLD, ropSTD, ropFE, ropFF_32 +}; + +RecompOpFn recomp_opcodes_0f[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropEMMS, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*80*/ ropJO_w, ropJNO_w, ropJB_w, ropJNB_w, ropJE_w, ropJNE_w, ropJBE_w, ropJNBE_w, ropJS_w, ropJNS_w, ropJP_w, ropJNP_w, ropJL_w, ropJNL_w, ropJLE_w, ropJNLE_w, +/*90*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*a0*/ ropPUSH_FS_16, ropPOP_FS_16, NULL, NULL, NULL, NULL, NULL, NULL, ropPUSH_GS_16, ropPOP_GS_16, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, ropLSS, NULL, ropLFS, ropLGS, ropMOVZX_w_b, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropMOVSX_w_b, NULL, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ ropPUNPCKLBW, ropPUNPCKLWD, ropPUNPCKLDQ, ropPACKSSWB, ropPCMPGTB, ropPCMPGTW, ropPCMPGTD, ropPACKUSWB, ropPUNPCKHBW, ropPUNPCKHWD, ropPUNPCKHDQ, ropPACKSSDW, NULL, NULL, ropMOVD_mm_l, ropMOVQ_mm_q, +/*70*/ NULL, ropPSxxW_imm, ropPSxxD_imm, ropPSxxQ_imm, ropPCMPEQB, ropPCMPEQW, ropPCMPEQD, ropEMMS, NULL, NULL, NULL, NULL, NULL, NULL, ropMOVD_l_mm, ropMOVQ_q_mm, + +/*80*/ ropJO_l, ropJNO_l, ropJB_l, ropJNB_l, ropJE_l, ropJNE_l, ropJBE_l, ropJNBE_l, ropJS_l, ropJNS_l, ropJP_l, ropJNP_l, ropJL_l, ropJNL_l, ropJLE_l, ropJNLE_l, +/*90*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*a0*/ ropPUSH_FS_32, ropPOP_FS_32, NULL, NULL, NULL, NULL, NULL, NULL, ropPUSH_GS_32, ropPOP_GS_32, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, ropLSS, NULL, ropLFS, ropLGS, ropMOVZX_l_b, ropMOVZX_l_w, NULL, NULL, NULL, NULL, NULL, NULL, ropMOVSX_l_b, ropMOVSX_l_w, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, ropPSRLW, ropPSRLD, ropPSRLQ, NULL, ropPMULLW, NULL, NULL, ropPSUBUSB, ropPSUBUSW, NULL, ropPAND, ropPADDUSB, ropPADDUSW, NULL, ropPANDN, +/*e0*/ NULL, ropPSRAW, ropPSRAD, NULL, NULL, ropPMULHW, NULL, NULL, ropPSUBSB, ropPSUBSW, NULL, ropPOR, ropPADDSB, ropPADDSW, NULL, ropPXOR, +/*f0*/ NULL, ropPSLLW, ropPSLLD, ropPSLLQ, NULL, ropPMADDWD, NULL, NULL, ropPSUBB, ropPSUBW, ropPSUBD, NULL, ropPADDB, ropPADDW, ropPADDD, NULL, +}; + + +RecompOpFn recomp_opcodes_d8[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, +/*10*/ ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, +/*20*/ ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, +/*30*/ ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, + +/*40*/ ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, +/*50*/ ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, +/*60*/ ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, +/*70*/ ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, + +/*80*/ ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, +/*90*/ ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, +/*a0*/ ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, +/*b0*/ ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, + +/*c0*/ ropFADD, ropFADD, ropFADD, ropFADD, ropFADD, ropFADD, ropFADD, ropFADD, ropFMUL, ropFMUL, ropFMUL, ropFMUL, ropFMUL, ropFMUL, ropFMUL, ropFMUL, +/*d0*/ ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, +/*e0*/ ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, +/*f0*/ ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, +/*10*/ ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, +/*20*/ ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, +/*30*/ ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, + +/*40*/ ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, +/*50*/ ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, +/*60*/ ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, +/*70*/ ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, + +/*80*/ ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFADDs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, ropFMULs, +/*90*/ ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, ropFCOMPs, +/*a0*/ ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, ropFSUBRs, +/*b0*/ ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, ropFDIVRs, + +/*c0*/ ropFADD, ropFADD, ropFADD, ropFADD, ropFADD, ropFADD, ropFADD, ropFADD, ropFMUL, ropFMUL, ropFMUL, ropFMUL, ropFMUL, ropFMUL, ropFMUL, ropFMUL, +/*d0*/ ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, +/*e0*/ ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUB, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, ropFSUBR, +/*f0*/ ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIV, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, ropFDIVR, +}; + +RecompOpFn recomp_opcodes_d9[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, + +/*40*/ ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, + +/*80*/ ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, + +/*c0*/ ropFLD, ropFLD, ropFLD, ropFLD, ropFLD, ropFLD, ropFLD, ropFLD, ropFXCH, ropFXCH, ropFXCH, ropFXCH, ropFXCH, ropFXCH, ropFXCH, ropFXCH, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ ropFCHS, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFLD1, ropFLDL2T, ropFLDL2E, ropFLDPI, ropFLDEG2, ropFLDLN2, ropFLDZ, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, + +/*40*/ ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, + +/*80*/ ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, ropFLDs, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, ropFSTPs, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, ropFLDCW, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, ropFSTCW, + +/*c0*/ ropFLD, ropFLD, ropFLD, ropFLD, ropFLD, ropFLD, ropFLD, ropFLD, ropFXCH, ropFXCH, ropFXCH, ropFXCH, ropFXCH, ropFXCH, ropFXCH, ropFXCH, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ ropFCHS, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFLD1, ropFLDL2T, ropFLDL2E, ropFLDPI, ropFLDEG2, ropFLDLN2, ropFLDZ, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +RecompOpFn recomp_opcodes_da[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, +/*10*/ ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, +/*20*/ ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, +/*30*/ ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, + +/*40*/ ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, +/*50*/ ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, +/*60*/ ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, +/*70*/ ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, + +/*80*/ ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, +/*90*/ ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, +/*a0*/ ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, +/*b0*/ ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, +/*10*/ ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, +/*20*/ ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, +/*30*/ ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, + +/*40*/ ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, +/*50*/ ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, +/*60*/ ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, +/*70*/ ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, + +/*80*/ ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFADDil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, ropFMULil, +/*90*/ ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, ropFCOMPil, +/*a0*/ ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, ropFSUBRil, +/*b0*/ ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, ropFDIVRil, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +RecompOpFn recomp_opcodes_db[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*80*/ ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*80*/ ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*80*/ ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, ropFILDl, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, ropFISTPl, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +RecompOpFn recomp_opcodes_dc[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, +/*10*/ ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, +/*20*/ ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, +/*30*/ ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, + +/*40*/ ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, +/*50*/ ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, +/*60*/ ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, +/*70*/ ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, + +/*80*/ ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, +/*90*/ ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, +/*a0*/ ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, +/*b0*/ ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, + +/*c0*/ ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, +/*d0*/ ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, +/*e0*/ ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, +/*f0*/ ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, +/*10*/ ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, +/*20*/ ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, +/*30*/ ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, + +/*40*/ ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, +/*50*/ ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, +/*60*/ ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, +/*70*/ ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, + +/*80*/ ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFADDd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, ropFMULd, +/*90*/ ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, ropFCOMPd, +/*a0*/ ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, ropFSUBRd, +/*b0*/ ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, ropFDIVRd, + +/*c0*/ ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFADDr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, ropFMULr, +/*d0*/ ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOM, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, ropFCOMP, +/*e0*/ ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBRr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, ropFSUBr, +/*f0*/ ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVRr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, ropFDIVr, +}; + +RecompOpFn recomp_opcodes_dd[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*80*/ ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ ropFST, ropFST, ropFST, ropFST, ropFST, ropFST, ropFST, ropFST, ropFSTP, ropFSTP, ropFSTP, ropFSTP, ropFSTP, ropFSTP, ropFSTP, ropFSTP, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*80*/ ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, ropFLDd, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, ropFSTPd, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ ropFST, ropFST, ropFST, ropFST, ropFST, ropFST, ropFST, ropFST, ropFSTP, ropFSTP, ropFSTP, ropFSTP, ropFSTP, ropFSTP, ropFSTP, ropFSTP, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +RecompOpFn recomp_opcodes_de[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, +/*10*/ ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, +/*20*/ ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, +/*30*/ ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, + +/*40*/ ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, +/*50*/ ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, +/*60*/ ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, +/*70*/ ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, + +/*80*/ ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, +/*90*/ ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, +/*a0*/ ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, +/*b0*/ ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, + +/*c0*/ ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFCOMPP, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, +/*f0*/ ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, +/*10*/ ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, +/*20*/ ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, +/*30*/ ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, + +/*40*/ ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, +/*50*/ ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, +/*60*/ ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, +/*70*/ ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, + +/*80*/ ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFADDiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, ropFMULiw, +/*90*/ ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, ropFCOMPiw, +/*a0*/ ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, ropFSUBRiw, +/*b0*/ ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIViw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, ropFDIVRiw, + +/*c0*/ ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFADDP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, ropFMULP, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFCOMPP, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBRP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, ropFSUBP, +/*f0*/ ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVRP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, ropFDIVP, +}; + +RecompOpFn recomp_opcodes_df[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, + +/*40*/ ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, + +/*80*/ ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ ropFSTSW_AX, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, + +/*40*/ ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, + +/*80*/ ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, ropFILDw, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, ropFISTPw, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, ropFILDq, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, ropFISTPq, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ ropFSTSW_AX, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +RecompOpFn recomp_opcodes_REPE[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*80*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*80*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +RecompOpFn recomp_opcodes_REPNE[512] = +{ + /*16-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*80*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /*32-bit data*/ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*80*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*90*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*a0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*b0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; diff --git a/src/codegen/codegen_ops.h b/src/codegen/codegen_ops.h new file mode 100644 index 000000000..fe54cb9a4 --- /dev/null +++ b/src/codegen/codegen_ops.h @@ -0,0 +1,46 @@ +#ifndef _CODEGEN_OPS_H_ +#define _CODEGEN_OPS_H_ + +#include "codegen.h" + +typedef uint32_t (*RecompOpFn)(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block); + +extern RecompOpFn recomp_opcodes[512]; +extern RecompOpFn recomp_opcodes_0f[512]; +extern RecompOpFn recomp_opcodes_d8[512]; +extern RecompOpFn recomp_opcodes_d9[512]; +extern RecompOpFn recomp_opcodes_da[512]; +extern RecompOpFn recomp_opcodes_db[512]; +extern RecompOpFn recomp_opcodes_dc[512]; +extern RecompOpFn recomp_opcodes_dd[512]; +extern RecompOpFn recomp_opcodes_de[512]; +extern RecompOpFn recomp_opcodes_df[512]; +extern RecompOpFn recomp_opcodes_REPE[512]; +extern RecompOpFn recomp_opcodes_REPNE[512]; + +#define REG_EAX 0 +#define REG_ECX 1 +#define REG_EDX 2 +#define REG_EBX 3 +#define REG_ESP 4 +#define REG_EBP 5 +#define REG_ESI 6 +#define REG_EDI 7 +#define REG_AX 0 +#define REG_CX 1 +#define REG_DX 2 +#define REG_BX 3 +#define REG_SP 4 +#define REG_BP 5 +#define REG_SI 6 +#define REG_DI 7 +#define REG_AL 0 +#define REG_AH 4 +#define REG_CL 1 +#define REG_CH 5 +#define REG_DL 2 +#define REG_DH 6 +#define REG_BL 3 +#define REG_BH 7 + +#endif diff --git a/src/codegen/codegen_ops_arith.h b/src/codegen/codegen_ops_arith.h new file mode 100644 index 000000000..ed2ce1ece --- /dev/null +++ b/src/codegen/codegen_ops_arith.h @@ -0,0 +1,1028 @@ +static uint32_t ropINC_rw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + CALL_FUNC((uintptr_t)flags_rebuild_c); + + host_reg = LOAD_REG_W(opcode & 7); + + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM_W(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_INC16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_W_RELEASE(host_reg); + + codegen_flags_changed = 1; + + return op_pc; +} +static uint32_t ropINC_rl(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + CALL_FUNC((uintptr_t)flags_rebuild_c); + + host_reg = LOAD_REG_L(opcode & 7); + + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_INC32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_L_RELEASE(host_reg); + + codegen_flags_changed = 1; + + return op_pc; +} +static uint32_t ropDEC_rw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + CALL_FUNC((uintptr_t)flags_rebuild_c); + + host_reg = LOAD_REG_W(opcode & 7); + + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM_W(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_DEC16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_W_RELEASE(host_reg); + + codegen_flags_changed = 1; + + return op_pc; +} +static uint32_t ropDEC_rl(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + CALL_FUNC((uintptr_t)flags_rebuild_c); + + host_reg = LOAD_REG_L(opcode & 7); + + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_DEC32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_L_RELEASE(host_reg); + + codegen_flags_changed = 1; + + return op_pc; +} + +#define ROP_ARITH_RMW(name, op, writeback) \ + static uint32_t rop ## name ## _b_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + x86seg *target_seg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_B(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_B_NO_ABRT(target_seg); \ + } \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ ## op ## 8); \ + src_reg = LOAD_REG_B((fetchdat >> 3) & 7); \ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, dst_reg); \ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op2, src_reg); \ + op ## _HOST_REG_B(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_B_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_B_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + codegen_flags_changed = 1; \ + return op_pc + 1; \ + } \ + static uint32_t rop ## name ## _w_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + x86seg *target_seg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_W(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE_W(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_W_NO_ABRT(target_seg); \ + } \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ ## op ## 16); \ + src_reg = LOAD_REG_W((fetchdat >> 3) & 7); \ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, dst_reg); \ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op2, src_reg); \ + op ## _HOST_REG_W(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_W_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + codegen_flags_changed = 1; \ + return op_pc + 1; \ + } \ + static uint32_t rop ## name ## _l_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + x86seg *target_seg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_L(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE_L(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_L_NO_ABRT(target_seg); \ + } \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ ## op ## 32); \ + src_reg = LOAD_REG_L((fetchdat >> 3) & 7); \ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, dst_reg); \ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op2, src_reg); \ + op ## _HOST_REG_L(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_L_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + codegen_flags_changed = 1; \ + return op_pc + 1; \ + } + +#define ROP_ARITH_RM(name, op, writeback) \ + static uint32_t rop ## name ## _b_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + src_reg = LOAD_REG_B(fetchdat & 7); \ + } \ + else \ + { \ + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + MEM_LOAD_ADDR_EA_B(target_seg); \ + src_reg = 0; \ + } \ + \ + dst_reg = LOAD_REG_B((fetchdat >> 3) & 7); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ ## op ## 8); \ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, dst_reg); \ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op2, src_reg); \ + op ## _HOST_REG_B(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) STORE_REG_B_RELEASE(dst_reg); \ + else RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + codegen_flags_changed = 1; \ + return op_pc + 1; \ + } \ + static uint32_t rop ## name ## _w_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + src_reg = LOAD_REG_W(fetchdat & 7); \ + } \ + else \ + { \ + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + MEM_LOAD_ADDR_EA_W(target_seg); \ + src_reg = 0; \ + } \ + \ + dst_reg = LOAD_REG_W((fetchdat >> 3) & 7); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ ## op ## 16); \ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, dst_reg); \ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op2, src_reg); \ + op ## _HOST_REG_W(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) STORE_REG_W_RELEASE(dst_reg); \ + else RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + codegen_flags_changed = 1; \ + return op_pc + 1; \ + } \ + static uint32_t rop ## name ## _l_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + src_reg = LOAD_REG_L(fetchdat & 7); \ + } \ + else \ + { \ + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + MEM_LOAD_ADDR_EA_L(target_seg); \ + src_reg = 0; \ + } \ + \ + dst_reg = LOAD_REG_L((fetchdat >> 3) & 7); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ ## op ## 32); \ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, dst_reg); \ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op2, src_reg); \ + op ## _HOST_REG_L(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) STORE_REG_L_RELEASE(dst_reg); \ + else RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + codegen_flags_changed = 1; \ + return op_pc + 1; \ + } + +ROP_ARITH_RMW(ADD, ADD, 1) +ROP_ARITH_RMW(SUB, SUB, 1) +ROP_ARITH_RM(ADD, ADD, 1) +ROP_ARITH_RM(SUB, SUB, 1) + +static uint32_t ropCMP_b_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg; + + if ((fetchdat & 0xc0) == 0xc0) + { + src_reg = LOAD_REG_B(fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_B(target_seg); + src_reg = 0; + } + + dst_reg = LOAD_REG_B((fetchdat >> 3) & 7); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB8); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, dst_reg); + dst_reg = CMP_HOST_REG_B(dst_reg, src_reg); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op2, src_reg); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, dst_reg); + RELEASE_REG(dst_reg); + RELEASE_REG(src_reg); + + codegen_flags_changed = 1; + return op_pc + 1; +} +static uint32_t ropCMP_w_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg; + + if ((fetchdat & 0xc0) == 0xc0) + { + src_reg = LOAD_REG_W(fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_W(target_seg); + src_reg = 0; + } + + dst_reg = LOAD_REG_W((fetchdat >> 3) & 7); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, dst_reg); + dst_reg = CMP_HOST_REG_W(dst_reg, src_reg); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op2, src_reg); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, dst_reg); + RELEASE_REG(dst_reg); + RELEASE_REG(src_reg); + + codegen_flags_changed = 1; + return op_pc + 1; +} +static uint32_t ropCMP_l_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg; + + if ((fetchdat & 0xc0) == 0xc0) + { + src_reg = LOAD_REG_L(fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_L(target_seg); + src_reg = 0; + } + + dst_reg = LOAD_REG_L((fetchdat >> 3) & 7); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, dst_reg); + dst_reg = CMP_HOST_REG_L(dst_reg, src_reg); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op2, src_reg); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, dst_reg); + RELEASE_REG(dst_reg); + RELEASE_REG(src_reg); + + codegen_flags_changed = 1; + return op_pc + 1; +} + +static uint32_t ropCMP_b_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg; + + if ((fetchdat & 0xc0) == 0xc0) + { + dst_reg = LOAD_REG_B(fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_B(target_seg); + dst_reg = 0; + } + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB8); + src_reg = LOAD_REG_B((fetchdat >> 3) & 7); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, dst_reg); + dst_reg = CMP_HOST_REG_B(dst_reg, src_reg); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op2, src_reg); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, dst_reg); + RELEASE_REG(dst_reg); + RELEASE_REG(src_reg); + + codegen_flags_changed = 1; + return op_pc + 1; +} +static uint32_t ropCMP_w_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg; + + if ((fetchdat & 0xc0) == 0xc0) + { + dst_reg = LOAD_REG_W(fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_W(target_seg); + dst_reg = 0; + } \ + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB16); + src_reg = LOAD_REG_W((fetchdat >> 3) & 7); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, dst_reg); + dst_reg = CMP_HOST_REG_W(dst_reg, src_reg); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op2, src_reg); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, dst_reg); + RELEASE_REG(dst_reg); + RELEASE_REG(src_reg); + + codegen_flags_changed = 1; + return op_pc + 1; +} +static uint32_t ropCMP_l_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg; + + if ((fetchdat & 0xc0) == 0xc0) + { + dst_reg = LOAD_REG_L(fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_L(target_seg); + dst_reg = 0; + } + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB32); + src_reg = LOAD_REG_L((fetchdat >> 3) & 7); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, dst_reg); + dst_reg = CMP_HOST_REG_L(dst_reg, src_reg); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op2, src_reg); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, dst_reg); + RELEASE_REG(dst_reg); + RELEASE_REG(src_reg); + + codegen_flags_changed = 1; + return op_pc + 1; +} + + +static uint32_t ropADD_AL_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_B(REG_AL); + + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM_B(host_reg, fetchdat & 0xff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, fetchdat & 0xff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ADD8); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_B_RELEASE(host_reg); + + codegen_flags_changed = 1; + return op_pc + 1; +} +static uint32_t ropADD_AX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_W(REG_AX); + + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM_W(host_reg, fetchdat & 0xffff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, fetchdat & 0xffff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ADD16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_W_RELEASE(host_reg); + + codegen_flags_changed = 1; + return op_pc + 2; +} +static uint32_t ropADD_EAX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_L(REG_EAX); + + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + fetchdat = fastreadl(cs + op_pc); + ADD_HOST_REG_IMM(host_reg, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ADD32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_L_RELEASE(host_reg); + + codegen_flags_changed = 1; + return op_pc + 4; +} + +static uint32_t ropCMP_AL_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_B(REG_AL); + + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, host_reg); + host_reg = CMP_HOST_REG_IMM_B(host_reg, fetchdat & 0xff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, fetchdat & 0xff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB8); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + RELEASE_REG(host_reg); + + codegen_flags_changed = 1; + return op_pc + 1; +} +static uint32_t ropCMP_AX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_W(REG_AX); + + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + host_reg = CMP_HOST_REG_IMM_W(host_reg, fetchdat & 0xffff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, fetchdat & 0xffff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + RELEASE_REG(host_reg); + + codegen_flags_changed = 1; + return op_pc + 2; +} +static uint32_t ropCMP_EAX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_L(REG_EAX); + + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + fetchdat = fastreadl(cs + op_pc); + host_reg = CMP_HOST_REG_IMM_L(host_reg, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + RELEASE_REG(host_reg); + + codegen_flags_changed = 1; + return op_pc + 4; +} + +static uint32_t ropSUB_AL_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_B(REG_AL); + + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM_B(host_reg, fetchdat & 0xff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, fetchdat & 0xff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB8); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_B_RELEASE(host_reg); + + codegen_flags_changed = 1; + return op_pc + 1; +} +static uint32_t ropSUB_AX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_W(REG_AX); + + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM_W(host_reg, fetchdat & 0xffff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, fetchdat & 0xffff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_W_RELEASE(host_reg); + + codegen_flags_changed = 1; + return op_pc + 2; +} +static uint32_t ropSUB_EAX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_L(REG_EAX); + + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + fetchdat = fastreadl(cs + op_pc); + SUB_HOST_REG_IMM(host_reg, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_L_RELEASE(host_reg); + + codegen_flags_changed = 1; + return op_pc + 4; +} + +static uint32_t rop80(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + uint32_t imm; + x86seg *target_seg = NULL; + + if ((fetchdat & 0x30) == 0x10) + return 0; + + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_B(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE(target_seg); + host_reg = MEM_LOAD_ADDR_EA_B_NO_ABRT(target_seg); + } + imm = fastreadb(cs + op_pc + 1); + } + else + { + host_reg = LOAD_REG_B(fetchdat & 7); + imm = (fetchdat >> 8) & 0xff; + } + + switch (fetchdat & 0x38) + { + case 0x00: /*ADD*/ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM_B(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ADD8); + break; + case 0x08: /*OR*/ + OR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); + break; + case 0x20: /*AND*/ + AND_HOST_REG_IMM(host_reg, imm | 0xffffff00); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); + break; + case 0x28: /*SUB*/ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM_B(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB8); + break; + case 0x30: /*XOR*/ + XOR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); + break; + case 0x38: /*CMP*/ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, host_reg); + host_reg = CMP_HOST_REG_IMM_B(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB8); + break; + } + + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + if ((fetchdat & 0x38) != 0x38) + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_B_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_B_RELEASE(host_reg); + } + } + else + RELEASE_REG(host_reg); + + codegen_flags_changed = 1; + return op_pc + 2; +} + +static uint32_t rop81_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + uint32_t imm; + x86seg *target_seg = NULL; + + if ((fetchdat & 0x30) == 0x10) + return 0; + + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_W(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE_W(target_seg); + host_reg = MEM_LOAD_ADDR_EA_W_NO_ABRT(target_seg); + } + imm = fastreadw(cs + op_pc + 1); + } + else + { + host_reg = LOAD_REG_W(fetchdat & 7); + imm = (fetchdat >> 8) & 0xffff; + } + + switch (fetchdat & 0x38) + { + case 0x00: /*ADD*/ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM_W(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ADD16); + break; + case 0x08: /*OR*/ + OR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + break; + case 0x20: /*AND*/ + AND_HOST_REG_IMM(host_reg, imm | 0xffff0000); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + break; + case 0x28: /*SUB*/ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM_W(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB16); + break; + case 0x30: /*XOR*/ + XOR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + break; + case 0x38: /*CMP*/ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + host_reg = CMP_HOST_REG_IMM_W(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB16); + break; + } + + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + if ((fetchdat & 0x38) != 0x38) + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_W_RELEASE(host_reg); + } + } + else + RELEASE_REG(host_reg); + + codegen_flags_changed = 1; + return op_pc + 3; +} +static uint32_t rop81_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + uint32_t imm; + x86seg *target_seg = NULL; + + if ((fetchdat & 0x30) == 0x10) + return 0; + + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_L(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE(target_seg); + host_reg = MEM_LOAD_ADDR_EA_L_NO_ABRT(target_seg); + } + } + else + { + host_reg = LOAD_REG_L(fetchdat & 7); + } + imm = fastreadl(cs + op_pc + 1); + + switch (fetchdat & 0x38) + { + case 0x00: /*ADD*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ADD32); + break; + case 0x08: /*OR*/ + OR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + break; + case 0x20: /*AND*/ + AND_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + break; + case 0x28: /*SUB*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB32); + break; + case 0x30: /*XOR*/ + XOR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + break; + case 0x38: /*CMP*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + host_reg = CMP_HOST_REG_IMM_L(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB32); + break; + } + + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + if ((fetchdat & 0x38) != 0x38) + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_L_RELEASE(host_reg); + } + } + else + RELEASE_REG(host_reg); + + codegen_flags_changed = 1; + return op_pc + 5; +} + +static uint32_t rop83_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + uint32_t imm; + x86seg *target_seg = NULL; + + if ((fetchdat & 0x30) == 0x10) + return 0; + + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_W(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE_W(target_seg); + host_reg = MEM_LOAD_ADDR_EA_W_NO_ABRT(target_seg); + } + imm = fastreadb(cs + op_pc + 1); + } + else + { + host_reg = LOAD_REG_W(fetchdat & 7); + imm = (fetchdat >> 8) & 0xff; + } + + if (imm & 0x80) + imm |= 0xff80; + + switch (fetchdat & 0x38) + { + case 0x00: /*ADD*/ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM_W(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ADD16); + break; + case 0x08: /*OR*/ + OR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + break; + case 0x20: /*AND*/ + AND_HOST_REG_IMM(host_reg, imm | 0xffff0000); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + break; + case 0x28: /*SUB*/ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM_W(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB16); + break; + case 0x30: /*XOR*/ + XOR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + break; + case 0x38: /*CMP*/ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + host_reg = CMP_HOST_REG_IMM_W(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB16); + break; + } + + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + if ((fetchdat & 0x38) != 0x38) + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_W_RELEASE(host_reg); + } + } + else + RELEASE_REG(host_reg); + + codegen_flags_changed = 1; + return op_pc + 2; +} +static uint32_t rop83_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + uint32_t imm; + x86seg *target_seg = NULL; + + if ((fetchdat & 0x30) == 0x10) + return 0; + + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_L(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE_L(target_seg); + host_reg = MEM_LOAD_ADDR_EA_L_NO_ABRT(target_seg); + } + imm = fastreadb(cs + op_pc + 1); + } + else + { + host_reg = LOAD_REG_L(fetchdat & 7); + imm = (fetchdat >> 8) & 0xff; + } + + if (imm & 0x80) + imm |= 0xffffff80; + + switch (fetchdat & 0x38) + { + case 0x00: /*ADD*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ADD32); + break; + case 0x08: /*OR*/ + OR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + break; + case 0x20: /*AND*/ + AND_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + break; + case 0x28: /*SUB*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB32); + break; + case 0x30: /*XOR*/ + XOR_HOST_REG_IMM(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + break; + case 0x38: /*CMP*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + host_reg = CMP_HOST_REG_IMM_L(host_reg, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, imm); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB32); + break; + } + + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + if ((fetchdat & 0x38) != 0x38) + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_L_RELEASE(host_reg); + } + } + else + RELEASE_REG(host_reg); + + codegen_flags_changed = 1; + return op_pc + 2; +} diff --git a/src/codegen/codegen_ops_fpu.h b/src/codegen/codegen_ops_fpu.h new file mode 100644 index 000000000..481eadb8c --- /dev/null +++ b/src/codegen/codegen_ops_fpu.h @@ -0,0 +1,645 @@ +static uint32_t ropFXCH(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + + FP_FXCH(opcode & 7); + + return op_pc; +} + +static uint32_t ropFLD(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + + FP_FLD(opcode & 7); + + return op_pc; +} + +static uint32_t ropFST(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + + FP_FST(opcode & 7); + + return op_pc; +} +static uint32_t ropFSTP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + + FP_FST(opcode & 7); + FP_POP(); + + return op_pc; +} + + +static uint32_t ropFLDs(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_L(target_seg); + + FP_LOAD_S(); + + return op_pc + 1; +} +static uint32_t ropFLDd(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_Q(target_seg); + + FP_LOAD_D(); + + return op_pc + 1; +} + +static uint32_t ropFILDw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_W(target_seg); + + FP_LOAD_IW(); + + return op_pc + 1; +} +static uint32_t ropFILDl(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_L(target_seg); + + FP_LOAD_IL(); + + return op_pc + 1; +} +static uint32_t ropFILDq(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_Q(target_seg); + + FP_LOAD_IQ(); + + codegen_fpu_loaded_iq[(cpu_state.TOP - 1) & 7] = 1; + + return op_pc + 1; +} + +static uint32_t ropFSTs(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + int host_reg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + host_reg = FP_LOAD_REG(0); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + + MEM_STORE_ADDR_EA_L(target_seg, host_reg); + + return op_pc + 1; +} +static uint32_t ropFSTd(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + int host_reg1, host_reg2; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + FP_LOAD_REG_D(0, &host_reg1, &host_reg2); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + CHECK_SEG_LIMITS(target_seg, 7); + + MEM_STORE_ADDR_EA_Q(target_seg, host_reg1, host_reg2); + + return op_pc + 1; +} + +static uint32_t ropFSTPs(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t new_pc = ropFSTs(opcode, fetchdat, op_32, op_pc, block); + + FP_POP(); + + return new_pc; +} +static uint32_t ropFSTPd(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t new_pc = ropFSTd(opcode, fetchdat, op_32, op_pc, block); + + FP_POP(); + + return new_pc; +} + +#define ropFarith(name, size, load, op) \ +static uint32_t ropF ## name ## size(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ +{ \ + x86seg *target_seg; \ + \ + FP_ENTER(); \ + op_pc--; \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + \ + CHECK_SEG_READ(target_seg); \ + load(target_seg); \ + \ + op(FPU_ ## name); \ + \ + return op_pc + 1; \ +} + +ropFarith(ADD, s, MEM_LOAD_ADDR_EA_L, FP_OP_S); +ropFarith(DIV, s, MEM_LOAD_ADDR_EA_L, FP_OP_S); +ropFarith(DIVR, s, MEM_LOAD_ADDR_EA_L, FP_OP_S); +ropFarith(MUL, s, MEM_LOAD_ADDR_EA_L, FP_OP_S); +ropFarith(SUB, s, MEM_LOAD_ADDR_EA_L, FP_OP_S); +ropFarith(SUBR, s, MEM_LOAD_ADDR_EA_L, FP_OP_S); +ropFarith(ADD, d, MEM_LOAD_ADDR_EA_Q, FP_OP_D); +ropFarith(DIV, d, MEM_LOAD_ADDR_EA_Q, FP_OP_D); +ropFarith(DIVR, d, MEM_LOAD_ADDR_EA_Q, FP_OP_D); +ropFarith(MUL, d, MEM_LOAD_ADDR_EA_Q, FP_OP_D); +ropFarith(SUB, d, MEM_LOAD_ADDR_EA_Q, FP_OP_D); +ropFarith(SUBR, d, MEM_LOAD_ADDR_EA_Q, FP_OP_D); +ropFarith(ADD, iw, MEM_LOAD_ADDR_EA_W, FP_OP_IW); +ropFarith(DIV, iw, MEM_LOAD_ADDR_EA_W, FP_OP_IW); +ropFarith(DIVR, iw, MEM_LOAD_ADDR_EA_W, FP_OP_IW); +ropFarith(MUL, iw, MEM_LOAD_ADDR_EA_W, FP_OP_IW); +ropFarith(SUB, iw, MEM_LOAD_ADDR_EA_W, FP_OP_IW); +ropFarith(SUBR, iw, MEM_LOAD_ADDR_EA_W, FP_OP_IW); +ropFarith(ADD, il, MEM_LOAD_ADDR_EA_L, FP_OP_IL); +ropFarith(DIV, il, MEM_LOAD_ADDR_EA_L, FP_OP_IL); +ropFarith(DIVR, il, MEM_LOAD_ADDR_EA_L, FP_OP_IL); +ropFarith(MUL, il, MEM_LOAD_ADDR_EA_L, FP_OP_IL); +ropFarith(SUB, il, MEM_LOAD_ADDR_EA_L, FP_OP_IL); +ropFarith(SUBR, il, MEM_LOAD_ADDR_EA_L, FP_OP_IL); + +#define ropFcompare(name, size, load, op) \ +static uint32_t ropF ## name ## size(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ +{ \ + x86seg *target_seg; \ + \ + FP_ENTER(); \ + op_pc--; \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + \ + CHECK_SEG_READ(target_seg); \ + load(target_seg); \ + \ + op(); \ + \ + return op_pc + 1; \ +} \ +static uint32_t ropF ## name ## P ## size(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ +{ \ + uint32_t new_pc = ropF ## name ## size(opcode, fetchdat, op_32, op_pc, block); \ + \ + FP_POP(); \ + \ + return new_pc; \ +} + +ropFcompare(COM, s, MEM_LOAD_ADDR_EA_L, FP_COMPARE_S); +ropFcompare(COM, d, MEM_LOAD_ADDR_EA_Q, FP_COMPARE_D); +ropFcompare(COM, iw, MEM_LOAD_ADDR_EA_W, FP_COMPARE_IW); +ropFcompare(COM, il, MEM_LOAD_ADDR_EA_L, FP_COMPARE_IL); + +/*static uint32_t ropFADDs(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_L(target_seg); + + FP_OP_S(FPU_ADD); + + return op_pc + 1; +} +static uint32_t ropFDIVs(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_L(target_seg); + + FP_OP_S(FPU_DIV); + + return op_pc + 1; +} +static uint32_t ropFMULs(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_L(target_seg); + + FP_OP_S(FPU_MUL); + + return op_pc + 1; +} +static uint32_t ropFSUBs(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_L(target_seg); + + FP_OP_S(FPU_SUB); + + return op_pc + 1; +}*/ + + +static uint32_t ropFADD(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_ADD, 0, opcode & 7); + + return op_pc; +} +static uint32_t ropFCOM(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_COMPARE_REG(0, opcode & 7); + + return op_pc; +} +static uint32_t ropFDIV(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_DIV, 0, opcode & 7); + + return op_pc; +} +static uint32_t ropFDIVR(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_DIVR, 0, opcode & 7); + + return op_pc; +} +static uint32_t ropFMUL(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_MUL, 0, opcode & 7); + + return op_pc; +} +static uint32_t ropFSUB(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_SUB, 0, opcode & 7); + + return op_pc; +} +static uint32_t ropFSUBR(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_SUBR, 0, opcode & 7); + + return op_pc; +} + +static uint32_t ropFADDr(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_ADD, opcode & 7, 0); + + return op_pc; +} +static uint32_t ropFDIVr(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_DIV, opcode & 7, 0); + + return op_pc; +} +static uint32_t ropFDIVRr(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_DIVR, opcode & 7, 0); + + return op_pc; +} +static uint32_t ropFMULr(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_MUL, opcode & 7, 0); + + return op_pc; +} +static uint32_t ropFSUBr(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_SUB, opcode & 7, 0); + + return op_pc; +} +static uint32_t ropFSUBRr(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_SUBR, opcode & 7, 0); + + return op_pc; +} + +static uint32_t ropFADDP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_ADD, opcode & 7, 0); + FP_POP(); + + return op_pc; +} +static uint32_t ropFCOMP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_COMPARE_REG(0, opcode & 7); + FP_POP(); + + return op_pc; +} +static uint32_t ropFDIVP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_DIV, opcode & 7, 0); + FP_POP(); + + return op_pc; +} +static uint32_t ropFDIVRP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_DIVR, opcode & 7, 0); + FP_POP(); + + return op_pc; +} +static uint32_t ropFMULP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_MUL, opcode & 7, 0); + FP_POP(); + + return op_pc; +} +static uint32_t ropFSUBP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_SUB, opcode & 7, 0); + FP_POP(); + + return op_pc; +} +static uint32_t ropFSUBRP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_OP_REG(FPU_SUBR, opcode & 7, 0); + FP_POP(); + + return op_pc; +} + +static uint32_t ropFCOMPP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_COMPARE_REG(0, 1); + FP_POP2(); + + return op_pc; +} + +static uint32_t ropFSTSW_AX(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + FP_ENTER(); + host_reg = LOAD_VAR_W((uintptr_t)&cpu_state.npxs); + STORE_REG_TARGET_W_RELEASE(host_reg, REG_AX); + + return op_pc; +} + + +static uint32_t ropFISTw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + int host_reg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + host_reg = FP_LOAD_REG_INT_W(0); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + + MEM_STORE_ADDR_EA_W(target_seg, host_reg); + + return op_pc + 1; +} +static uint32_t ropFISTl(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + int host_reg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + host_reg = FP_LOAD_REG_INT(0); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + + MEM_STORE_ADDR_EA_L(target_seg, host_reg); + + return op_pc + 1; +} + +static uint32_t ropFISTPw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t new_pc = ropFISTw(opcode, fetchdat, op_32, op_pc, block); + + FP_POP(); + + return new_pc; +} +static uint32_t ropFISTPl(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t new_pc = ropFISTl(opcode, fetchdat, op_32, op_pc, block); + + FP_POP(); + + return new_pc; +} +static uint32_t ropFISTPq(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + int host_reg1, host_reg2; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + FP_LOAD_REG_INT_Q(0, &host_reg1, &host_reg2); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + + MEM_STORE_ADDR_EA_Q(target_seg, host_reg1, host_reg2); + + FP_POP(); + + return op_pc + 1; +} + + +static uint32_t ropFLDCW(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_W(target_seg); + STORE_HOST_REG_ADDR_W((uintptr_t)&cpu_state.npxc, 0); + UPDATE_NPXC(0); + + return op_pc + 1; +} +static uint32_t ropFSTCW(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + x86seg *target_seg; + + FP_ENTER(); + op_pc--; + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + CHECK_SEG_WRITE(target_seg); + + host_reg = LOAD_VAR_W((uintptr_t)&cpu_state.npxc); + MEM_STORE_ADDR_EA_W(target_seg, host_reg); + + return op_pc + 1; +} + + +static uint32_t ropFCHS(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_FCHS(); + + return op_pc; +} + +#define opFLDimm(name, v) \ + static uint32_t ropFLD ## name(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + static double fp_imm = v; \ + \ + FP_ENTER(); \ + FP_LOAD_IMM_Q(*(uint64_t *)&fp_imm); \ + \ + return op_pc; \ + } + +opFLDimm(1, 1.0) +opFLDimm(L2T, 3.3219280948873623) +opFLDimm(L2E, 1.4426950408889634); +opFLDimm(PI, 3.141592653589793); +opFLDimm(EG2, 0.3010299956639812); +opFLDimm(Z, 0.0) + +static uint32_t ropFLDLN2(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + FP_ENTER(); + FP_LOAD_IMM_Q(0x3fe62e42fefa39f0ull); + + return op_pc; +} diff --git a/src/codegen/codegen_ops_jump.h b/src/codegen/codegen_ops_jump.h new file mode 100644 index 000000000..0ad293744 --- /dev/null +++ b/src/codegen/codegen_ops_jump.h @@ -0,0 +1,270 @@ +static uint32_t ropJMP_r8(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t offset = fetchdat & 0xff; + + if (offset & 0x80) + offset |= 0xffffff00; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.pc, op_pc+1+offset); + + return -1; +} + +static uint32_t ropJMP_r16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint16_t offset = fetchdat & 0xffff; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.pc, (op_pc+2+offset) & 0xffff); + + return -1; +} + +static uint32_t ropJMP_r32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t offset = fastreadl(cs + op_pc); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.pc, op_pc+4+offset); + + return -1; +} + + +static uint32_t ropJCXZ(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t offset = fetchdat & 0xff; + + if (offset & 0x80) + offset |= 0xffffff00; + + if (op_32 & 0x200) + { + int host_reg = LOAD_REG_L(REG_ECX); + TEST_ZERO_JUMP_L(host_reg, op_pc+1+offset, 0); + } + else + { + int host_reg = LOAD_REG_W(REG_CX); + TEST_ZERO_JUMP_W(host_reg, op_pc+1+offset, 0); + } + + return op_pc+1; +} + +static uint32_t ropLOOP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t offset = fetchdat & 0xff; + + if (offset & 0x80) + offset |= 0xffffff00; + + if (op_32 & 0x200) + { + int host_reg = LOAD_REG_L(REG_ECX); + SUB_HOST_REG_IMM(host_reg, 1); + STORE_REG_L_RELEASE(host_reg); + TEST_NONZERO_JUMP_L(host_reg, op_pc+1+offset, 0); + } + else + { + int host_reg = LOAD_REG_W(REG_CX); + SUB_HOST_REG_IMM(host_reg, 1); + STORE_REG_W_RELEASE(host_reg); + TEST_NONZERO_JUMP_W(host_reg, op_pc+1+offset, 0); + } + + return op_pc+1; +} + +static void BRANCH_COND_B(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + CALL_FUNC((uintptr_t)CF_SET); + if (not) + TEST_ZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); + else + TEST_NONZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); +} + +static void BRANCH_COND_E(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + int host_reg; + + switch (codegen_flags_changed ? cpu_state.flags_op : FLAGS_UNKNOWN) + { + case FLAGS_ZN8: + case FLAGS_ZN16: + case FLAGS_ZN32: + case FLAGS_ADD8: + case FLAGS_ADD16: + case FLAGS_ADD32: + case FLAGS_SUB8: + case FLAGS_SUB16: + case FLAGS_SUB32: + case FLAGS_SHL8: + case FLAGS_SHL16: + case FLAGS_SHL32: + case FLAGS_SHR8: + case FLAGS_SHR16: + case FLAGS_SHR32: + case FLAGS_SAR8: + case FLAGS_SAR16: + case FLAGS_SAR32: + case FLAGS_INC8: + case FLAGS_INC16: + case FLAGS_INC32: + case FLAGS_DEC8: + case FLAGS_DEC16: + case FLAGS_DEC32: + host_reg = LOAD_VAR_L((uintptr_t)&cpu_state.flags_res); + if (not) + TEST_NONZERO_JUMP_L(host_reg, op_pc+pc_offset+offset, timing_bt); + else + TEST_ZERO_JUMP_L(host_reg, op_pc+pc_offset+offset, timing_bt); + break; + + case FLAGS_UNKNOWN: + CALL_FUNC((uintptr_t)ZF_SET); + if (not) + TEST_ZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); + else + TEST_NONZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); + break; + } +} + +static void BRANCH_COND_O(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + CALL_FUNC((uintptr_t)VF_SET); + if (not) + TEST_ZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); + else + TEST_NONZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); +} + +static void BRANCH_COND_P(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + CALL_FUNC((uintptr_t)PF_SET); + if (not) + TEST_ZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); + else + TEST_NONZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); +} + +static void BRANCH_COND_S(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + int host_reg; + + switch (codegen_flags_changed ? cpu_state.flags_op : FLAGS_UNKNOWN) + { + case FLAGS_ZN8: + case FLAGS_ADD8: + case FLAGS_SUB8: + case FLAGS_SHL8: + case FLAGS_SHR8: + case FLAGS_SAR8: + case FLAGS_INC8: + case FLAGS_DEC8: + host_reg = LOAD_VAR_L((uintptr_t)&cpu_state.flags_res); + AND_HOST_REG_IMM(host_reg, 0x80); + if (not) + TEST_ZERO_JUMP_L(host_reg, op_pc+pc_offset+offset, timing_bt); + else + TEST_NONZERO_JUMP_L(host_reg, op_pc+pc_offset+offset, timing_bt); + break; + + case FLAGS_ZN16: + case FLAGS_ADD16: + case FLAGS_SUB16: + case FLAGS_SHL16: + case FLAGS_SHR16: + case FLAGS_SAR16: + case FLAGS_INC16: + case FLAGS_DEC16: + host_reg = LOAD_VAR_L((uintptr_t)&cpu_state.flags_res); + AND_HOST_REG_IMM(host_reg, 0x8000); + if (not) + TEST_ZERO_JUMP_L(host_reg, op_pc+pc_offset+offset, timing_bt); + else + TEST_NONZERO_JUMP_L(host_reg, op_pc+pc_offset+offset, timing_bt); + break; + + case FLAGS_ZN32: + case FLAGS_ADD32: + case FLAGS_SUB32: + case FLAGS_SHL32: + case FLAGS_SHR32: + case FLAGS_SAR32: + case FLAGS_INC32: + case FLAGS_DEC32: + host_reg = LOAD_VAR_L((uintptr_t)&cpu_state.flags_res); + AND_HOST_REG_IMM(host_reg, 0x80000000); + if (not) + TEST_ZERO_JUMP_L(host_reg, op_pc+pc_offset+offset, timing_bt); + else + TEST_NONZERO_JUMP_L(host_reg, op_pc+pc_offset+offset, timing_bt); + break; + + case FLAGS_UNKNOWN: + CALL_FUNC((uintptr_t)NF_SET); + if (not) + TEST_ZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); + else + TEST_NONZERO_JUMP_L(0, op_pc+pc_offset+offset, timing_bt); + break; + } +} + + +#define ropBRANCH(name, func, not) \ +static uint32_t rop ## name(uint8_t opcode, uint32_t fetchdat, \ + uint32_t op_32, uint32_t op_pc, \ + codeblock_t *block) \ +{ \ + uint32_t offset = fetchdat & 0xff; \ + \ + if (offset & 0x80) \ + offset |= 0xffffff00; \ + \ + func(1, op_pc, offset, not); \ + \ + return op_pc+1; \ +} \ +static uint32_t rop ## name ## _w(uint8_t opcode, \ + uint32_t fetchdat, uint32_t op_32, \ + uint32_t op_pc, codeblock_t *block) \ +{ \ + uint32_t offset = fetchdat & 0xffff; \ + \ + if (offset & 0x8000) \ + offset |= 0xffff0000; \ + \ + func(2, op_pc, offset, not); \ + \ + return op_pc+2; \ +} \ +static uint32_t rop ## name ## _l(uint8_t opcode, \ + uint32_t fetchdat, uint32_t op_32, \ + uint32_t op_pc, codeblock_t *block) \ +{ \ + uint32_t offset = fastreadl(cs + op_pc); \ + \ + func(4, op_pc, offset, not); \ + \ + return op_pc+4; \ +} + +ropBRANCH(JB, BRANCH_COND_B, 0) +ropBRANCH(JNB, BRANCH_COND_B, 1) +ropBRANCH(JE, BRANCH_COND_E, 0) +ropBRANCH(JNE, BRANCH_COND_E, 1) +ropBRANCH(JO, BRANCH_COND_O, 0) +ropBRANCH(JNO, BRANCH_COND_O, 1) +ropBRANCH(JP, BRANCH_COND_P, 0) +ropBRANCH(JNP, BRANCH_COND_P, 1) +ropBRANCH(JS, BRANCH_COND_S, 0) +ropBRANCH(JNS, BRANCH_COND_S, 1) +ropBRANCH(JL, BRANCH_COND_L, 0) +ropBRANCH(JNL, BRANCH_COND_L, 1) +ropBRANCH(JLE, BRANCH_COND_LE, 0) +ropBRANCH(JNLE, BRANCH_COND_LE, 1) +ropBRANCH(JBE, BRANCH_COND_BE, 0) +ropBRANCH(JNBE, BRANCH_COND_BE, 1) diff --git a/src/codegen/codegen_ops_logic.h b/src/codegen/codegen_ops_logic.h new file mode 100644 index 000000000..c0ffa641a --- /dev/null +++ b/src/codegen/codegen_ops_logic.h @@ -0,0 +1,564 @@ +#define ROP_LOGIC(name, op, writeback) \ + static uint32_t rop ## name ## _b_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + x86seg *target_seg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_B(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_B_NO_ABRT(target_seg); \ + } \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); \ + src_reg = LOAD_REG_B((fetchdat >> 3) & 7); \ + op ## _HOST_REG_B(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_B_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_B_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + return op_pc + 1; \ + } \ + static uint32_t rop ## name ## _w_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + x86seg *target_seg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_W(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE_W(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_W_NO_ABRT(target_seg); \ + } \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); \ + src_reg = LOAD_REG_W((fetchdat >> 3) & 7); \ + op ## _HOST_REG_W(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_W_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + return op_pc + 1; \ + } \ + static uint32_t rop ## name ## _l_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + x86seg *target_seg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_L(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE_L(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_L_NO_ABRT(target_seg); \ + } \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); \ + src_reg = LOAD_REG_L((fetchdat >> 3) & 7); \ + op ## _HOST_REG_L(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_L_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + return op_pc + 1; \ + } \ + static uint32_t rop ## name ## _b_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + src_reg = LOAD_REG_B(fetchdat & 7); \ + } \ + else \ + { \ + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + MEM_LOAD_ADDR_EA_B(target_seg); \ + src_reg = 0; \ + } \ + \ + dst_reg = LOAD_REG_B((fetchdat >> 3) & 7); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); \ + op ## _HOST_REG_B(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) STORE_REG_B_RELEASE(dst_reg); \ + else RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + return op_pc + 1; \ + } \ + static uint32_t rop ## name ## _w_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + src_reg = LOAD_REG_W(fetchdat & 7); \ + } \ + else \ + { \ + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + MEM_LOAD_ADDR_EA_W(target_seg); \ + src_reg = 0; \ + } \ + \ + dst_reg = LOAD_REG_W((fetchdat >> 3) & 7); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); \ + op ## _HOST_REG_W(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) STORE_REG_W_RELEASE(dst_reg); \ + else RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + return op_pc + 1; \ + } \ + static uint32_t rop ## name ## _l_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int src_reg, dst_reg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + src_reg = LOAD_REG_L(fetchdat & 7); \ + } \ + else \ + { \ + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + MEM_LOAD_ADDR_EA_L(target_seg); \ + src_reg = 0; \ + } \ + \ + dst_reg = LOAD_REG_L((fetchdat >> 3) & 7); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); \ + op ## _HOST_REG_L(dst_reg, src_reg); \ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, dst_reg); \ + if (writeback) STORE_REG_L_RELEASE(dst_reg); \ + else RELEASE_REG(dst_reg); \ + RELEASE_REG(src_reg); \ + \ + return op_pc + 1; \ + } + +ROP_LOGIC(AND, AND, 1) +ROP_LOGIC(OR, OR, 1) +ROP_LOGIC(XOR, XOR, 1) + +static uint32_t ropTEST_b_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg; + + if ((fetchdat & 0xc0) == 0xc0) + { + src_reg = LOAD_REG_B(fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_B(target_seg); + src_reg = 0; + } + + dst_reg = LOAD_REG_B((fetchdat >> 3) & 7); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); + dst_reg = TEST_HOST_REG_B(dst_reg, src_reg); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, dst_reg); + RELEASE_REG(dst_reg); + RELEASE_REG(src_reg); + + return op_pc + 1; +} +static uint32_t ropTEST_w_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg; + + if ((fetchdat & 0xc0) == 0xc0) + { + src_reg = LOAD_REG_W(fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_W(target_seg); + src_reg = 0; + } + + dst_reg = LOAD_REG_W((fetchdat >> 3) & 7); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + dst_reg = TEST_HOST_REG_W(dst_reg, src_reg); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, dst_reg); + RELEASE_REG(dst_reg); + RELEASE_REG(src_reg); + + return op_pc + 1; +} +static uint32_t ropTEST_l_rm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg; + + if ((fetchdat & 0xc0) == 0xc0) + { + src_reg = LOAD_REG_L(fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_L(target_seg); + src_reg = 0; + } + + dst_reg = LOAD_REG_L((fetchdat >> 3) & 7); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + dst_reg = TEST_HOST_REG_L(dst_reg, src_reg); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, dst_reg); + RELEASE_REG(dst_reg); + RELEASE_REG(src_reg); + + return op_pc + 1; +} + +static uint32_t ropAND_AL_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_B(REG_AL); + + AND_HOST_REG_IMM(host_reg, (fetchdat & 0xff) | 0xffffff00); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_B_RELEASE(host_reg); + + return op_pc + 1; +} +static uint32_t ropAND_AX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_W(REG_AX); + + AND_HOST_REG_IMM(host_reg, (fetchdat & 0xffff) | 0xffff0000); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_W_RELEASE(host_reg); + + return op_pc + 2; +} +static uint32_t ropAND_EAX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_L(REG_EAX); + + fetchdat = fastreadl(cs + op_pc); + AND_HOST_REG_IMM(host_reg, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_L_RELEASE(host_reg); + + return op_pc + 4; +} + +static uint32_t ropOR_AL_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_B(REG_AL); + + OR_HOST_REG_IMM(host_reg, fetchdat & 0xff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_B_RELEASE(host_reg); + + return op_pc + 1; +} +static uint32_t ropOR_AX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_W(REG_AX); + + OR_HOST_REG_IMM(host_reg, fetchdat & 0xffff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_W_RELEASE(host_reg); + + return op_pc + 2; +} +static uint32_t ropOR_EAX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_L(REG_EAX); + + fetchdat = fastreadl(cs + op_pc); + OR_HOST_REG_IMM(host_reg, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_L_RELEASE(host_reg); + + return op_pc + 4; +} + +static uint32_t ropTEST_AL_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_B(REG_AL); + + host_reg = TEST_HOST_REG_IMM(host_reg, fetchdat & 0xff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + RELEASE_REG(host_reg); + + return op_pc + 1; +} +static uint32_t ropTEST_AX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_W(REG_AX); + + host_reg = TEST_HOST_REG_IMM(host_reg, fetchdat & 0xffff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + RELEASE_REG(host_reg); + + return op_pc + 2; +} +static uint32_t ropTEST_EAX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_L(REG_EAX); + + fetchdat = fastreadl(cs + op_pc); + host_reg = TEST_HOST_REG_IMM(host_reg, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + RELEASE_REG(host_reg); + + return op_pc + 4; +} + +static uint32_t ropXOR_AL_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_B(REG_AL); + + XOR_HOST_REG_IMM(host_reg, fetchdat & 0xff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_B_RELEASE(host_reg); + + return op_pc + 1; +} +static uint32_t ropXOR_AX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_W(REG_AX); + + XOR_HOST_REG_IMM(host_reg, fetchdat & 0xffff); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_W_RELEASE(host_reg); + + return op_pc + 2; +} +static uint32_t ropXOR_EAX_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_L(REG_EAX); + + fetchdat = fastreadl(cs + op_pc); + XOR_HOST_REG_IMM(host_reg, fetchdat); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + STORE_REG_L_RELEASE(host_reg); + + return op_pc + 4; +} + +static uint32_t ropF6(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + int host_reg; + uint8_t imm; + + switch (fetchdat & 0x38) + { + case 0x00: /*TEST b,#8*/ + if ((fetchdat & 0xc0) == 0xc0) + { + host_reg = LOAD_REG_B(fetchdat & 7); + imm = (fetchdat >> 8) & 0xff; + } + else + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + imm = fastreadb(cs + op_pc + 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_B(target_seg); + host_reg = 0; + } + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); + host_reg = TEST_HOST_REG_IMM(host_reg, imm); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + RELEASE_REG(host_reg); + return op_pc + 2; + + case 0x10: /*NOT b*/ + if ((fetchdat & 0xc0) != 0xc0) + return 0; + host_reg = LOAD_REG_B(fetchdat & 7); + XOR_HOST_REG_IMM(host_reg, 0xff); + STORE_REG_B_RELEASE(host_reg); + return op_pc + 1; + + case 0x18: /*NEG b*/ + if ((fetchdat & 0xc0) != 0xc0) + return 0; + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB8); + host_reg = LOAD_REG_B(fetchdat & 7); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op2, host_reg); + NEG_HOST_REG_B(host_reg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op1, 0); + STORE_REG_B_RELEASE(host_reg); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + return op_pc + 1; + } + + return 0; +} +static uint32_t ropF7_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + int host_reg; + uint16_t imm; + + switch (fetchdat & 0x38) + { + case 0x00: /*TEST w,#*/ + if ((fetchdat & 0xc0) == 0xc0) + { + host_reg = LOAD_REG_W(fetchdat & 7); + imm = (fetchdat >> 8) & 0xffff; + } + else + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + imm = fastreadw(cs + op_pc + 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_W(target_seg); + host_reg = 0; + } + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); + host_reg = TEST_HOST_REG_IMM(host_reg, imm); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + RELEASE_REG(host_reg); + return op_pc + 3; + + case 0x10: /*NOT w*/ + if ((fetchdat & 0xc0) != 0xc0) + return 0; + host_reg = LOAD_REG_W(fetchdat & 7); + XOR_HOST_REG_IMM(host_reg, 0xffff); + STORE_REG_W_RELEASE(host_reg); + return op_pc + 1; + + case 0x18: /*NEG w*/ + if ((fetchdat & 0xc0) != 0xc0) + return 0; + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB16); + host_reg = LOAD_REG_W(fetchdat & 7); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op2, host_reg); + NEG_HOST_REG_W(host_reg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op1, 0); + STORE_REG_W_RELEASE(host_reg); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + return op_pc + 1; + } + + return 0; +} +static uint32_t ropF7_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg; + int host_reg; + uint32_t imm; + + switch (fetchdat & 0x38) + { + case 0x00: /*TEST l,#*/ + if ((fetchdat & 0xc0) == 0xc0) + { + host_reg = LOAD_REG_L(fetchdat & 7); + imm = fastreadl(cs + op_pc + 1); + } + else + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + imm = fastreadl(cs + op_pc + 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + MEM_LOAD_ADDR_EA_L(target_seg); + host_reg = 0; + } + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); + host_reg = TEST_HOST_REG_IMM(host_reg, imm); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + RELEASE_REG(host_reg); + return op_pc + 5; + + case 0x10: /*NOT l*/ + if ((fetchdat & 0xc0) != 0xc0) + return 0; + host_reg = LOAD_REG_L(fetchdat & 7); + XOR_HOST_REG_IMM(host_reg, 0xffffffff); + STORE_REG_L_RELEASE(host_reg); + return op_pc + 1; + + case 0x18: /*NEG l*/ + if ((fetchdat & 0xc0) != 0xc0) + return 0; + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SUB32); + host_reg = LOAD_REG_L(fetchdat & 7); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op2, host_reg); + NEG_HOST_REG_L(host_reg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op1, 0); + STORE_REG_L_RELEASE(host_reg); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + return op_pc + 1; + } + + return 0; +} diff --git a/src/codegen/codegen_ops_misc.h b/src/codegen/codegen_ops_misc.h new file mode 100644 index 000000000..1752c4180 --- /dev/null +++ b/src/codegen/codegen_ops_misc.h @@ -0,0 +1,269 @@ +static uint32_t ropNOP(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + return op_pc; +} + +static uint32_t ropCLD(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + CLEAR_BITS((uintptr_t)&cpu_state.flags, D_FLAG); + return op_pc; +} +static uint32_t ropSTD(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + SET_BITS((uintptr_t)&cpu_state.flags, D_FLAG); + return op_pc; +} + +static uint32_t ropCLI(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if (!IOPLp && (cr4 & (CR4_VME | CR4_PVI))) + return 0; + CLEAR_BITS((uintptr_t)&cpu_state.flags, I_FLAG); + return op_pc; +} +static uint32_t ropSTI(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if (!IOPLp && (cr4 & (CR4_VME | CR4_PVI))) + return 0; + SET_BITS((uintptr_t)&cpu_state.flags, I_FLAG); + return op_pc; +} + +static uint32_t ropFE(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg = NULL; + int host_reg; + + if ((fetchdat & 0x30) != 0x00) + return 0; + + CALL_FUNC((uintptr_t)flags_rebuild_c); + + if ((fetchdat & 0xc0) == 0xc0) + host_reg = LOAD_REG_B(fetchdat & 7); + else + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + SAVE_EA(); + MEM_CHECK_WRITE(target_seg); + host_reg = MEM_LOAD_ADDR_EA_B_NO_ABRT(target_seg); + } + + switch (fetchdat & 0x38) + { + case 0x00: /*INC*/ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM_B(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_INC8); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + break; + case 0x08: /*DEC*/ + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM_B(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_DEC8); + STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, host_reg); + break; + } + + if ((fetchdat & 0xc0) == 0xc0) + STORE_REG_B_RELEASE(host_reg); + else + { + LOAD_EA(); + MEM_STORE_ADDR_EA_B_NO_ABRT(target_seg, host_reg); + } + codegen_flags_changed = 1; + + return op_pc + 1; +} +static uint32_t codegen_temp; +static uint32_t ropFF_16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg = NULL; + int host_reg; + + if ((fetchdat & 0x30) != 0x00 && (fetchdat & 0x08)) + return 0; + + if ((fetchdat & 0x30) == 0x00) + CALL_FUNC((uintptr_t)flags_rebuild_c); + + if ((fetchdat & 0xc0) == 0xc0) + host_reg = LOAD_REG_W(fetchdat & 7); + else + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + if ((fetchdat & 0x30) != 0x00) + { + MEM_LOAD_ADDR_EA_W(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE_W(target_seg); + host_reg = MEM_LOAD_ADDR_EA_W_NO_ABRT(target_seg); + } + } + + switch (fetchdat & 0x38) + { + case 0x00: /*INC*/ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM_W(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_INC16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + if ((fetchdat & 0xc0) == 0xc0) + STORE_REG_W_RELEASE(host_reg); + else + { + LOAD_EA(); + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, host_reg); + } + codegen_flags_changed = 1; + return op_pc + 1; + case 0x08: /*DEC*/ + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM_W(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_DEC16); + STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, host_reg); + if ((fetchdat & 0xc0) == 0xc0) + STORE_REG_W_RELEASE(host_reg); + else + { + LOAD_EA(); + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, host_reg); + } + codegen_flags_changed = 1; + return op_pc + 1; + + case 0x10: /*CALL*/ + STORE_HOST_REG_ADDR_W((uintptr_t)&codegen_temp, host_reg); + RELEASE_REG(host_reg); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-2); + host_reg = LOAD_REG_IMM(op_pc + 1); + MEM_STORE_ADDR_EA_W(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-2); + + host_reg = LOAD_VAR_W((uintptr_t)&codegen_temp); + STORE_HOST_REG_ADDR_W((uintptr_t)&cpu_state.pc, host_reg); + return -1; + + case 0x20: /*JMP*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.pc, host_reg); + return -1; + + case 0x30: /*PUSH*/ + if (!host_reg) + host_reg = LOAD_HOST_REG(host_reg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-2); + MEM_STORE_ADDR_EA_W(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-2); + return op_pc + 1; + } + return 0; +} +static uint32_t ropFF_32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg = NULL; + int host_reg; + + if ((fetchdat & 0x30) != 0x00 && (fetchdat & 0x08)) + return 0; + + if ((fetchdat & 0x30) == 0x00) + CALL_FUNC((uintptr_t)flags_rebuild_c); + + if ((fetchdat & 0xc0) == 0xc0) + host_reg = LOAD_REG_L(fetchdat & 7); + else + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + if ((fetchdat & 0x30) != 0x00) + { + MEM_LOAD_ADDR_EA_L(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE_L(target_seg); + host_reg = MEM_LOAD_ADDR_EA_L_NO_ABRT(target_seg); + } + } + + switch (fetchdat & 0x38) + { + case 0x00: /*INC*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + ADD_HOST_REG_IMM(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_INC32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + if ((fetchdat & 0xc0) == 0xc0) + STORE_REG_L_RELEASE(host_reg); + else + { + LOAD_EA(); + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, host_reg); + } + codegen_flags_changed = 1; + return op_pc + 1; + case 0x08: /*DEC*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_op1, host_reg); + SUB_HOST_REG_IMM(host_reg, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, 1); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_DEC32); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, host_reg); + if ((fetchdat & 0xc0) == 0xc0) + STORE_REG_L_RELEASE(host_reg); + else + { + LOAD_EA(); + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, host_reg); + } + codegen_flags_changed = 1; + return op_pc + 1; + + case 0x10: /*CALL*/ + STORE_HOST_REG_ADDR((uintptr_t)&codegen_temp, host_reg); + RELEASE_REG(host_reg); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-4); + host_reg = LOAD_REG_IMM(op_pc + 1); + MEM_STORE_ADDR_EA_L(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-4); + + host_reg = LOAD_VAR_L((uintptr_t)&codegen_temp); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.pc, host_reg); + return -1; + + case 0x20: /*JMP*/ + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.pc, host_reg); + return -1; + + case 0x30: /*PUSH*/ + if (!host_reg) + host_reg = LOAD_HOST_REG(host_reg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-4); + MEM_STORE_ADDR_EA_L(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-4); + return op_pc + 1; + } + return 0; +} diff --git a/src/codegen/codegen_ops_mmx.h b/src/codegen/codegen_ops_mmx.h new file mode 100644 index 000000000..14730cb93 --- /dev/null +++ b/src/codegen/codegen_ops_mmx.h @@ -0,0 +1,277 @@ +static uint32_t ropMOVQ_q_mm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg1, host_reg2; + + MMX_ENTER(); + + LOAD_MMX_Q((fetchdat >> 3) & 7, &host_reg1, &host_reg2); + + if ((fetchdat & 0xc0) == 0xc0) + { + STORE_MMX_Q(fetchdat & 7, host_reg1, host_reg2); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + CHECK_SEG_LIMITS(target_seg, 7); + + MEM_STORE_ADDR_EA_Q(target_seg, host_reg1, host_reg2); + } + + return op_pc + 1; +} + +static uint32_t ropMOVQ_mm_q(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + MMX_ENTER(); + + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg1, host_reg2; + + LOAD_MMX_Q(fetchdat & 7, &host_reg1, &host_reg2); + STORE_MMX_Q((fetchdat >> 3) & 7, host_reg1, host_reg2); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_Q(target_seg); + STORE_MMX_Q((fetchdat >> 3) & 7, LOAD_Q_REG_1, LOAD_Q_REG_2); + } + + return op_pc + 1; +} + +static uint32_t ropMOVD_l_mm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + MMX_ENTER(); + + host_reg = LOAD_MMX_D((fetchdat >> 3) & 7); + + if ((fetchdat & 0xc0) == 0xc0) + { + STORE_REG_TARGET_L_RELEASE(host_reg, fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + CHECK_SEG_LIMITS(target_seg, 3); + + MEM_STORE_ADDR_EA_L(target_seg, host_reg); + } + + return op_pc + 1; +} +static uint32_t ropMOVD_mm_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + MMX_ENTER(); + + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_L(fetchdat & 7); + STORE_MMX_LQ((fetchdat >> 3) & 7, host_reg); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_L(target_seg); + STORE_MMX_LQ((fetchdat >> 3) & 7, 0); + } + + return op_pc + 1; +} + +#define MMX_OP(name, func) \ +static uint32_t name(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ +{ \ + int src_reg1, src_reg2; \ + int xmm_src, xmm_dst; \ + \ + MMX_ENTER(); \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + xmm_src = LOAD_MMX_Q_MMX(fetchdat & 7); \ + } \ + else \ + { \ + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + \ + CHECK_SEG_READ(target_seg); \ + \ + MEM_LOAD_ADDR_EA_Q(target_seg); \ + src_reg1 = LOAD_Q_REG_1; \ + src_reg2 = LOAD_Q_REG_2; \ + xmm_src = LOAD_INT_TO_MMX(src_reg1, src_reg2); \ + } \ + xmm_dst = LOAD_MMX_Q_MMX((fetchdat >> 3) & 7); \ + func(xmm_dst, xmm_src); \ + STORE_MMX_Q_MMX((fetchdat >> 3) & 7, xmm_dst); \ + \ + return op_pc + 1; \ +} + +MMX_OP(ropPAND, MMX_AND) +MMX_OP(ropPANDN, MMX_ANDN) +MMX_OP(ropPOR, MMX_OR) +MMX_OP(ropPXOR, MMX_XOR) + +MMX_OP(ropPADDB, MMX_ADDB) +MMX_OP(ropPADDW, MMX_ADDW) +MMX_OP(ropPADDD, MMX_ADDD) +MMX_OP(ropPADDSB, MMX_ADDSB) +MMX_OP(ropPADDSW, MMX_ADDSW) +MMX_OP(ropPADDUSB, MMX_ADDUSB) +MMX_OP(ropPADDUSW, MMX_ADDUSW) + +MMX_OP(ropPSUBB, MMX_SUBB) +MMX_OP(ropPSUBW, MMX_SUBW) +MMX_OP(ropPSUBD, MMX_SUBD) +MMX_OP(ropPSUBSB, MMX_SUBSB) +MMX_OP(ropPSUBSW, MMX_SUBSW) +MMX_OP(ropPSUBUSB, MMX_SUBUSB) +MMX_OP(ropPSUBUSW, MMX_SUBUSW) + +MMX_OP(ropPUNPCKLBW, MMX_PUNPCKLBW); +MMX_OP(ropPUNPCKLWD, MMX_PUNPCKLWD); +MMX_OP(ropPUNPCKLDQ, MMX_PUNPCKLDQ); +MMX_OP(ropPACKSSWB, MMX_PACKSSWB); +MMX_OP(ropPCMPGTB, MMX_PCMPGTB); +MMX_OP(ropPCMPGTW, MMX_PCMPGTW); +MMX_OP(ropPCMPGTD, MMX_PCMPGTD); +MMX_OP(ropPACKUSWB, MMX_PACKUSWB); +MMX_OP(ropPUNPCKHBW, MMX_PUNPCKHBW); +MMX_OP(ropPUNPCKHWD, MMX_PUNPCKHWD); +MMX_OP(ropPUNPCKHDQ, MMX_PUNPCKHDQ); +MMX_OP(ropPACKSSDW, MMX_PACKSSDW); + +MMX_OP(ropPCMPEQB, MMX_PCMPEQB); +MMX_OP(ropPCMPEQW, MMX_PCMPEQW); +MMX_OP(ropPCMPEQD, MMX_PCMPEQD); + +MMX_OP(ropPSRLW, MMX_PSRLW) +MMX_OP(ropPSRLD, MMX_PSRLD) +MMX_OP(ropPSRLQ, MMX_PSRLQ) +MMX_OP(ropPSRAW, MMX_PSRAW) +MMX_OP(ropPSRAD, MMX_PSRAD) +MMX_OP(ropPSLLW, MMX_PSLLW) +MMX_OP(ropPSLLD, MMX_PSLLD) +MMX_OP(ropPSLLQ, MMX_PSLLQ) + +MMX_OP(ropPMULLW, MMX_PMULLW); +MMX_OP(ropPMULHW, MMX_PMULHW); +MMX_OP(ropPMADDWD, MMX_PMADDWD); + +static uint32_t ropPSxxW_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int xmm_dst; + + if ((fetchdat & 0xc0) != 0xc0) + return 0; + if ((fetchdat & 0x08) || !(fetchdat & 0x30)) + return 0; + + MMX_ENTER(); + + xmm_dst = LOAD_MMX_Q_MMX(fetchdat & 7); + switch (fetchdat & 0x38) + { + case 0x10: /*PSRLW*/ + MMX_PSRLW_imm(xmm_dst, (fetchdat >> 8) & 0xff); + break; + case 0x20: /*PSRAW*/ + MMX_PSRAW_imm(xmm_dst, (fetchdat >> 8) & 0xff); + break; + case 0x30: /*PSLLW*/ + MMX_PSLLW_imm(xmm_dst, (fetchdat >> 8) & 0xff); + break; + } + STORE_MMX_Q_MMX(fetchdat & 7, xmm_dst); + + return op_pc + 2; +} +static uint32_t ropPSxxD_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int xmm_dst; + + if ((fetchdat & 0xc0) != 0xc0) + return 0; + if ((fetchdat & 0x08) || !(fetchdat & 0x30)) + return 0; + + MMX_ENTER(); + + xmm_dst = LOAD_MMX_Q_MMX(fetchdat & 7); + switch (fetchdat & 0x38) + { + case 0x10: /*PSRLD*/ + MMX_PSRLD_imm(xmm_dst, (fetchdat >> 8) & 0xff); + break; + case 0x20: /*PSRAD*/ + MMX_PSRAD_imm(xmm_dst, (fetchdat >> 8) & 0xff); + break; + case 0x30: /*PSLLD*/ + MMX_PSLLD_imm(xmm_dst, (fetchdat >> 8) & 0xff); + break; + } + STORE_MMX_Q_MMX(fetchdat & 7, xmm_dst); + + return op_pc + 2; +} +static uint32_t ropPSxxQ_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int xmm_dst; + + if ((fetchdat & 0xc0) != 0xc0) + return 0; + if ((fetchdat & 0x08) || !(fetchdat & 0x30)) + return 0; + + MMX_ENTER(); + + xmm_dst = LOAD_MMX_Q_MMX(fetchdat & 7); + switch (fetchdat & 0x38) + { + case 0x10: /*PSRLQ*/ + MMX_PSRLQ_imm(xmm_dst, (fetchdat >> 8) & 0xff); + break; + case 0x20: /*PSRAQ*/ + MMX_PSRAQ_imm(xmm_dst, (fetchdat >> 8) & 0xff); + break; + case 0x30: /*PSLLQ*/ + MMX_PSLLQ_imm(xmm_dst, (fetchdat >> 8) & 0xff); + break; + } + STORE_MMX_Q_MMX(fetchdat & 7, xmm_dst); + + return op_pc + 2; +} + +static uint32_t ropEMMS(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + codegen_mmx_entered = 0; + + return 0; +} diff --git a/src/codegen/codegen_ops_mov.h b/src/codegen/codegen_ops_mov.h new file mode 100644 index 000000000..fc4ec1637 --- /dev/null +++ b/src/codegen/codegen_ops_mov.h @@ -0,0 +1,656 @@ +static uint32_t ropMOV_rb_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + STORE_IMM_REG_B(opcode & 7, fetchdat & 0xff); + + return op_pc + 1; +} +static uint32_t ropMOV_rw_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + STORE_IMM_REG_W(opcode & 7, fetchdat & 0xffff); + + return op_pc + 2; +} +static uint32_t ropMOV_rl_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + fetchdat = fastreadl(cs + op_pc); + + STORE_IMM_REG_L(opcode & 7, fetchdat); + + return op_pc + 4; +} + + +static uint32_t ropMOV_b_r(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_B((fetchdat >> 3) & 7); + + if ((fetchdat & 0xc0) == 0xc0) + { + STORE_REG_TARGET_B_RELEASE(host_reg, fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + CHECK_SEG_LIMITS(target_seg, 0); + + MEM_STORE_ADDR_EA_B(target_seg, host_reg); + RELEASE_REG(host_reg); + } + + return op_pc + 1; +} +static uint32_t ropMOV_w_r(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg = LOAD_REG_W((fetchdat >> 3) & 7); + + if ((fetchdat & 0xc0) == 0xc0) + { + STORE_REG_TARGET_W_RELEASE(host_reg, fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + CHECK_SEG_LIMITS(target_seg, 1); + + MEM_STORE_ADDR_EA_W(target_seg, host_reg); + RELEASE_REG(host_reg); + } + + return op_pc + 1; +} + +static uint32_t ropMOV_l_r(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + host_reg = LOAD_REG_L((fetchdat >> 3) & 7); + + if ((fetchdat & 0xc0) == 0xc0) + { + STORE_REG_TARGET_L_RELEASE(host_reg, fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + CHECK_SEG_LIMITS(target_seg, 3); + + MEM_STORE_ADDR_EA_L(target_seg, host_reg); + RELEASE_REG(host_reg); + + } + + return op_pc + 1; +} + +static uint32_t ropMOV_r_b(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_B(fetchdat & 7); + STORE_REG_TARGET_B_RELEASE(host_reg, (fetchdat >> 3) & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_B(target_seg); + STORE_REG_TARGET_B_RELEASE(0, (fetchdat >> 3) & 7); + } + + return op_pc + 1; +} +static uint32_t ropMOV_r_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_W(fetchdat & 7); + STORE_REG_TARGET_W_RELEASE(host_reg, (fetchdat >> 3) & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_W(target_seg); + STORE_REG_TARGET_W_RELEASE(0, (fetchdat >> 3) & 7); + } + + return op_pc + 1; +} +static uint32_t ropMOV_r_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_L(fetchdat & 7); + STORE_REG_TARGET_L_RELEASE(host_reg, (fetchdat >> 3) & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_L(target_seg); + STORE_REG_TARGET_L_RELEASE(0, (fetchdat >> 3) & 7); + } + + return op_pc + 1; +} + +static uint32_t ropMOV_b_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + STORE_IMM_REG_B(fetchdat & 7, (fetchdat >> 8) & 0xff); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + uint32_t imm = fastreadb(cs + op_pc + 1); + int host_reg = LOAD_REG_IMM(imm); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + CHECK_SEG_WRITE(target_seg); + + MEM_STORE_ADDR_EA_B(target_seg, host_reg); + RELEASE_REG(host_reg); + } + + return op_pc + 2; +} +static uint32_t ropMOV_w_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + STORE_IMM_REG_W(fetchdat & 7, (fetchdat >> 8) & 0xffff); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + uint32_t imm = fastreadw(cs + op_pc + 1); + int host_reg = LOAD_REG_IMM(imm); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + CHECK_SEG_WRITE(target_seg); + + MEM_STORE_ADDR_EA_W(target_seg, host_reg); + RELEASE_REG(host_reg); + } + + return op_pc + 3; +} +static uint32_t ropMOV_l_imm(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + uint32_t imm = fastreadl(cs + op_pc + 1); + + STORE_IMM_REG_L(fetchdat & 7, imm); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + uint32_t imm = fastreadl(cs + op_pc + 1); + int host_reg = LOAD_REG_IMM(imm); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + CHECK_SEG_WRITE(target_seg); + + MEM_STORE_ADDR_EA_L(target_seg, host_reg); + RELEASE_REG(host_reg); + } + + return op_pc + 5; +} + + +static uint32_t ropMOV_AL_a(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t addr; + + if (op_32 & 0x200) + addr = fastreadl(cs + op_pc); + else + addr = fastreadw(cs + op_pc); + + CHECK_SEG_READ(op_ea_seg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + MEM_LOAD_ADDR_IMM_B(op_ea_seg, addr); + STORE_REG_TARGET_B_RELEASE(0, REG_AL); + + return op_pc + ((op_32 & 0x200) ? 4 : 2); +} +static uint32_t ropMOV_AX_a(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t addr; + + if (op_32 & 0x200) + addr = fastreadl(cs + op_pc); + else + addr = fastreadw(cs + op_pc); + + CHECK_SEG_READ(op_ea_seg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + MEM_LOAD_ADDR_IMM_W(op_ea_seg, addr); + STORE_REG_TARGET_W_RELEASE(0, REG_AX); + + return op_pc + ((op_32 & 0x200) ? 4 : 2); +} +static uint32_t ropMOV_EAX_a(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t addr; + + if (op_32 & 0x200) + addr = fastreadl(cs + op_pc); + else + addr = fastreadw(cs + op_pc); + + CHECK_SEG_READ(op_ea_seg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + MEM_LOAD_ADDR_IMM_L(op_ea_seg, addr); + STORE_REG_TARGET_L_RELEASE(0, REG_EAX); + + return op_pc + ((op_32 & 0x200) ? 4 : 2); +} + +static uint32_t ropMOV_a_AL(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t addr; + int host_reg; + + if (op_32 & 0x200) + addr = fastreadl(cs + op_pc); + else + addr = fastreadw(cs + op_pc); + + CHECK_SEG_WRITE(op_ea_seg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + host_reg = LOAD_REG_B(REG_AL); + + MEM_STORE_ADDR_IMM_B(op_ea_seg, addr, host_reg); + RELEASE_REG(host_reg); + + return op_pc + ((op_32 & 0x200) ? 4 : 2); +} +static uint32_t ropMOV_a_AX(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t addr; + int host_reg; + + if (op_32 & 0x200) + addr = fastreadl(cs + op_pc); + else + addr = fastreadw(cs + op_pc); + + CHECK_SEG_WRITE(op_ea_seg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + host_reg = LOAD_REG_W(REG_AX); + + MEM_STORE_ADDR_IMM_W(op_ea_seg, addr, host_reg); + RELEASE_REG(host_reg); + + return op_pc + ((op_32 & 0x200) ? 4 : 2); +} +static uint32_t ropMOV_a_EAX(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t addr; + int host_reg; + + if (op_32 & 0x200) + addr = fastreadl(cs + op_pc); + else + addr = fastreadw(cs + op_pc); + + CHECK_SEG_WRITE(op_ea_seg); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + host_reg = LOAD_REG_L(REG_EAX); + + MEM_STORE_ADDR_IMM_L(op_ea_seg, addr, host_reg); + RELEASE_REG(host_reg); + + return op_pc + ((op_32 & 0x200) ? 4 : 2); +} + +static uint32_t ropLEA_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int dest_reg = (fetchdat >> 3) & 7; + + if ((fetchdat & 0xc0) == 0xc0) + return 0; + + FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_REG_TARGET_W_RELEASE(0, dest_reg); + + return op_pc + 1; +} +static uint32_t ropLEA_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int dest_reg = (fetchdat >> 3) & 7; + + if ((fetchdat & 0xc0) == 0xc0) + return 0; + + FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_REG_TARGET_L_RELEASE(0, dest_reg); + + return op_pc + 1; +} + +static uint32_t ropMOVZX_w_b(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_B(fetchdat & 7); + host_reg = ZERO_EXTEND_W_B(host_reg); + STORE_REG_TARGET_W_RELEASE(host_reg, (fetchdat >> 3) & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_B(target_seg); + ZERO_EXTEND_W_B(0); + STORE_REG_TARGET_W_RELEASE(0, (fetchdat >> 3) & 7); + } + + return op_pc + 1; +} +static uint32_t ropMOVZX_l_b(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_B(fetchdat & 7); + host_reg = ZERO_EXTEND_L_B(host_reg); + STORE_REG_TARGET_L_RELEASE(host_reg, (fetchdat >> 3) & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_B(target_seg); + ZERO_EXTEND_L_B(0); + STORE_REG_TARGET_L_RELEASE(0, (fetchdat >> 3) & 7); + } + + return op_pc + 1; +} +static uint32_t ropMOVZX_l_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_W(fetchdat & 7); + host_reg = ZERO_EXTEND_L_W(host_reg); + STORE_REG_TARGET_L_RELEASE(host_reg, (fetchdat >> 3) & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_W(target_seg); + ZERO_EXTEND_L_W(0); + STORE_REG_TARGET_L_RELEASE(0, (fetchdat >> 3) & 7); + } + + return op_pc + 1; +} + +static uint32_t ropMOVSX_w_b(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_B(fetchdat & 7); + host_reg = SIGN_EXTEND_W_B(host_reg); + STORE_REG_TARGET_W_RELEASE(host_reg, (fetchdat >> 3) & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_B(target_seg); + SIGN_EXTEND_W_B(0); + STORE_REG_TARGET_W_RELEASE(0, (fetchdat >> 3) & 7); + } + + return op_pc + 1; +} +static uint32_t ropMOVSX_l_b(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_B(fetchdat & 7); + host_reg = SIGN_EXTEND_L_B(host_reg); + STORE_REG_TARGET_L_RELEASE(host_reg, (fetchdat >> 3) & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_B(target_seg); + SIGN_EXTEND_L_B(0); + STORE_REG_TARGET_L_RELEASE(0, (fetchdat >> 3) & 7); + } + + return op_pc + 1; +} +static uint32_t ropMOVSX_l_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + if ((fetchdat & 0xc0) == 0xc0) + { + int host_reg = LOAD_REG_W(fetchdat & 7); + host_reg = SIGN_EXTEND_L_W(host_reg); + STORE_REG_TARGET_L_RELEASE(host_reg, (fetchdat >> 3) & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_READ(target_seg); + + MEM_LOAD_ADDR_EA_W(target_seg); + SIGN_EXTEND_L_W(0); + STORE_REG_TARGET_L_RELEASE(0, (fetchdat >> 3) & 7); + } + + return op_pc + 1; +} + +static uint32_t ropMOV_w_seg(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + switch (fetchdat & 0x38) + { + case 0x00: /*ES*/ + host_reg = LOAD_VAR_WL((uintptr_t)&ES); + break; + case 0x08: /*CS*/ + host_reg = LOAD_VAR_WL((uintptr_t)&CS); + break; + case 0x18: /*DS*/ + host_reg = LOAD_VAR_WL((uintptr_t)&DS); + break; + case 0x10: /*SS*/ + host_reg = LOAD_VAR_WL((uintptr_t)&SS); + break; + case 0x20: /*FS*/ + host_reg = LOAD_VAR_WL((uintptr_t)&FS); + break; + case 0x28: /*GS*/ + host_reg = LOAD_VAR_WL((uintptr_t)&GS); + break; + default: + return 0; + } + + if ((fetchdat & 0xc0) == 0xc0) + { + if (op_32 & 0x100) + STORE_REG_TARGET_L_RELEASE(host_reg, fetchdat & 7); + else + STORE_REG_TARGET_W_RELEASE(host_reg, fetchdat & 7); + } + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + CHECK_SEG_WRITE(target_seg); + CHECK_SEG_LIMITS(target_seg, 1); + + MEM_STORE_ADDR_EA_W(target_seg, host_reg); + RELEASE_REG(host_reg); + } + + return op_pc + 1; +} +static uint32_t ropMOV_seg_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + switch (fetchdat & 0x38) + { + case 0x00: /*ES*/ + case 0x18: /*DS*/ + case 0x20: /*FS*/ + case 0x28: /*GS*/ + break; + case 0x10: /*SS*/ + default: + return 0; + } + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + + if ((fetchdat & 0xc0) == 0xc0) + host_reg = LOAD_REG_W(fetchdat & 7); + else + { + x86seg *target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + + CHECK_SEG_READ(target_seg); + MEM_LOAD_ADDR_EA_W(target_seg); + + host_reg = 0; + } + + switch (fetchdat & 0x38) + { + case 0x00: /*ES*/ + LOAD_SEG(host_reg, &cpu_state.seg_es); + break; + case 0x18: /*DS*/ + LOAD_SEG(host_reg, &cpu_state.seg_ds); + break; + case 0x20: /*FS*/ + LOAD_SEG(host_reg, &cpu_state.seg_fs); + break; + case 0x28: /*GS*/ + LOAD_SEG(host_reg, &cpu_state.seg_gs); + break; + } + + return op_pc + 1; +} + +#define ropLseg(seg, rseg) \ +static uint32_t ropL ## seg(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ +{ \ + int dest_reg = (fetchdat >> 3) & 7; \ + x86seg *target_seg; \ + \ + if ((fetchdat & 0xc0) == 0xc0) \ + return 0; \ + \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + SAVE_EA(); \ + \ + if (op_32 & 0x100) \ + { \ + MEM_LOAD_ADDR_EA_L(target_seg); \ + STORE_HOST_REG_ADDR((uintptr_t)&codegen_temp, 0); \ + LOAD_EA(); \ + MEM_LOAD_ADDR_EA_W_OFFSET(target_seg, 4); \ + } \ + else \ + { \ + MEM_LOAD_ADDR_EA_W(target_seg); \ + STORE_HOST_REG_ADDR_W((uintptr_t)&codegen_temp, 0); \ + LOAD_EA(); \ + MEM_LOAD_ADDR_EA_W_OFFSET(target_seg, 2); \ + } \ + LOAD_SEG(0, &rseg); \ + if (op_32 & 0x100) \ + { \ + \ + int host_reg = LOAD_VAR_L((uintptr_t)&codegen_temp); \ + STORE_REG_TARGET_L_RELEASE(host_reg, dest_reg); \ + } \ + else \ + { \ + int host_reg = LOAD_VAR_W((uintptr_t)&codegen_temp); \ + STORE_REG_TARGET_W_RELEASE(host_reg, dest_reg); \ + } \ + \ + if (&rseg == &cpu_state.seg_ss) \ + CPU_BLOCK_END(); /*Instruction might change stack size, so end block here*/ \ + return op_pc + 1; \ +} + +ropLseg(DS, cpu_state.seg_ds) +ropLseg(ES, cpu_state.seg_es) +ropLseg(FS, cpu_state.seg_fs) +ropLseg(GS, cpu_state.seg_gs) +ropLseg(SS, cpu_state.seg_ss) diff --git a/src/codegen/codegen_ops_shift.h b/src/codegen/codegen_ops_shift.h new file mode 100644 index 000000000..b67c34544 --- /dev/null +++ b/src/codegen/codegen_ops_shift.h @@ -0,0 +1,125 @@ +#define SHIFT(size, size2, res_store, immediate) \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + reg = LOAD_REG_ ## size(fetchdat & 7); \ + if (immediate) count = (fetchdat >> 8) & 0x1f; \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE_ ## size(target_seg); \ + reg = MEM_LOAD_ADDR_EA_ ## size ## _NO_ABRT(target_seg); \ + if (immediate) count = fastreadb(cs + op_pc + 1) & 0x1f; \ + } \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op2, count); \ + \ + res_store((uintptr_t)&cpu_state.flags_op1, reg); \ + \ + switch (fetchdat & 0x38) \ + { \ + case 0x20: case 0x30: /*SHL*/ \ + SHL_ ## size ## _IMM(reg, count); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SHL ## size2); \ + break; \ + \ + case 0x28: /*SHR*/ \ + SHR_ ## size ## _IMM(reg, count); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SHR ## size2); \ + break; \ + \ + case 0x38: /*SAR*/ \ + SAR_ ## size ## _IMM(reg, count); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_SAR ## size2); \ + break; \ + } \ + \ + res_store((uintptr_t)&cpu_state.flags_res, reg); \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_ ## size ## _RELEASE(reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_ ## size ## _NO_ABRT(target_seg, reg); \ + } + +static uint32_t ropC0(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg = NULL; + int count; + int reg; + + if ((fetchdat & 0x38) < 0x20) + return 0; + + SHIFT(B, 8, STORE_HOST_REG_ADDR_BL, 1); + + return op_pc + 2; +} +static uint32_t ropC1_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg = NULL; + int count; + int reg; + + if ((fetchdat & 0x38) < 0x20) + return 0; + + SHIFT(W, 16, STORE_HOST_REG_ADDR_WL, 1); + + return op_pc + 2; +} +static uint32_t ropC1_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg = NULL; + int count; + int reg; + + if ((fetchdat & 0x38) < 0x20) + return 0; + + SHIFT(L, 32, STORE_HOST_REG_ADDR, 1); + + return op_pc + 2; +} + +static uint32_t ropD0(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg = NULL; + int count = 1; + int reg; + + if ((fetchdat & 0x38) < 0x20) + return 0; + + SHIFT(B, 8, STORE_HOST_REG_ADDR_BL, 0); + + return op_pc + 1; +} +static uint32_t ropD1_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg = NULL; + int count = 1; + int reg; + + if ((fetchdat & 0x38) < 0x20) + return 0; + + SHIFT(W, 16, STORE_HOST_REG_ADDR_WL, 0); + + return op_pc + 1; +} +static uint32_t ropD1_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + x86seg *target_seg = NULL; + int count = 1; + int reg; + + if ((fetchdat & 0x38) < 0x20) + return 0; + + SHIFT(L, 32, STORE_HOST_REG_ADDR, 0); + + return op_pc + 1; +} diff --git a/src/codegen/codegen_ops_stack.h b/src/codegen/codegen_ops_stack.h new file mode 100644 index 000000000..288be2c6c --- /dev/null +++ b/src/codegen/codegen_ops_stack.h @@ -0,0 +1,269 @@ +static uint32_t ropPUSH_16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-2); + host_reg = LOAD_REG_W(opcode & 7); + MEM_STORE_ADDR_EA_W(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-2); + + return op_pc; +} +static uint32_t ropPUSH_32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-4); + host_reg = LOAD_REG_L(opcode & 7); + MEM_STORE_ADDR_EA_L(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-4); + + return op_pc; +} + +static uint32_t ropPUSH_imm_16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint16_t imm = fetchdat & 0xffff; + int host_reg; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-2); + host_reg = LOAD_REG_IMM(imm); + MEM_STORE_ADDR_EA_W(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-2); + + return op_pc+2; +} +static uint32_t ropPUSH_imm_32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t imm = fastreadl(cs + op_pc); + int host_reg; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-4); + host_reg = LOAD_REG_IMM(imm); + MEM_STORE_ADDR_EA_L(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-4); + + return op_pc+4; +} + +static uint32_t ropPUSH_imm_b16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint16_t imm = fetchdat & 0xff; + int host_reg; + + if (imm & 0x80) + imm |= 0xff00; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-2); + host_reg = LOAD_REG_IMM(imm); + MEM_STORE_ADDR_EA_W(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-2); + + return op_pc+1; +} +static uint32_t ropPUSH_imm_b32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t imm = fetchdat & 0xff; + int host_reg; + + if (imm & 0x80) + imm |= 0xffffff00; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-4); + host_reg = LOAD_REG_IMM(imm); + MEM_STORE_ADDR_EA_L(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-4); + + return op_pc+1; +} + +static uint32_t ropPOP_16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(0); + MEM_LOAD_ADDR_EA_W(&cpu_state.seg_ss); + SP_MODIFY(2); + STORE_REG_TARGET_W_RELEASE(0, opcode & 7); + + return op_pc; +} +static uint32_t ropPOP_32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(0); + MEM_LOAD_ADDR_EA_L(&cpu_state.seg_ss); + SP_MODIFY(4); + STORE_REG_TARGET_L_RELEASE(0, opcode & 7); + + return op_pc; +} + +static uint32_t ropRET_16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(0); + MEM_LOAD_ADDR_EA_W(&cpu_state.seg_ss); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.pc, 0); + SP_MODIFY(2); + + return -1; +} +static uint32_t ropRET_32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(0); + MEM_LOAD_ADDR_EA_L(&cpu_state.seg_ss); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.pc, 0); + SP_MODIFY(4); + + return -1; +} + +static uint32_t ropRET_imm_16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint16_t offset = fetchdat & 0xffff; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(0); + MEM_LOAD_ADDR_EA_W(&cpu_state.seg_ss); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.pc, 0); + SP_MODIFY(2+offset); + + return -1; +} +static uint32_t ropRET_imm_32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint16_t offset = fetchdat & 0xffff; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(0); + MEM_LOAD_ADDR_EA_L(&cpu_state.seg_ss); + STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.pc, 0); + SP_MODIFY(4+offset); + + return -1; +} + +static uint32_t ropCALL_r16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint16_t offset = fetchdat & 0xffff; + int host_reg; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-2); + host_reg = LOAD_REG_IMM(op_pc+2); + MEM_STORE_ADDR_EA_W(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-2); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.pc, (op_pc+2+offset) & 0xffff); + + return -1; +} +static uint32_t ropCALL_r32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + uint32_t offset = fastreadl(cs + op_pc); + int host_reg; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_STACK_TO_EA(-4); + host_reg = LOAD_REG_IMM(op_pc+4); + MEM_STORE_ADDR_EA_L(&cpu_state.seg_ss, host_reg); + SP_MODIFY(-4); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.pc, op_pc+4+offset); + + return -1; +} + +static uint32_t ropLEAVE_16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_EBP_TO_EA(0); + MEM_LOAD_ADDR_EA_W(&cpu_state.seg_ss); + host_reg = LOAD_REG_W(REG_BP); /*SP = BP + 2*/ + ADD_HOST_REG_IMM_W(host_reg, 2); + STORE_REG_TARGET_W_RELEASE(host_reg, REG_SP); + STORE_REG_TARGET_W_RELEASE(0, REG_BP); /*BP = POP_W()*/ + + return op_pc; +} +static uint32_t ropLEAVE_32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int host_reg; + + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + LOAD_EBP_TO_EA(0); + MEM_LOAD_ADDR_EA_L(&cpu_state.seg_ss); + host_reg = LOAD_REG_L(REG_EBP); /*ESP = EBP + 4*/ + ADD_HOST_REG_IMM(host_reg, 4); + STORE_REG_TARGET_L_RELEASE(host_reg, REG_ESP); + STORE_REG_TARGET_L_RELEASE(0, REG_EBP); /*EBP = POP_L()*/ + + return op_pc; +} + +#define ROP_PUSH_SEG(seg) \ +static uint32_t ropPUSH_ ## seg ## _16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ +{ \ + int host_reg; \ + \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + LOAD_STACK_TO_EA(-2); \ + host_reg = LOAD_VAR_W((uintptr_t)&seg); \ + MEM_STORE_ADDR_EA_W(&cpu_state.seg_ss, host_reg); \ + SP_MODIFY(-2); \ + \ + return op_pc; \ +} \ +static uint32_t ropPUSH_ ## seg ## _32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ +{ \ + int host_reg; \ + \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + LOAD_STACK_TO_EA(-4); \ + host_reg = LOAD_VAR_W((uintptr_t)&seg); \ + MEM_STORE_ADDR_EA_L(&cpu_state.seg_ss, host_reg); \ + SP_MODIFY(-4); \ + \ + return op_pc; \ +} + +ROP_PUSH_SEG(CS) +ROP_PUSH_SEG(DS) +ROP_PUSH_SEG(ES) +ROP_PUSH_SEG(FS) +ROP_PUSH_SEG(GS) +ROP_PUSH_SEG(SS) + +#define ROP_POP_SEG(seg, rseg) \ +static uint32_t ropPOP_ ## seg ## _16(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ +{ \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + LOAD_STACK_TO_EA(0); \ + MEM_LOAD_ADDR_EA_W(&cpu_state.seg_ss); \ + LOAD_SEG(0, &rseg); \ + SP_MODIFY(2); \ + \ + return op_pc; \ +} \ +static uint32_t ropPOP_ ## seg ## _32(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ +{ \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + LOAD_STACK_TO_EA(0); \ + MEM_LOAD_ADDR_EA_W(&cpu_state.seg_ss); \ + LOAD_SEG(0, &rseg); \ + SP_MODIFY(4); \ + \ + return op_pc; \ +} + +ROP_POP_SEG(DS, cpu_state.seg_ds) +ROP_POP_SEG(ES, cpu_state.seg_es) +ROP_POP_SEG(FS, cpu_state.seg_fs) +ROP_POP_SEG(GS, cpu_state.seg_gs) diff --git a/src/codegen/codegen_ops_x86-64.h b/src/codegen/codegen_ops_x86-64.h new file mode 100644 index 000000000..178f0f868 --- /dev/null +++ b/src/codegen/codegen_ops_x86-64.h @@ -0,0 +1,6180 @@ +/*Register allocation : + R8-R15 - emulated registers +*/ +#include + +#define HOST_REG_XMM_START 0 +#define HOST_REG_XMM_END 7 + +#define IS_32_ADDR(x) !(((uintptr_t)x) & 0xffffffff00000000) + +static inline int find_host_xmm_reg() +{ + int c; + for (c = HOST_REG_XMM_START; c < HOST_REG_XMM_END; c++) + { + if (host_reg_xmm_mapping[c] == -1) + break; + } + + if (c == HOST_REG_XMM_END) + fatal("Out of host XMM regs!\n"); + return c; +} +static inline void call(codeblock_t *block, uintptr_t func) +{ + uintptr_t diff = func - (uintptr_t)&block->data[block_pos + 5]; + + codegen_reg_loaded[0] = codegen_reg_loaded[1] = codegen_reg_loaded[2] = codegen_reg_loaded[3] = 0; + codegen_reg_loaded[4] = codegen_reg_loaded[5] = codegen_reg_loaded[6] = codegen_reg_loaded[7] = 0; + + if (diff >= -0x80000000 && diff < 0x7fffffff) + { + addbyte(0xE8); /*CALL*/ + addlong((uint32_t)diff); + } + else + { + addbyte(0x48); /*MOV RAX, func*/ + addbyte(0xb8); + addquad(func); + addbyte(0xff); /*CALL RAX*/ + addbyte(0xd0); + } +} + +static inline void call_long(uintptr_t func) +{ + codegen_reg_loaded[0] = codegen_reg_loaded[1] = codegen_reg_loaded[2] = codegen_reg_loaded[3] = 0; + codegen_reg_loaded[4] = codegen_reg_loaded[5] = codegen_reg_loaded[6] = codegen_reg_loaded[7] = 0; + + addbyte(0x48); /*MOV RAX, func*/ + addbyte(0xb8); + addquad(func); + addbyte(0xff); /*CALL RAX*/ + addbyte(0xd0); +} + +static inline void load_param_1_32(codeblock_t *block, uint32_t param) +{ +#if WIN64 + addbyte(0xb9); /*MOVL $fetchdat,%ecx*/ +#else + addbyte(0xbf); /*MOVL $fetchdat,%edi*/ +#endif + addlong(param); +} +static inline void load_param_1_reg_32(int reg) +{ +#if WIN64 + if (reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOV ECX, EAX*/ + addbyte(0xc0 | REG_ECX | (reg << 3)); +#else + if (reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOV EDI, EAX*/ + addbyte(0xc0 | REG_EDI | (reg << 3)); +#endif +} +#if 0 +static inline void load_param_1_64(codeblock_t *block, uint64_t param) +{ + addbyte(0x48); +#if WIN64 + addbyte(0xb9); /*MOVL $fetchdat,%ecx*/ +#else + addbyte(0xbf); /*MOVL $fetchdat,%edi*/ +#endif + addquad(param); +} +#endif + +static inline void load_param_2_32(codeblock_t *block, uint32_t param) +{ +#if WIN64 + addbyte(0xba); /*MOVL $fetchdat,%edx*/ +#else + addbyte(0xbe); /*MOVL $fetchdat,%esi*/ +#endif + addlong(param); +} +static inline void load_param_2_reg_32(int reg) +{ +#if WIN64 + if (reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOV EDX, EAX*/ + addbyte(0xc0 | REG_EDX | (reg << 3)); +#else + if (reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOV ESI, EAX*/ + addbyte(0xc0 | REG_ESI | (reg << 3)); +#endif +} +static inline void load_param_2_64(codeblock_t *block, uint64_t param) +{ + addbyte(0x48); +#if WIN64 + addbyte(0xba); /*MOVL $fetchdat,%edx*/ +#else + addbyte(0xbe); /*MOVL $fetchdat,%esi*/ +#endif + addquad(param); +} + +static inline void load_param_3_reg_32(int reg) +{ + if (reg & 8) + { +#if WIN64 + addbyte(0x45); /*MOVL R8,reg*/ + addbyte(0x89); + addbyte(0xc0 | ((reg & 7) << 3)); +#else + addbyte(0x44); /*MOV EDX, reg*/ + addbyte(0x89); + addbyte(0xc0 | REG_EDX | (reg << 3)); +#endif + } + else + { +#if WIN64 + addbyte(0x41); /*MOVL R8,reg*/ + addbyte(0x89); + addbyte(0xc0 | ((reg & 7) << 3)); +#else + addbyte(0x90); + addbyte(0x89); /*MOV EDX, reg*/ + addbyte(0xc0 | REG_EDX | (reg << 3)); +#endif + } +} +static inline void load_param_3_reg_64(int reg) +{ + if (reg & 8) + { +#if WIN64 + addbyte(0x4d); /*MOVL R8,reg*/ + addbyte(0x89); + addbyte(0xc0 | ((reg & 7) << 3)); +#else + addbyte(0x4c); /*MOVL EDX,reg*/ + addbyte(0x89); + addbyte(0xc0 | REG_EDX | ((reg & 7) << 3)); +#endif + } + else + { +#if WIN64 + addbyte(0x49); /*MOVL R8,reg*/ + addbyte(0x89); + addbyte(0xc0 | ((reg & 7) << 3)); +#else + addbyte(0x48); /*MOVL EDX,reg*/ + addbyte(0x89); + addbyte(0xc0 | REG_EDX | ((reg & 7) << 3)); +#endif + } +} + +static inline void CALL_FUNC(uintptr_t func) +{ + codegen_reg_loaded[0] = codegen_reg_loaded[1] = codegen_reg_loaded[2] = codegen_reg_loaded[3] = 0; + codegen_reg_loaded[4] = codegen_reg_loaded[5] = codegen_reg_loaded[6] = codegen_reg_loaded[7] = 0; + + addbyte(0x48); /*MOV RAX, func*/ + addbyte(0xb8); + addquad(func); + addbyte(0xff); /*CALL RAX*/ + addbyte(0xd0); +} + +static inline void RELEASE_REG(int host_reg) +{ +} + +static inline int LOAD_REG_B(int reg) +{ + int host_reg = reg & 3; + + if (!codegen_reg_loaded[reg & 3]) + { + addbyte(0x44); /*MOVZX W[reg],host_reg*/ + addbyte(0x8b); + addbyte(0x45 | (host_reg << 3)); + addbyte(cpu_state_offset(regs[host_reg & 3].b)); + } + + codegen_reg_loaded[reg & 3] = 1; + + if (reg & 4) + return host_reg | 0x18; + + return host_reg | 8; +} +static inline int LOAD_REG_W(int reg) +{ + int host_reg = reg; + + if (!codegen_reg_loaded[reg & 7]) + { + addbyte(0x44); /*MOVZX W[reg],host_reg*/ + addbyte(0x8b); + addbyte(0x45 | (host_reg << 3)); + addbyte(cpu_state_offset(regs[reg & 7].w)); + } + + codegen_reg_loaded[reg & 7] = 1; + + return host_reg | 8; +} +static inline int LOAD_REG_L(int reg) +{ + int host_reg = reg; + + if (!codegen_reg_loaded[reg & 7]) + { + addbyte(0x44); /*MOVZX W[reg],host_reg*/ + addbyte(0x8b); + addbyte(0x45 | (host_reg << 3)); + addbyte(cpu_state_offset(regs[reg & 7].l)); + } + + codegen_reg_loaded[reg & 7] = 1; + + return host_reg | 8; +} + +static inline int LOAD_REG_IMM(uint32_t imm) +{ + int host_reg = REG_EBX; + + addbyte(0xb8 | REG_EBX); /*MOVL EBX, imm*/ + addlong(imm); + + return host_reg; +} + +static inline void STORE_REG_TARGET_B_RELEASE(int host_reg, int guest_reg) +{ + int dest_reg = LOAD_REG_L(guest_reg & 3) & 7; + + if (guest_reg & 4) + { + if (host_reg & 8) + { + addbyte(0x66); /*MOV AX, host_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 3) << 3)); + if (host_reg & 0x10) + { + addbyte(0x66); /*AND AX, 0xff00*/ + addbyte(0x25); + addword(0xff00); + } + else + { + addbyte(0x66); /*SHL AX, 8*/ + addbyte(0xc1); + addbyte(0xe0); + addbyte(0x08); + } + } + else + { + if (host_reg) + { + addbyte(0x66); /*MOV AX, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 3) << 3)); + } + addbyte(0x66); /*SHL AX, 8*/ + addbyte(0xc1); + addbyte(0xe0); + addbyte(0x08); + } + addbyte(0x66); /*AND dest_reg, 0x00ff*/ + addbyte(0x41); + addbyte(0x81); + addbyte(0xe0 | dest_reg); + addword(0x00ff); + addbyte(0x66); /*OR dest_reg, AX*/ + addbyte(0x41); + addbyte(0x09); + addbyte(0xc0 | dest_reg); + addbyte(0x66); /*MOVW regs[guest_reg].w, dest_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0x45 | (dest_reg << 3)); + addbyte(cpu_state_offset(regs[guest_reg & 3].w)); + } + else + { + if (host_reg & 8) + { + if (host_reg & 0x10) + { + addbyte(0x66); /*MOV AX, host_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 3) << 3)); + addbyte(0x88); /*MOV AL, AH*/ + addbyte(0xe0); + addbyte(0x41); /*MOV dest_reg, AL*/ + addbyte(0x88); + addbyte(0xc0 | (dest_reg & 7)); + addbyte(0x88); /*MOVB regs[reg].b, AH*/ + addbyte(0x65); + addbyte(cpu_state_offset(regs[guest_reg & 3].b)); + } + else + { + addbyte(0x45); /*MOVB dest_reg, host_reg*/ + addbyte(0x88); + addbyte(0xc0 | (dest_reg & 7) | ((host_reg & 7) << 3)); + addbyte(0x44); /*MOVB regs[guest_reg].b, host_reg*/ + addbyte(0x88); + addbyte(0x45 | ((host_reg & 3) << 3)); + addbyte(cpu_state_offset(regs[guest_reg & 3].b)); + } + } + else + { + if (host_reg & 0x10) + { + addbyte(0xc1); /*SHR host_reg, 8*/ + addbyte(0xe8 | (host_reg & 7)); + addbyte(8); + } + addbyte(0x41); /*MOVB dest_reg, host_reg*/ + addbyte(0x88); + addbyte(0xc0 | (dest_reg & 7) | ((host_reg & 7) << 3)); + addbyte(0x88); /*MOVB regs[guest_reg].b, host_reg*/ + addbyte(0x45 | ((host_reg & 3) << 3)); + addbyte(cpu_state_offset(regs[guest_reg & 3].b)); + } + } +} +static inline void STORE_REG_TARGET_W_RELEASE(int host_reg, int guest_reg) +{ + int dest_reg = LOAD_REG_L(guest_reg & 7) & 7; + + if (host_reg & 8) + { + addbyte(0x66); /*MOVW guest_reg, host_reg*/ + addbyte(0x45); + addbyte(0x89); + addbyte(0xc0 | dest_reg | ((host_reg & 7) << 3)); + addbyte(0x66); /*MOVW regs[guest_reg].w, host_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0x45 | ((host_reg & 7) << 3)); + addbyte(cpu_state_offset(regs[guest_reg & 7].w)); + } + else + { + addbyte(0x66); /*MOVW guest_reg, host_reg*/ + addbyte(0x41); + addbyte(0x89); + addbyte(0xc0 | dest_reg | (host_reg << 3)); + addbyte(0x66); /*MOVW regs[guest_reg].w, host_reg*/ + addbyte(0x89); + addbyte(0x45 | (host_reg << 3)); + addbyte(cpu_state_offset(regs[guest_reg & 7].w)); + } +} +static inline void STORE_REG_TARGET_L_RELEASE(int host_reg, int guest_reg) +{ + if (host_reg & 8) + { + addbyte(0x45); /*MOVL guest_reg, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | guest_reg | (host_reg << 3)); + addbyte(0x44); /*MOVL regs[guest_reg].l, host_reg*/ + addbyte(0x89); + addbyte(0x45 | (host_reg << 3)); + addbyte(cpu_state_offset(regs[guest_reg & 7].l)); + } + else + { + addbyte(0x41); /*MOVL guest_reg, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | guest_reg | (host_reg << 3)); + addbyte(0x89); /*MOVL regs[guest_reg].l, host_reg*/ + addbyte(0x45 | (host_reg << 3)); + addbyte(cpu_state_offset(regs[guest_reg & 7].l)); + } +} + +static inline void STORE_REG_B_RELEASE(int host_reg) +{ + if (host_reg & 0x10) + { + addbyte(0x66); /*MOVW [reg],host_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0x45 | ((host_reg & 7) << 3)); + addbyte(cpu_state_offset(regs[host_reg & 7].w)); + } + else + { + addbyte(0x44); /*MOVB [reg],host_reg*/ + addbyte(0x88); + addbyte(0x45 | ((host_reg & 7) << 3)); + addbyte(cpu_state_offset(regs[host_reg & 7].b)); + } +} +static inline void STORE_REG_W_RELEASE(int host_reg) +{ + addbyte(0x66); /*MOVW [reg],host_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0x45 | ((host_reg & 7) << 3)); + addbyte(cpu_state_offset(regs[host_reg & 7].w)); +} +static inline void STORE_REG_L_RELEASE(int host_reg) +{ + addbyte(0x44); /*MOVL [reg],host_reg*/ + addbyte(0x89); + addbyte(0x45 | ((host_reg & 7) << 3)); + addbyte(cpu_state_offset(regs[host_reg & 7].l)); +} + +static inline void STORE_IMM_REG_B(int reg, uint8_t val) +{ + if (reg & 4) + { + int host_reg = LOAD_REG_W(reg & 3) & 7; + addbyte(0x66); /*AND host_reg, 0x00ff*/ + addbyte(0x41); + addbyte(0x81); + addbyte(0xe0 | host_reg); + addword(0x00ff); + addbyte(0x66); /*OR host_reg, val << 8*/ + addbyte(0x41); + addbyte(0x81); + addbyte(0xc8 | host_reg); + addword(val << 8); + addbyte(0x66); /*MOVW host_reg, regs[host_reg].w*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0x45 | (host_reg << 3)); + addbyte(cpu_state_offset(regs[reg & 3].w)); + } + else + { + addbyte(0x41); /*MOVB reg, imm*/ + addbyte(0xb0 | reg); + addbyte(val); + addbyte(0x44); /*MOVB reg, regs[reg].b*/ + addbyte(0x88); + addbyte(0x45 | (reg << 3)); + addbyte(cpu_state_offset(regs[reg & 7].b)); + } +} +static inline void STORE_IMM_REG_W(int reg, uint16_t val) +{ + addbyte(0x66); /*MOVW reg, imm*/ + addbyte(0x41); + addbyte(0xb8 | reg); + addword(val); + addbyte(0x66); /*MOVW reg, regs[reg].w*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0x45 | (reg << 3)); + addbyte(cpu_state_offset(regs[reg & 7].w)); +} +static inline void STORE_IMM_REG_L(int reg, uint32_t val) +{ + addbyte(0x41); /*MOVL reg, imm*/ + addbyte(0xb8 | reg); + addlong(val); + addbyte(0x44); /*MOVL reg, regs[reg].l*/ + addbyte(0x89); + addbyte(0x45 | (reg << 3)); + addbyte(cpu_state_offset(regs[reg & 7].l)); +} + +static inline void STORE_IMM_ADDR_L(uintptr_t addr, uint32_t val) +{ + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + addbyte(0xC7); /*MOVL [addr],val*/ + addbyte(0x45); + addbyte(addr - (uintptr_t)&cpu_state - 128); + addlong(val); + } + else if (addr < 0x100000000) + { + addbyte(0xC7); /*MOVL [addr],val*/ + addbyte(0x04); + addbyte(0x25); + addlong(addr); + addlong(val); + } + else + { + addbyte(0x48); /*MOV ESI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad(addr); + addbyte(0xc7); /*MOVL [ESI], val*/ + addbyte(0x00 | REG_ESI); + addlong(val); + } +} + + + + +static x86seg *FETCH_EA_16(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc) +{ + int mod = (fetchdat >> 6) & 3; + int rm = fetchdat & 7; + + if (!mod && rm == 6) + { + addbyte(0xb8); /*MOVL EAX, imm*/ + addlong((fetchdat >> 8) & 0xffff); + (*op_pc) += 2; + } + else + { + int base_reg = 0, index_reg = 0; + + switch (rm) + { + case 0: case 1: case 7: + base_reg = LOAD_REG_W(REG_BX); + break; + case 2: case 3: case 6: + base_reg = LOAD_REG_W(REG_BP); + break; + case 4: + base_reg = LOAD_REG_W(REG_SI); + break; + case 5: + base_reg = LOAD_REG_W(REG_DI); + break; + } + if (!(rm & 4)) + { + if (rm & 1) + index_reg = LOAD_REG_W(REG_DI); + else + index_reg = LOAD_REG_W(REG_SI); + } + base_reg &= 7; + index_reg &= 7; + + switch (mod) + { + case 0: + if (rm & 4) + { + addbyte(0x41); /*MOVZX EAX, base_reg*/ + addbyte(0x0f); + addbyte(0xb7); + addbyte(0xc0 | base_reg); + } + else + { + addbyte(0x67); /*LEA EAX, base_reg+index_reg*/ + addbyte(0x43); + addbyte(0x8d); + if (base_reg == 5) + { + addbyte(0x44); + addbyte(base_reg | (index_reg << 3)); + addbyte(0); + } + else + { + addbyte(0x04); + addbyte(base_reg | (index_reg << 3)); + } + } + break; + case 1: + if (rm & 4) + { + addbyte(0x67); /*LEA EAX, base_reg+imm8*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x40 | base_reg); + addbyte((fetchdat >> 8) & 0xff); + } + else + { + addbyte(0x67); /*LEA EAX, base_reg+index_reg+imm8*/ + addbyte(0x43); + addbyte(0x8d); + addbyte(0x44); + addbyte(base_reg | (index_reg << 3)); + addbyte((fetchdat >> 8) & 0xff); + } + (*op_pc)++; + break; + case 2: + if (rm & 4) + { + addbyte(0x67); /*LEA EAX, base_reg+imm8*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x80 | base_reg); + addlong((fetchdat >> 8) & 0xffff); + } + else + { + addbyte(0x67); /*LEA EAX, base_reg+index_reg+imm16*/ + addbyte(0x43); + addbyte(0x8d); + addbyte(0x84); + addbyte(base_reg | (index_reg << 3)); + addlong((fetchdat >> 8) & 0xffff); + } + (*op_pc) += 2; + break; + + } + if (mod || !(rm & 4)) + { + addbyte(0x25); /*ANDL $0xffff, %eax*/ + addlong(0xffff); + } + + if (mod1seg[rm] == &ss && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + } + return op_ea_seg; +} +static x86seg *FETCH_EA_32(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc, int stack_offset) +{ + int mod = (fetchdat >> 6) & 3; + int rm = fetchdat & 7; + uint32_t new_eaaddr; + + if (rm == 4) + { + uint8_t sib = fetchdat >> 8; + int base_reg = -1, index_reg = -1; + + (*op_pc)++; + + if (mod || (sib & 7) != 5) + base_reg = LOAD_REG_L(sib & 7) & 7; + + if (((sib >> 3) & 7) != 4) + index_reg = LOAD_REG_L((sib >> 3) & 7) & 7; + + if (index_reg == -1) + { + switch (mod) + { + case 0: + if ((sib & 7) == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xb8); /*MOV EAX, imm32*/ + addlong(new_eaaddr); + (*op_pc) += 4; + } + else + { + addbyte(0x44); /*MOV EAX, base_reg*/ + addbyte(0x89); + addbyte(0xc0 | (base_reg << 3)); + } + break; + case 1: + addbyte(0x67); /*LEA EAX, imm8+base_reg*/ + addbyte(0x41); + addbyte(0x8d); + if (base_reg == 4) + { + addbyte(0x44); + addbyte(0x24); + } + else + { + addbyte(0x40 | base_reg); + } + addbyte((fetchdat >> 16) & 0xff); + (*op_pc)++; + break; + case 2: + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0x67); /*LEA EAX, imm32+base_reg*/ + addbyte(0x41); + addbyte(0x8d); + if (base_reg == 4) + { + addbyte(0x84); + addbyte(0x24); + } + else + { + addbyte(0x80 | base_reg); + } + addlong(new_eaaddr); + (*op_pc) += 4; + break; + } + } + else + { + switch (mod) + { + case 0: + if ((sib & 7) == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + if (sib >> 6) + { + addbyte(0x67); /*LEA EAX, imm32+index_reg*scale*/ + addbyte(0x42); + addbyte(0x8d); + addbyte(0x04); + addbyte(0x05 | (sib & 0xc0) | (index_reg << 3)); + addlong(new_eaaddr); + } + else + { + addbyte(0x67); /*LEA EAX, imm32+index_reg*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x80 | index_reg); + addlong(new_eaaddr); + } + (*op_pc) += 4; + } + else + { + addbyte(0x67); /*LEA EAX, base_reg+index_reg*scale*/ + addbyte(0x43); + addbyte(0x8d); + if (base_reg == 5) + { + addbyte(0x44); + addbyte(base_reg | (index_reg << 3) | (sib & 0xc0)); + addbyte(0); + } + else + { + addbyte(0x04); + addbyte(base_reg | (index_reg << 3) | (sib & 0xc0)); + } + } + break; + case 1: + addbyte(0x67); /*LEA EAX, imm8+base_reg+index_reg*scale*/ + addbyte(0x43); + addbyte(0x8d); + addbyte(0x44); + addbyte(base_reg | (index_reg << 3) | (sib & 0xc0)); + addbyte((fetchdat >> 16) & 0xff); + (*op_pc)++; + break; + case 2: + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0x67); /*LEA EAX, imm32+base_reg+index_reg*scale*/ + addbyte(0x43); + addbyte(0x8d); + addbyte(0x84); + addbyte(base_reg | (index_reg << 3) | (sib & 0xc0)); + addlong(new_eaaddr); + (*op_pc) += 4; + break; + } + } + if (stack_offset && (sib & 7) == 4 && (mod || (sib & 7) != 5)) /*ESP*/ + { + addbyte(0x05); + addlong(stack_offset); + } + if (((sib & 7) == 4 || (mod && (sib & 7) == 5)) && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + } + else + { + int base_reg; + + if (!mod && rm == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xb8); /*MOVL EAX, new_eaaddr*/ + addlong(new_eaaddr); + (*op_pc) += 4; + return op_ea_seg; + } + base_reg = LOAD_REG_L(rm) & 7; + if (mod) + { + if (rm == 5 && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + if (mod == 1) + { + addbyte(0x67); /*LEA EAX, base_reg+imm8*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x40 | base_reg); + addbyte((fetchdat >> 8) & 0xff); + (*op_pc)++; + } + else + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0x67); /*LEA EAX, base_reg+imm32*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x80 | base_reg); + addlong(new_eaaddr); + (*op_pc) += 4; + } + } + else + { + addbyte(0x44); /*MOV EAX, base_reg*/ + addbyte(0x89); + addbyte(0xc0 | (base_reg << 3)); + } + } + return op_ea_seg; +} + +static inline x86seg *FETCH_EA(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc, uint32_t op_32) +{ + if (op_32 & 0x200) + return FETCH_EA_32(op_ea_seg, fetchdat, op_ssegs, op_pc, 0); + return FETCH_EA_16(op_ea_seg, fetchdat, op_ssegs, op_pc); +} + + + +static inline void CHECK_SEG_READ(x86seg *seg) +{ + /*Segments always valid in real/V86 mode*/ + if (!(cr0 & 1) || (cpu_state.eflags & VM_FLAG)) + return; + /*CS and SS must always be valid*/ + if (seg == &cpu_state.seg_cs || seg == &cpu_state.seg_ss) + return; + if (seg->checked) + return; + if (seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) + return; + + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x83); /*CMP seg->base, -1*/ + addbyte(0x3c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + addbyte(-1); + } + else + { + addbyte(0x48); /*MOV RSI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x83); /*CMP RSI, -1*/ + addbyte(0xe8 | REG_ESI); + addbyte(0xff); + } + addbyte(0x0f); /*JE GPF_BLOCK_OFFSET*/ + addbyte(0x84); + addlong(BLOCK_GPF_OFFSET - (block_pos + 4)); + + seg->checked = 1; +} +static inline void CHECK_SEG_WRITE(x86seg *seg) +{ + /*Segments always valid in real/V86 mode*/ + if (!(cr0 & 1) || (cpu_state.eflags & VM_FLAG)) + return; + /*CS and SS must always be valid*/ + if (seg == &cpu_state.seg_cs || seg == &cpu_state.seg_ss) + return; + if (seg->checked) + return; + if (seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) + return; + + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x83); /*CMP seg->base, -1*/ + addbyte(0x3c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + addbyte(-1); + } + else + { + addbyte(0x48); /*MOV RSI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x83); /*CMP RSI, -1*/ + addbyte(0xe8 | REG_ESI); + addbyte(0xff); + } + addbyte(0x0f); /*JE GPF_BLOCK_OFFSET*/ + addbyte(0x84); + addlong(BLOCK_GPF_OFFSET - (block_pos + 4)); + + seg->checked = 1; +} +static inline void CHECK_SEG_LIMITS(x86seg *seg, int end_offset) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + return; + + if (IS_32_ADDR(&seg->base)) + { + addbyte(0xb8 | REG_ESI); /*MOV ESI, &addr*/ + addlong((uint32_t)(uintptr_t)seg); + } + else + { + addbyte(0x48); /*MOV RSI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)seg); + } + addbyte(0x3b); /*CMP EAX, seg->limit_low*/ + addbyte(0x46); + addbyte((uintptr_t)&seg->limit_low - (uintptr_t)seg); + addbyte(0x0f); /*JB BLOCK_GPF_OFFSET*/ + addbyte(0x82); + addlong(BLOCK_GPF_OFFSET - (block_pos + 4)); + if (end_offset) + { + addbyte(0x83); /*ADD EAX, end_offset*/ + addbyte(0xc0); + addbyte(end_offset); + addbyte(0x3b); /*CMP EAX, seg->limit_high*/ + addbyte(0x46); + addbyte((uintptr_t)&seg->limit_high - (uintptr_t)seg); + addbyte(0x0f); /*JNBE BLOCK_GPF_OFFSET*/ + addbyte(0x87); + addlong(BLOCK_GPF_OFFSET - (block_pos + 4)); + addbyte(0x83); /*SUB EAX, end_offset*/ + addbyte(0xe8); + addbyte(end_offset); + } +} + +static inline void MEM_LOAD_ADDR_EA_B(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2); + addbyte(0x8b); /*MOV AL,[RDI+RSI]*/ + addbyte(0x04); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12+4+6); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)readmemb386l); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + /*done:*/ +} +static inline void MEM_LOAD_ADDR_EA_W(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 1*/ + addbyte(0xc7); + addlong(1); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+4+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+2); + addbyte(0x66); /*MOV AX,[RDI+RSI]*/ + addbyte(0x8b); + addbyte(0x04); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12+4+6); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)readmemwl); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + /*done:*/ +} +static inline void MEM_LOAD_ADDR_EA_W_OFFSET(x86seg *seg, int offset) +{ + addbyte(0x83); /*ADD EAX, offset*/ + addbyte(0xc0); + addbyte(offset); + MEM_LOAD_ADDR_EA_W(seg); +} +static inline void MEM_LOAD_ADDR_EA_L(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 3*/ + addbyte(0xc7); + addlong(3); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+3+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2); + addbyte(0x8b); /*MOV EAX,[RDI+RSI]*/ + addbyte(0x04); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12+4+6); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)readmemll); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + /*done:*/ +} +static inline void MEM_LOAD_ADDR_EA_Q(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 7*/ + addbyte(0xc7); + addlong(7); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+4+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+2); + addbyte(0x48); /*MOV RAX,[RDI+RSI]*/ + addbyte(0x8b); + addbyte(0x04); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12+4+6); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)readmemql); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + /*done:*/ +} + +static inline void MEM_LOAD_ADDR_IMM_B(x86seg *seg, uint32_t addr) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_LOAD_ADDR_EA_B(seg); +} +static inline void MEM_LOAD_ADDR_IMM_W(x86seg *seg, uint32_t addr) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_LOAD_ADDR_EA_W(seg); +} +static inline void MEM_LOAD_ADDR_IMM_L(x86seg *seg, uint32_t addr) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_LOAD_ADDR_EA_L(seg); +} + +static inline void MEM_STORE_ADDR_EA_B(x86seg *seg, int host_reg) +{ + if (host_reg & 0x10) + { + /*Handle high byte of register*/ + if (host_reg & 8) + { + addbyte(0x45); /*MOVL R8, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 7) << 3)); + } + else + { + addbyte(0x41); /*MOVL R8, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 7) << 3)); + } + addbyte(0x66); /*SHR R8, 8*/ + addbyte(0x41); + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + host_reg = 8; + } + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(((host_reg & 8) ? 4:3)+2); + if (host_reg & 8) + { + addbyte(0x44); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x88); + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + else + { + addbyte(0x88); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x04 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12+4+6); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + load_param_3_reg_32(host_reg); + call_long((uintptr_t)writememb386l); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + /*done:*/ +} +static inline void MEM_STORE_ADDR_EA_W(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 1*/ + addbyte(0xc7); + addlong(1); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+((host_reg & 8) ? 5:4)+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(((host_reg & 8) ? 5:4)+2); + if (host_reg & 8) + { + addbyte(0x66); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + else + { + addbyte(0x66); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x89); + addbyte(0x04 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12+4+6); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + load_param_3_reg_32(host_reg); + call_long((uintptr_t)writememwl); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + /*done:*/ +} +static inline void MEM_STORE_ADDR_EA_L(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 3*/ + addbyte(0xc7); + addlong(3); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+((host_reg & 8) ? 4:3)+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(((host_reg & 8) ? 4:3)+2); + if (host_reg & 8) + { + addbyte(0x44); /*MOV -3[RDI+RSI],host_reg*/ + addbyte(0x89); + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + else + { + addbyte(0x89); /*MOV -3[RDI+RSI],host_reg*/ + addbyte(0x04 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12+4+6); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + load_param_3_reg_32(host_reg); + call_long((uintptr_t)writememll); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + /*done:*/ +} +static inline void MEM_STORE_ADDR_EA_Q(x86seg *seg, int host_reg, int host_reg2) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 7*/ + addbyte(0xc7); + addlong(7); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+4+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+2); + if (host_reg & 8) + { + addbyte(0x4c); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x89); + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + else + { + addbyte(0x48); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x89); + addbyte(0x04 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12+4+6); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + load_param_3_reg_64(host_reg); + call_long((uintptr_t)writememql); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + /*done:*/ +} + +static inline void MEM_STORE_ADDR_IMM_B(x86seg *seg, uint32_t addr, int host_reg) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_STORE_ADDR_EA_B(seg, host_reg); +} +static inline void MEM_STORE_ADDR_IMM_W(x86seg *seg, uint32_t addr, int host_reg) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_STORE_ADDR_EA_W(seg, host_reg); +} +static inline void MEM_STORE_ADDR_IMM_L(x86seg *seg, uint32_t addr, int host_reg) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_STORE_ADDR_EA_L(seg, host_reg); +} + +static inline void STORE_HOST_REG_ADDR_BL(uintptr_t addr, int host_reg) +{ + int temp_reg = REG_ECX; + + if (host_reg_mapping[REG_ECX] != -1) + temp_reg = REG_EBX; + + if (host_reg & 0x10) + { + if (host_reg & 8) + addbyte(0x41); + addbyte(0x0f); /*MOVZX temp_reg, host_reg*/ + addbyte(0xb7); + addbyte(0xc0 | (temp_reg << 3) | (host_reg & 7)); + addbyte(0xc1); /*SHR temp_reg, 8*/ + addbyte(0xe8 | temp_reg); + addbyte(8); + } + else + { + if (host_reg & 8) + addbyte(0x41); + addbyte(0x0f); /*MOVZX temp_reg, host_reg*/ + addbyte(0xb6); + addbyte(0xc0 | (temp_reg << 3) | (host_reg & 7)); + } + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + addbyte(0x89); /*MOV addr, temp_reg*/ + addbyte(0x45 | (temp_reg << 3)); + addbyte((uint32_t)addr - (uint32_t)(uintptr_t)&cpu_state - 128); + } + else if (IS_32_ADDR(addr)) + { + addbyte(0x89); /*MOV addr, temp_reg*/ + addbyte(0x04 | (temp_reg << 3)); + addbyte(0x25); + addlong(addr); + } + else + { + addbyte(0x48); /*MOV RSI, addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)addr); + addbyte(0x89); /*MOV [RSI], temp_reg*/ + addbyte(0x06 | (temp_reg << 3)); + } +} +static inline void STORE_HOST_REG_ADDR_WL(uintptr_t addr, int host_reg) +{ + int temp_reg = REG_ECX; + + if (host_reg_mapping[REG_ECX] != -1) + temp_reg = REG_EBX; + + if (host_reg & 8) + addbyte(0x41); + addbyte(0x0f); /*MOVZX temp_reg, host_reg*/ + addbyte(0xb7); + addbyte(0xc0 | (temp_reg << 3) | (host_reg & 7)); + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + addbyte(0x89); /*MOV addr, temp_reg*/ + addbyte(0x45 | (temp_reg << 3)); + addbyte((uint32_t)addr - (uint32_t)(uintptr_t)&cpu_state - 128); + } + else if (IS_32_ADDR(addr)) + { + addbyte(0x89); /*MOV addr, temp_reg*/ + addbyte(0x04 | (temp_reg << 3)); + addbyte(0x25); + addlong(addr); + } + else + { + addbyte(0x48); /*MOV RSI, addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)addr); + addbyte(0x89); /*MOV [RSI], temp_reg*/ + addbyte(0x06 | (temp_reg << 3)); + } +} +static inline void STORE_HOST_REG_ADDR_W(uintptr_t addr, int host_reg) +{ + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + addbyte(0x66); /*MOVW [addr],host_reg*/ + if (host_reg & 8) + addbyte(0x44); + addbyte(0x89); + addbyte(0x45 | ((host_reg & 7) << 3)); + addbyte((uint32_t)addr - (uint32_t)(uintptr_t)&cpu_state - 128); + } + else if (IS_32_ADDR(addr)) + { + addbyte(0x66); + if (host_reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOVW addr,host_reg*/ + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(0x25); + addlong(addr); + } + else + { + addbyte(0x48); /*MOV RSI, addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)addr); + + addbyte(0x66); + if (host_reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOVW [RSI],host_reg*/ + addbyte(0x06 | ((host_reg & 7) << 3)); + } +} +static inline void STORE_HOST_REG_ADDR(uintptr_t addr, int host_reg) +{ + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + if (host_reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOVL [addr],host_reg*/ + addbyte(0x45 | ((host_reg & 7) << 3)); + addbyte((uint32_t)addr - (uint32_t)(uintptr_t)&cpu_state - 128); + } + else if (IS_32_ADDR(addr)) + { + if (host_reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOVL addr,host_reg*/ + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(0x25); + addlong(addr); + } + else + { + addbyte(0x48); /*MOV RSI, addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)addr); + + if (host_reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOVL [RSI],host_reg*/ + addbyte(0x06 | ((host_reg & 7) << 3)); + } +} + +static inline void AND_HOST_REG_B(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + if (!(src_reg & 0x10)) + { + addbyte(0x66); /*SHL AX, 8*/ + addbyte(0xc1); + addbyte(0xe0); + addbyte(8); + } + addbyte(0x66); /*OR AX, 0x00ff*/ + addbyte(0x0d); + addword(0xff); + addbyte(0x66); /*ANDW dst_reg, AX*/ + addbyte(0x41); + addbyte(0x21); + addbyte(0xc0 | (dst_reg & 7)); + } + else if (src_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + addbyte(0x66); /*SHR AX, 8*/ + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + addbyte(0x41); /*ANDB dst_reg, AL*/ + addbyte(0x20); + addbyte(0xc0 | (dst_reg & 7)); + } + else + { + addbyte(0x45); /*ANDB dst_reg, src_reg*/ + addbyte(0x20); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (dst_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*SHL src_reg, 8*/ + addbyte(0xc1); + addbyte(0xe0 | src_reg); + addbyte(0x08); + addbyte(0x66); /*OR src_reg, 0xff*/ + addbyte(0x81); + addbyte(0xc8 | src_reg); + addword(0xff); + addbyte(0x66); /*ANDW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x21); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x41); /*ANDB dst_reg, src_reg*/ + addbyte(0x20); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x41); /*MOVZX EBX, src_reg*/ + addbyte(0x0f); + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x20); /*ANDB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x44); /*ANDB dst_reg, src_reg*/ + addbyte(0x20); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x0f); /*MOVZX EBX, src_reg*/ + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x20); /*ANDB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x20); /*ANDB dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } +} +static inline void AND_HOST_REG_W(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x66); /*ANDW dst_reg, src_reg*/ + addbyte(0x45); + addbyte(0x21); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x66); /*ANDW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x21); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x66); /*ANDW dst_reg, src_reg*/ + addbyte(0x44); + addbyte(0x21); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x66); /*ANDW dst_reg, src_reg*/ + addbyte(0x21); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } +} +static inline void AND_HOST_REG_L(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x45); /*ANDL dst_reg, src_reg*/ + addbyte(0x21); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x41); /*ANDL dst_reg, src_reg*/ + addbyte(0x21); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x44); /*ANDL dst_reg, src_reg*/ + addbyte(0x21); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x21); /*ANDL dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } +} +static inline void AND_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (host_reg & 0x10) + { + addbyte(0x66); /*ANDW host_reg, imm<<8*/ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x81); + addbyte(0xe0 | (host_reg & 7)); + addword((imm << 8) | 0xff); + } + else + { + if (host_reg & 8) + addbyte(0x41); + addbyte(0x81); /*ANDL host_reg, imm*/ + addbyte(0xe0 | (host_reg & 7)); + addlong(imm); + } +} + +static inline int TEST_HOST_REG_B(int dst_reg, int src_reg) +{ + if (dst_reg & 8) + { + addbyte(0x44); /*MOV EDX, dst_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((dst_reg & 7) << 3) | REG_EDX); + + dst_reg = (dst_reg & 0x10) | REG_EDX; + } + + AND_HOST_REG_B(dst_reg, src_reg); + + return dst_reg & ~0x10; +} +static inline int TEST_HOST_REG_W(int dst_reg, int src_reg) +{ + if (dst_reg & 8) + { + addbyte(0x44); /*MOV EDX, dst_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((dst_reg & 7) << 3) | REG_EDX); + + dst_reg = REG_EDX; + } + + AND_HOST_REG_W(dst_reg, src_reg); + + return dst_reg; +} +static inline int TEST_HOST_REG_L(int dst_reg, int src_reg) +{ + if (dst_reg & 8) + { + addbyte(0x44); /*MOV EDX, dst_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((dst_reg & 7) << 3) | REG_EDX); + + dst_reg = REG_EDX; + } + + AND_HOST_REG_L(dst_reg, src_reg); + + return dst_reg; +} +static inline int TEST_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (host_reg & 8) + { + addbyte(0x44); /*MOV EDX, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | REG_EDX | ((host_reg & 7) << 3)); + host_reg = REG_EDX | (host_reg & 0x10); + } + if (host_reg & 0x10) + { + addbyte(0x66); /*ANDW host_reg, imm<<8*/ + addbyte(0x81); + addbyte(0xe0 | (host_reg & 7)); + addword((imm << 8) | 0xff); + } + else + { + addbyte(0x81); /*ANDL host_reg, imm*/ + addbyte(0xe0 | (host_reg & 7)); + addlong(imm); + } + + return host_reg; +} + +static inline void OR_HOST_REG_B(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + if (!(src_reg & 0x10)) + { + addbyte(0x66); /*SHL AX, 8*/ + addbyte(0xc1); + addbyte(0xe0); + addbyte(8); + } + else + { + addbyte(0x66); /*AND AX, 0xff00*/ + addbyte(0x25); + addword(0xff00); + } + addbyte(0x66); /*ORW dst_reg, AX*/ + addbyte(0x41); + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7)); + } + else if (src_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + addbyte(0x66); /*SHR AX, 8*/ + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + addbyte(0x41); /*ORB dst_reg, AL*/ + addbyte(0x08); + addbyte(0xc0 | (dst_reg & 7)); + } + else + { + addbyte(0x45); /*ORB dst_reg, src_reg*/ + addbyte(0x08); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (dst_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*SHL src_reg, 8*/ + addbyte(0xc1); + addbyte(0xe0 | src_reg); + addbyte(0x08); + addbyte(0x66); /*ORW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x41); /*ORB dst_reg, src_reg*/ + addbyte(0x08); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x41); /*MOVZX EBX, src_reg*/ + addbyte(0x0f); + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x08); /*ORB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x44); /*ORB dst_reg, src_reg*/ + addbyte(0x08); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x0f); /*MOVZX EBX, src_reg*/ + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x08); /*ORB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x08); /*ORB dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } +} +static inline void OR_HOST_REG_W(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x66); /*ORW dst_reg, src_reg*/ + addbyte(0x45); + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x66); /*ORW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x66); /*ORW dst_reg, src_reg*/ + addbyte(0x44); + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x66); /*ORW dst_reg, src_reg*/ + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } +} +static inline void OR_HOST_REG_L(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x45); /*ORL dst_reg, src_reg*/ + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x41); /*ORL dst_reg, src_reg*/ + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x44); /*ORL dst_reg, src_reg*/ + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x09); /*ORW dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } +} +static inline void OR_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (host_reg & 0x10) + { + addbyte(0x66); /*ORW host_reg, imm<<8*/ + addbyte(0x41); + addbyte(0x81); + addbyte(0xc8 | (host_reg & 7)); + addword(imm << 8); + } + else if (host_reg & 8) + { + addbyte(0x41); /*ORL host_reg, imm*/ + addbyte(0x81); + addbyte(0xc8 | (host_reg & 7)); + addlong(imm); + } + else + { + addbyte(0x81); /*ORL host_reg, imm*/ + addbyte(0xc8 | (host_reg & 7)); + addlong(imm); + } +} + +static inline void XOR_HOST_REG_B(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + if (!(src_reg & 0x10)) + { + addbyte(0x66); /*SHL AX, 8*/ + addbyte(0xc1); + addbyte(0xe0); + addbyte(8); + } + else + { + addbyte(0x66); /*AND AX, 0xff00*/ + addbyte(0x25); + addword(0xff00); + } + addbyte(0x66); /*XORW dst_reg, AX*/ + addbyte(0x41); + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7)); + } + else if (src_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + addbyte(0x66); /*SHR AX, 8*/ + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + addbyte(0x41); /*XORB dst_reg, AL*/ + addbyte(0x30); + addbyte(0xc0 | (dst_reg & 7)); + } + else + { + addbyte(0x45); /*XORB dst_reg, src_reg*/ + addbyte(0x30); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (dst_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*SHL src_reg, 8*/ + addbyte(0xc1); + addbyte(0xe0 | src_reg); + addbyte(0x08); + addbyte(0x66); /*XORW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x41); /*XORB dst_reg, src_reg*/ + addbyte(0x30); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x41); /*MOVZX EBX, src_reg*/ + addbyte(0x0f); + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x30); /*XORB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x44); /*XORB dst_reg, src_reg*/ + addbyte(0x30); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x0f); /*MOVZX EBX, src_reg*/ + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x30); /*XORB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x30); /*XORB dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } +} +static inline void XOR_HOST_REG_W(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x66); /*XORW dst_reg, src_reg*/ + addbyte(0x45); + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x66); /*XORW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x66); /*XORW dst_reg, src_reg*/ + addbyte(0x44); + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x66); /*XORW dst_reg, src_reg*/ + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } +} +static inline void XOR_HOST_REG_L(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x45); /*XORL dst_reg, src_reg*/ + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x41); /*XORL dst_reg, src_reg*/ + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x44); /*XORW dst_reg, src_reg*/ + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x31); /*XORW dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } +} +static inline void XOR_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (host_reg & 0x10) + { + addbyte(0x66); /*ORW host_reg, imm<<8*/ + addbyte(0x41); + addbyte(0x81); + addbyte(0xf0 | (host_reg & 7)); + addword(imm << 8); + } + else if (host_reg & 8) + { + addbyte(0x41); /*ORL host_reg, imm*/ + addbyte(0x81); + addbyte(0xf0 | (host_reg & 7)); + addlong(imm); + } + else + { + addbyte(0x81); /*ORL host_reg, imm*/ + addbyte(0xf0 | (host_reg & 7)); + addlong(imm); + } +} + +static inline void ADD_HOST_REG_B(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + if (!(src_reg & 0x10)) + { + addbyte(0x66); /*SHL AX, 8*/ + addbyte(0xc1); + addbyte(0xe0); + addbyte(8); + } + else + { + addbyte(0x66); /*AND AX, 0xff00*/ + addbyte(0x25); + addword(0xff00); + } + addbyte(0x66); /*ADDW dst_reg, AX*/ + addbyte(0x41); + addbyte(0x01); + addbyte(0xc0 | (dst_reg & 7)); + } + else if (src_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + addbyte(0x66); /*SHR AX, 8*/ + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + addbyte(0x41); /*ADDB dst_reg, AL*/ + addbyte(0x00); + addbyte(0xc0 | (dst_reg & 7)); + } + else + { + addbyte(0x45); /*ADDB dst_reg, src_reg*/ + addbyte(0x00); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (dst_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*SHL src_reg, 8*/ + addbyte(0xc1); + addbyte(0xe0 | src_reg); + addbyte(0x08); + addbyte(0x66); /*ADDW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x01); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x41); /*ADDB dst_reg, src_reg*/ + addbyte(0x00); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (src_reg & 8) + { + if (src_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + addbyte(0x66); /*SHR AX, 8*/ + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + addbyte(0x00); /*ADDB dst_reg, AL*/ + addbyte(0xc0 | (dst_reg & 7)); + } + else + { + addbyte(0x44); /*ADDB dst_reg, src_reg*/ + addbyte(0x00); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else + fatal("!(dst_reg & src_reg & 8)\n"); +} +static inline void ADD_HOST_REG_W(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x66); /*ADDW dst_reg, src_reg*/ + addbyte(0x45); + addbyte(0x01); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x66); /*ADDW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x01); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x66); /*ADDW dst_reg, src_reg*/ + addbyte(0x44); + addbyte(0x01); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + fatal("!(dst_reg & src_reg & 8)\n"); +} +static inline void ADD_HOST_REG_L(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x45); /*ADDL dst_reg, src_reg*/ + addbyte(0x01); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x41); /*ADDL dst_reg, src_reg*/ + addbyte(0x01); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x44); /*ADDL dst_reg, src_reg*/ + addbyte(0x01); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + fatal("!(dst_reg & src_reg & 8)\n"); +} + +static inline void SUB_HOST_REG_B(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + if (!(src_reg & 0x10)) + { + addbyte(0x66); /*SHL AX, 8*/ + addbyte(0xc1); + addbyte(0xe0); + addbyte(8); + } + else + { + addbyte(0x66); /*AND AX, 0xff00*/ + addbyte(0x25); + addword(0xff00); + } + addbyte(0x66); /*SUBW dst_reg, AX*/ + addbyte(0x41); + addbyte(0x29); + addbyte(0xc0 | (dst_reg & 7)); + } + else if (src_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + addbyte(0x66); /*SHR AX, 8*/ + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + addbyte(0x41); /*SUBB dst_reg, AL*/ + addbyte(0x28); + addbyte(0xc0 | (dst_reg & 7)); + } + else + { + addbyte(0x45); /*SUBB dst_reg, src_reg*/ + addbyte(0x28); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (dst_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0x66); /*SHL src_reg, 8*/ + addbyte(0xc1); + addbyte(0xe0 | src_reg); + addbyte(0x08); + addbyte(0x66); /*SUBW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x29); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x41); /*SUBB dst_reg, src_reg*/ + addbyte(0x28); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else if (src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x41); /*MOVZX EBX, src_reg*/ + addbyte(0x0f); + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x28); /*SUBB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x44); /*SUBB dst_reg, src_reg*/ + addbyte(0x28); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } + else + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x0f); /*MOVZX EBX, src_reg*/ + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x28); /*SUBB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x28); /*SUBB dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } +} +static inline void SUB_HOST_REG_W(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x66); /*SUBW dst_reg, src_reg*/ + addbyte(0x45); + addbyte(0x29); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x66); /*SUBW dst_reg, src_reg*/ + addbyte(0x41); + addbyte(0x29); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x66); /*SUBW dst_reg, src_reg*/ + addbyte(0x44); + addbyte(0x29); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x66); /*SUBW dst_reg, src_reg*/ + addbyte(0x29); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } +} +static inline void SUB_HOST_REG_L(int dst_reg, int src_reg) +{ + if (dst_reg & src_reg & 8) + { + addbyte(0x45); /*SUBL dst_reg, src_reg*/ + addbyte(0x29); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (dst_reg & 8) + { + addbyte(0x41); /*SUBL dst_reg, src_reg*/ + addbyte(0x29); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else if (src_reg & 8) + { + addbyte(0x44); /*SUBL dst_reg, src_reg*/ + addbyte(0x29); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + else + { + addbyte(0x29); /*SUBL dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } +} + +static inline int CMP_HOST_REG_B(int dst_reg, int src_reg) +{ + if (dst_reg & 8) + { + addbyte(0x44); /*MOV EDX, dst_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((dst_reg & 7) << 3) | REG_EDX); + + dst_reg = (dst_reg & 0x10) | REG_EDX; + } + + SUB_HOST_REG_B(dst_reg, src_reg); + + return dst_reg & ~0x10; +} +static inline int CMP_HOST_REG_W(int dst_reg, int src_reg) +{ + if (dst_reg & 8) + { + addbyte(0x44); /*MOV EDX, dst_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((dst_reg & 7) << 3) | REG_EDX); + + dst_reg = REG_EDX; + } + + SUB_HOST_REG_W(dst_reg, src_reg); + + return dst_reg; +} +static inline int CMP_HOST_REG_L(int dst_reg, int src_reg) +{ + if (dst_reg & 8) + { + addbyte(0x44); /*MOV EDX, dst_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((dst_reg & 7) << 3) | REG_EDX); + + dst_reg = REG_EDX; + } + + SUB_HOST_REG_L(dst_reg, src_reg); + + return dst_reg; +} + +static inline void ADD_HOST_REG_IMM_B(int host_reg, uint8_t imm) +{ + if (host_reg & 0x10) + { + addbyte(0x66); /*ADDW host_reg, imm*/ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x81); + addbyte(0xC0 | (host_reg & 7)); + addword(imm << 8); + } + else + { + if (host_reg & 8) + addbyte(0x41); + addbyte(0x80); /*ADDB host_reg, imm*/ + addbyte(0xC0 | (host_reg & 7)); + addbyte(imm); + } +} +static inline void ADD_HOST_REG_IMM_W(int host_reg, uint16_t imm) +{ + addbyte(0x66); /*ADDW host_reg, imm*/ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x81); + addbyte(0xC0 | (host_reg & 7)); + addword(imm); +} +static inline void ADD_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x81); /*ADDL host_reg, imm*/ + addbyte(0xC0 | (host_reg & 7)); + addlong(imm); +} + +static inline void SUB_HOST_REG_IMM_B(int host_reg, uint8_t imm) +{ + if (host_reg & 0x10) + { + addbyte(0x66); /*SUBW host_reg, imm*/ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x81); + addbyte(0xE8 | (host_reg & 7)); + addword(imm << 8); + } + else + { + if (host_reg & 8) + addbyte(0x41); + addbyte(0x80); /*SUBB host_reg, imm*/ + addbyte(0xE8 | (host_reg & 7)); + addbyte(imm); + } +} +static inline void SUB_HOST_REG_IMM_W(int host_reg, uint16_t imm) +{ + addbyte(0x66); /*SUBW host_reg, imm*/ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x81); + addbyte(0xE8 | (host_reg & 7)); + addword(imm); +} +static inline void SUB_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x81); /*SUBL host_reg, imm*/ + addbyte(0xE8 | (host_reg & 7)); + addlong(imm); +} + +static inline int CMP_HOST_REG_IMM_B(int host_reg, uint8_t imm) +{ + if (host_reg & 8) + { + addbyte(0x44); /*MOV EDX, dst_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 7) << 3) | REG_EDX); + + host_reg = (host_reg & 0x10) | REG_EDX; + } + + SUB_HOST_REG_IMM_B(host_reg, imm); + + return host_reg; +} +static inline int CMP_HOST_REG_IMM_W(int host_reg, uint16_t imm) +{ + if (host_reg & 8) + { + addbyte(0x44); /*MOV EDX, dst_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 7) << 3) | REG_EDX); + + host_reg = REG_EDX; + } + + SUB_HOST_REG_IMM_W(host_reg, imm); + + return host_reg; +} +static inline int CMP_HOST_REG_IMM_L(int host_reg, uint32_t imm) +{ + if (host_reg & 8) + { + addbyte(0x44); /*MOV EDX, dst_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 7) << 3) | REG_EDX); + + host_reg = REG_EDX; + } + + SUB_HOST_REG_IMM(host_reg, imm); + + return host_reg; +} + +static inline void LOAD_STACK_TO_EA(int off) +{ + if (stack32) + { + addbyte(0x8b); /*MOVL EAX,[ESP]*/ + addbyte(0x45 | (REG_EAX << 3)); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].l)); + if (off) + { + addbyte(0x83); /*ADD EAX, off*/ + addbyte(0xc0 | (0 << 3) | REG_EAX); + addbyte(off); + } + } + else + { + addbyte(0x0f); /*MOVZX EAX,W[ESP]*/ + addbyte(0xb7); + addbyte(0x45 | (REG_EAX << 3)); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].w)); + if (off) + { + addbyte(0x66); /*ADD AX, off*/ + addbyte(0x05); + addword(off); + } + } +} +static inline void LOAD_EBP_TO_EA(int off) +{ + if (stack32) + { + addbyte(0x8b); /*MOVL EAX,[EBP]*/ + addbyte(0x45 | (REG_EAX << 3)); + addbyte((uint8_t)cpu_state_offset(regs[REG_EBP].l)); + if (off) + { + addbyte(0x83); /*ADD EAX, off*/ + addbyte(0xc0 | (0 << 3) | REG_EAX); + addbyte(off); + } + } + else + { + addbyte(0x0f); /*MOVZX EAX,W[EBP]*/ + addbyte(0xb7); + addbyte(0x45 | (REG_EAX << 3)); + addbyte((uint8_t)cpu_state_offset(regs[REG_BP].l)); + if (off) + { + addbyte(0x66); /*ADD AX, off*/ + addbyte(0x05); + addword(off); + } + } +} + +static inline void SP_MODIFY(int off) +{ + if (stack32) + { + if (off < 0x80) + { + addbyte(0x83); /*ADD [ESP], off*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].l)); + addbyte(off); + } + else + { + addbyte(0x81); /*ADD [ESP], off*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].l)); + addlong(off); + } + } + else + { + if (off < 0x80) + { + addbyte(0x66); /*ADD [SP], off*/ + addbyte(0x83); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].w)); + addbyte(off); + } + else + { + addbyte(0x66); /*ADD [SP], off*/ + addbyte(0x81); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].w)); + addword(off); + } + } +} + +static inline void TEST_ZERO_JUMP_W(int host_reg, uint32_t new_pc, int taken_cycles) +{ + addbyte(0x66); /*CMPW host_reg, 0*/ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x83); + addbyte(0xc0 | 0x38 | (host_reg & 7)); + addbyte(0); + addbyte(0x75); /*JNZ +*/ + addbyte(7+5+(taken_cycles ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(new_pc); + if (taken_cycles) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(taken_cycles); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} +static inline void TEST_ZERO_JUMP_L(int host_reg, uint32_t new_pc, int taken_cycles) +{ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x83); /*CMPW host_reg, 0*/ + addbyte(0xc0 | 0x38 | (host_reg & 7)); + addbyte(0); + addbyte(0x75); /*JNZ +*/ + addbyte(7+5+(taken_cycles ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(new_pc); + if (taken_cycles) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(taken_cycles); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} + +static inline void TEST_NONZERO_JUMP_W(int host_reg, uint32_t new_pc, int taken_cycles) +{ + addbyte(0x66); /*CMPW host_reg, 0*/ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x83); + addbyte(0xc0 | 0x38 | (host_reg & 7)); + addbyte(0); + addbyte(0x74); /*JZ +*/ + addbyte(7+5+(taken_cycles ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(new_pc); + if (taken_cycles) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(taken_cycles); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} +static inline void TEST_NONZERO_JUMP_L(int host_reg, uint32_t new_pc, int taken_cycles) +{ + if (host_reg & 8) + addbyte(0x41); + addbyte(0x83); /*CMPW host_reg, 0*/ + addbyte(0xc0 | 0x38 | (host_reg & 7)); + addbyte(0); + addbyte(0x74); /*JZ +*/ + addbyte(7+5+(taken_cycles ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(new_pc); + if (taken_cycles) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(taken_cycles); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} + +static inline void BRANCH_COND_BE(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + uint8_t *jump1; + + if (codegen_flags_changed && cpu_state.flags_op != FLAGS_UNKNOWN) + { + addbyte(0x83); /*CMP flags_res, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(flags_res)); + addbyte(0); + addbyte(0x74); /*JZ +*/ + } + else + { + CALL_FUNC((uintptr_t)ZF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x75); /*JNZ +*/ + } + jump1 = &codeblock[block_current].data[block_pos]; + addbyte(0); + CALL_FUNC((uintptr_t)CF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + if (not) + addbyte(0x75); /*JNZ +*/ + else + addbyte(0x74); /*JZ +*/ + addbyte(7+5+(timing_bt ? 4 : 0)); + + if (!not) + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(op_pc+pc_offset+offset); + if (timing_bt) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(timing_bt); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + if (not) + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; +} + +static inline void BRANCH_COND_L(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + CALL_FUNC((uintptr_t)NF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*SETNE BL*/ + addbyte(0x95); + addbyte(0xc3); + CALL_FUNC((uintptr_t)VF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*SETNE AL*/ + addbyte(0x95); + addbyte(0xc0); + addbyte(0x38); /*CMP AL, BL*/ + addbyte(0xd8); + if (not) + addbyte(0x75); /*JNZ +*/ + else + addbyte(0x74); /*JZ +*/ + addbyte(7+5+(timing_bt ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(op_pc+pc_offset+offset); + if (timing_bt) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(timing_bt); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} + +static inline void BRANCH_COND_LE(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + uint8_t *jump1; + if (codegen_flags_changed && cpu_state.flags_op != FLAGS_UNKNOWN) + { + addbyte(0x83); /*CMP flags_res, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(flags_res)); + addbyte(0); + addbyte(0x74); /*JZ +*/ + } + else + { + CALL_FUNC((uintptr_t)ZF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x75); /*JNZ +*/ + } + jump1 = &codeblock[block_current].data[block_pos]; + addbyte(0); + CALL_FUNC((uintptr_t)NF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*SETNE BL*/ + addbyte(0x95); + addbyte(0xc3); + CALL_FUNC((uintptr_t)VF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*SETNE AL*/ + addbyte(0x95); + addbyte(0xc0); + addbyte(0x38); /*CMP AL, BL*/ + addbyte(0xd8); + if (not) + addbyte(0x75); /*JNZ +*/ + else + addbyte(0x74); /*JZ +*/ + addbyte(7+5+(timing_bt ? 4 : 0)); + if (!not) + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(op_pc+pc_offset+offset); + if (timing_bt) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(timing_bt); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + if (not) + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; +} + +static inline int LOAD_VAR_W(uintptr_t addr) +{ + int host_reg = REG_EBX; + + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + addbyte(0x0f); /*MOVZX host_reg, offset[cpu_state]*/ + addbyte(0xb7); + addbyte(0x45 | (host_reg << 3)); + addbyte(addr - (uintptr_t)&cpu_state - 128); + } + else if (IS_32_ADDR(addr)) + { + addbyte(0x0f); /*MOVZX host_reg,[reg]*/ + addbyte(0xb7); + addbyte(0x04 | (host_reg << 3)); + addbyte(0x25); + addlong((uint32_t)addr); + } + else + { + addbyte(0x48); /*MOV host_reg, &addr*/ + addbyte(0xb8 | host_reg); + addquad(addr); + addbyte(0x0f); /*MOVZX host_reg, [host_reg]*/ + addbyte(0xb7); + addbyte(host_reg | (host_reg << 3)); + } + + return host_reg; +} +static inline int LOAD_VAR_WL(uintptr_t addr) +{ + return LOAD_VAR_W(addr); +} +static inline int LOAD_VAR_L(uintptr_t addr) +{ + int host_reg = REG_EBX; + + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + addbyte(0x8b); /*MOVL host_reg, offset[cpu_state]*/ + addbyte(0x45 | (host_reg << 3)); + addbyte(addr - (uintptr_t)&cpu_state - 128); + } + else if (IS_32_ADDR(addr)) + { + addbyte(0x8b); /*MOVL host_reg,[reg]*/ + addbyte(0x04 | (host_reg << 3)); + addbyte(0x25); + addlong((uint32_t)addr); + } + else + { + addbyte(0x48); /*MOV host_reg, &addr*/ + addbyte(0xb8 | host_reg); + addquad(addr); + addbyte(0x8b); /*MOVL host_reg, [host_reg]*/ + addbyte(host_reg | (host_reg << 3)); + } + + return host_reg; +} + +static inline int COPY_REG(int src_reg) +{ + if (src_reg & 8) + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | REG_ECX | ((src_reg & 7) << 3)); + + return REG_ECX | (src_reg & 0x10); +} + +static inline int LOAD_HOST_REG(int host_reg) +{ + if (host_reg & 8) + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | REG_EBX | ((host_reg & 7) << 3)); + + return REG_EBX | (host_reg & 0x10); +} + +static inline int ZERO_EXTEND_W_B(int reg) +{ + if (reg & 0x10) + { + addbyte(0x44); /*MOV EAX, reg*/ + addbyte(0x89); + addbyte(0xc0 | (reg << 3)); + addbyte(0x0f); /*MOVZX EAX, AH*/ + addbyte(0xb6); + addbyte(0xc4); + + return REG_EAX; + } + + if (reg & 8) + addbyte(0x41); + addbyte(0x0f); /*MOVZX regl, regb*/ + addbyte(0xb6); + addbyte(0xc0 | (reg & 7)); + + return REG_EAX; +} +static inline int ZERO_EXTEND_L_B(int reg) +{ + if (reg & 0x10) + { + addbyte(0x44); /*MOV EAX, reg*/ + addbyte(0x89); + addbyte(0xc0 | (reg << 3)); + addbyte(0x0f); /*MOVZX EAX, AH*/ + addbyte(0xb6); + addbyte(0xc4); + + return REG_EAX; + } + + if (reg & 8) + addbyte(0x41); + addbyte(0x0f); /*MOVZX regl, regb*/ + addbyte(0xb6); + addbyte(0xc0 | (reg & 7)); + + return REG_EAX; +} +static inline int ZERO_EXTEND_L_W(int reg) +{ + if (reg & 8) + addbyte(0x41); + addbyte(0x0f); /*MOVZX regl, regw*/ + addbyte(0xb7); + addbyte(0xc0 | (reg & 7)); + + return REG_EAX; +} + +static inline int SIGN_EXTEND_W_B(int reg) +{ + if (reg & 0x10) + { + addbyte(0x44); /*MOV EAX, reg*/ + addbyte(0x89); + addbyte(0xc0 | (reg << 3)); + addbyte(0x0f); /*MOVSX EAX, AH*/ + addbyte(0xbe); + addbyte(0xc4); + + return REG_EAX; + } + + if (reg & 8) + addbyte(0x41); + addbyte(0x0f); /*MOVSX regl, regb*/ + addbyte(0xbe); + addbyte(0xc0 | (reg & 7)); + + return REG_EAX; +} +static inline int SIGN_EXTEND_L_B(int reg) +{ + if (reg & 0x10) + { + addbyte(0x44); /*MOV EAX, reg*/ + addbyte(0x89); + addbyte(0xc0 | (reg << 3)); + addbyte(0x0f); /*MOVSX EAX, AH*/ + addbyte(0xbe); + addbyte(0xc4); + + return REG_EAX; + } + + if (reg & 8) + addbyte(0x41); + addbyte(0x0f); /*MOVSX regl, regb*/ + addbyte(0xbe); + addbyte(0xc0 | (reg & 7)); + + return REG_EAX; +} +static inline int SIGN_EXTEND_L_W(int reg) +{ + if (reg & 8) + addbyte(0x41); + addbyte(0x0f); /*MOVSX regl, regw*/ + addbyte(0xbf); + addbyte(0xc0 | (reg & 7)); + + return REG_EAX; +} + +static inline void SHL_B_IMM(int reg, int count) +{ + if (reg & 0x10) + { + addbyte(0x44); /*MOV EAX, reg*/ + addbyte(0x89); + addbyte(0xc0 | REG_EAX | ((reg & 7) << 3)); + addbyte(0xc0); /*SHL AH, count*/ + addbyte(0xe0 | REG_AH); + addbyte(count); + addbyte(0x41); /*MOV reg, EAX*/ + addbyte(0x89); + addbyte(0xc0 | (REG_EAX << 3) | (reg & 7)); + } + else + { + if (reg & 8) + addbyte(0x41); + addbyte(0xc0); /*SHL reg, count*/ + addbyte(0xc0 | (reg & 7) | 0x20); + addbyte(count); + } +} +static inline void SHL_W_IMM(int reg, int count) +{ + addbyte(0x66); /*SHL reg, count*/ + if (reg & 8) + addbyte(0x41); + addbyte(0xc1); + addbyte(0xc0 | (reg & 7) | 0x20); + addbyte(count); +} +static inline void SHL_L_IMM(int reg, int count) +{ + if (reg & 8) + addbyte(0x41); + addbyte(0xc1); /*SHL reg, count*/ + addbyte(0xc0 | (reg & 7) | 0x20); + addbyte(count); +} +static inline void SHR_B_IMM(int reg, int count) +{ + if (reg & 0x10) + { + addbyte(0x44); /*MOV EAX, reg*/ + addbyte(0x89); + addbyte(0xc0 | REG_EAX | ((reg & 7) << 3)); + addbyte(0xc0); /*SHR AH, count*/ + addbyte(0xe8 | REG_AH); + addbyte(count); + addbyte(0x41); /*MOV reg, EAX*/ + addbyte(0x89); + addbyte(0xc0 | (REG_EAX << 3) | (reg & 7)); + } + else + { + if (reg & 8) + addbyte(0x41); + addbyte(0xc0); /*SHR reg, count*/ + addbyte(0xc0 | (reg & 7) | 0x28); + addbyte(count); + } +} +static inline void SHR_W_IMM(int reg, int count) +{ + addbyte(0x66); /*SHR reg, count*/ + if (reg & 8) + addbyte(0x41); + addbyte(0xc1); + addbyte(0xc0 | (reg & 7) | 0x28); + addbyte(count); +} +static inline void SHR_L_IMM(int reg, int count) +{ + if (reg & 8) + addbyte(0x41); + addbyte(0xc1); /*SHR reg, count*/ + addbyte(0xc0 | (reg & 7) | 0x28); + addbyte(count); +} +static inline void SAR_B_IMM(int reg, int count) +{ + if (reg & 0x10) + { + addbyte(0x44); /*MOV EAX, reg*/ + addbyte(0x89); + addbyte(0xc0 | REG_EAX | ((reg & 7) << 3)); + addbyte(0xc0); /*SAR AH, count*/ + addbyte(0xf8 | REG_AH); + addbyte(count); + addbyte(0x41); /*MOV reg, EAX*/ + addbyte(0x89); + addbyte(0xc0 | (REG_EAX << 3) | (reg & 7)); + } + else + { + if (reg & 8) + addbyte(0x41); + addbyte(0xc0); /*SAR reg, count*/ + addbyte(0xc0 | (reg & 7) | 0x38); + addbyte(count); + } +} +static inline void SAR_W_IMM(int reg, int count) +{ + addbyte(0x66); /*SAR reg, count*/ + if (reg & 8) + addbyte(0x41); + addbyte(0xc1); + addbyte(0xc0 | (reg & 7) | 0x38); + addbyte(count); +} +static inline void SAR_L_IMM(int reg, int count) +{ + if (reg & 8) + addbyte(0x41); + addbyte(0xc1); /*SAR reg, count*/ + addbyte(0xc0 | (reg & 7) | 0x38); + addbyte(count); +} + +static inline void NEG_HOST_REG_B(int reg) +{ + if (reg & 0x10) + { + if (reg & 8) + addbyte(0x44); + addbyte(0x89); /*MOV BX, reg*/ + addbyte(0xc3 | ((reg & 7) << 3)); + addbyte(0xf6); /*NEG BH*/ + addbyte(0xdf); + if (reg & 8) + addbyte(0x41); + addbyte(0x89); /*MOV reg, BX*/ + addbyte(0xd8 | (reg & 7)); + } + else + { + if (reg & 8) + addbyte(0x41); + addbyte(0xf6); + addbyte(0xd8 | (reg & 7)); + } +} +static inline void NEG_HOST_REG_W(int reg) +{ + addbyte(0x66); + if (reg & 8) + addbyte(0x41); + addbyte(0xf7); + addbyte(0xd8 | (reg & 7)); +} +static inline void NEG_HOST_REG_L(int reg) +{ + if (reg & 8) + addbyte(0x41); + addbyte(0xf7); + addbyte(0xd8 | (reg & 7)); +} + + +static inline void FP_ENTER() +{ + if (codegen_fpu_entered) + return; + if (IS_32_ADDR(&cr0)) + { + addbyte(0xf6); /*TEST cr0, 0xc*/ + addbyte(0x04); + addbyte(0x25); + addlong((uintptr_t)&cr0); + addbyte(0x0c); + } + else + { + addbyte(0x48); /*MOV RAX, &cr0*/ + addbyte(0xb8 | REG_EAX); + addquad((uint64_t)&cr0); + addbyte(0xf6); /*TEST [RAX], 0xc*/ + addbyte(0 | (REG_EAX << 3)); + addbyte(0x0c); + } + addbyte(0x74); /*JZ +*/ + addbyte(7+5+12+5); + addbyte(0xC7); /*MOVL [oldpc],op_old_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(oldpc)); + addlong(op_old_pc); + load_param_1_32(&codeblock[block_current], 7); + CALL_FUNC((uintptr_t)x86_int); + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + + codegen_fpu_entered = 1; +} + +static inline void FP_FXCH(int reg) +{ + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(reg); + + addbyte(0x48); /*MOV RDX, ST[RBX*8]*/ + addbyte(0x8b); + addbyte(0x54); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(0x07); + addbyte(0x48); /*MOV RCX, ST[RAX*8]*/ + addbyte(0x8b); + addbyte(0x4c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x48); /*MOV ST[RAX*8], RDX*/ + addbyte(0x89); + addbyte(0x54); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x48); /*MOV ST[RBX*8], RCX*/ + addbyte(0x89); + addbyte(0x4c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + + addbyte(0x8a); /*MOV CL, tag[EAX]*/ + addbyte(0x4c); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(0x8a); /*MOV DL, tag[EBX]*/ + addbyte(0x54); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(0x88); /*MOV tag[EBX], CL*/ + addbyte(0x4c); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(0x88); /*MOV tag[EAX], DL*/ + addbyte(0x54); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); + + addbyte(0x48); /*MOV RDX, MM[RBX*8]*/ + addbyte(0x8b); + addbyte(0x54); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x48); /*MOV RCX, MM[RAX*8]*/ + addbyte(0x8b); + addbyte(0x4c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x48); /*MOV MM[RAX*8], RDX*/ + addbyte(0x89); + addbyte(0x54); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x48); /*MOV MM[RBX*8], RCX*/ + addbyte(0x89); + addbyte(0x4c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)); + reg = reg; +} + + + +static inline void FP_FLD(int reg) +{ + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + if (reg) + { + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(reg); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(0x07); + } + else + { + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + } + + addbyte(0x48); /*MOV RCX, ST[EAX*8]*/ + addbyte(0x8b); + addbyte(0x4c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(0x07); + addbyte(0x48); /*MOV RDX, ST_i64[EAX*8]*/ + addbyte(0x8b); + addbyte(0x54); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x8a); /*MOV AL, [tag+EAX]*/ + addbyte(0x44); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(0x48); /*MOV ST[EBX*8], RCX*/ + addbyte(0x89); + addbyte(0x4c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x48); /*MOV ST_i64[EBX*8], RDX*/ + addbyte(0x89); + addbyte(0x54); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x88); /*MOV [tag+EBX], AL*/ + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag)); + + addbyte(0x89); /*MOV [TOP], EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); +} + +static inline void FP_FST(int reg) +{ + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x48); /*MOV RCX, ST[EAX*8]*/ + addbyte(0x8b); + addbyte(0x4c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x8a); /*MOV BL, [tag+EAX]*/ + addbyte(0x5c); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); + + if (reg) + { + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(reg); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(0x07); + } + + addbyte(0x48); /*MOV ST[EAX*8], RCX*/ + addbyte(0x89); + addbyte(0x4c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x88); /*MOV [tag+EAX], BL*/ + addbyte(0x5c); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); +} + +static inline void FP_POP() +{ + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xc6); /*MOVB tag[EAX], 3*/ + addbyte(0x44); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(3); + addbyte(0x83); /*ADD AL, 1*/ + addbyte(0xc0); + addbyte(1); + addbyte(0x83); /*AND AL, 7*/ + addbyte(0xe0); + addbyte(7); + addbyte(0x89); /*MOV [TOP], EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); +} +static inline void FP_POP2() +{ + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xc6); /*MOVB tag[EAX], 3*/ + addbyte(0x44); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(3); + addbyte(0x83); /*ADD AL, 2*/ + addbyte(0xc0); + addbyte(2); + addbyte(0x83); /*AND AL, 7*/ + addbyte(0xe0); + addbyte(7); + addbyte(0x89); /*MOV [TOP], EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); +} + +static inline void FP_LOAD_S() +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + addbyte(0xf3); /*CVTSS2SD XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x5a); + addbyte(0xc0); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x66); /*MOVQ [ST+EBX*8], XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0f); /*SETE [tag+EBX]*/ + addbyte(0x94); + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag)); +} +static inline void FP_LOAD_D() +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x48); /*TEST RAX, RAX*/ + addbyte(0x85); + addbyte(0xc0); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x48); /*MOVQ [ST+EBX*8], RAX*/ + addbyte(0x89); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0f); /*SETE [tag+EBX]*/ + addbyte(0x94); + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag)); +} + +static inline void FP_LOAD_IW() +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x0f); /*MOVSX EAX, AX*/ + addbyte(0xbf); + addbyte(0xc0); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + addbyte(0xf2); /*CVTSI2SD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x2a); + addbyte(0xc0); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x66); /*MOVQ [ST+EBX*8], XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0f); /*SETE [tag+EBX]*/ + addbyte(0x94); + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag)); +} +static inline void FP_LOAD_IL() +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + addbyte(0xf2); /*CVTSI2SD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x2a); + addbyte(0xc0); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x66); /*MOVQ [ST+EBX*8], XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0f); /*SETE [tag+EBX]*/ + addbyte(0x94); + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag)); +} +static inline void FP_LOAD_IQ() +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + addbyte(0xf2); /*CVTSI2SDQ XMM0, RAX*/ + addbyte(0x48); + addbyte(0x0f); + addbyte(0x2a); + addbyte(0xc0); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x48); /*TEST RAX, RAX*/ + addbyte(0x85); + addbyte(0xc0); + addbyte(0x48); /*MOV [ST_i64+EBX*8], RAX*/ + addbyte(0x89); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x0f); /*SETE AL*/ + addbyte(0x94); + addbyte(0xc0); + addbyte(0x66); /*MOVQ [ST+EBX*8], XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0c); /*OR AL, TAG_UINT64*/ + addbyte(TAG_UINT64); + addbyte(0x88); /*MOV [tag+EBX], AL*/ + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag)); +} + +static inline void FP_LOAD_IMM_Q(uint64_t v) +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0xc7); /*MOV ST[EBP+EBX*8], v*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addlong(v & 0xffffffff); + addbyte(0xc7); /*MOV ST[EBP+EBX*8]+4, v*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST) + 4); + addlong(v >> 32); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xc6); /*MOV [tag+EBX], (v ? 0 : 1)*/ + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(v ? 0 : 1); +} + +static inline void FP_FCHS() +{ + addbyte(0x8b); /*MOV EAX, TOP*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xf2); /*SUBSD XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x5c); + addbyte(0xc0); + addbyte(0xf2); /*SUBSD XMM0, ST[EAX*8]*/ + addbyte(0x0f); + addbyte(0x5c); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EAX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_NOT_UINT64); + addbyte(0xf2); /*MOVSD ST[EAX*8], XMM0*/ + addbyte(0x0f); + addbyte(0x11); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); +} + +static inline int FP_LOAD_REG(int reg) +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EBX, reg*/ + addbyte(0xc0 | REG_EBX); + addbyte(reg); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe0 | REG_EBX); + addbyte(0x07); + } + addbyte(0xf3); /*MOVQ XMM0, ST[EBX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0xf2); /*CVTSD2SS XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x5a); + addbyte(0xc0); + addbyte(0x66); /*MOVD EBX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0 | REG_EBX); + + return REG_EBX; +} +static inline void FP_LOAD_REG_D(int reg, int *host_reg1, int *host_reg2) +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EBX, reg*/ + addbyte(0xc0 | REG_EBX); + addbyte(reg); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe0 | REG_EBX); + addbyte(0x07); + } + addbyte(0x48); /*MOV RBX, ST[EBX*8]*/ + addbyte(0x8b); + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + + *host_reg1 = REG_EBX; +} +static inline int64_t x87_fround(double b) +{ + int64_t a, c; + + switch ((cpu_state.npxc >> 10) & 3) + { + case 0: /*Nearest*/ + a = (int64_t)floor(b); + c = (int64_t)floor(b + 1.0); + if ((b - a) < (c - b)) + return a; + else if ((b - a) > (c - b)) + return c; + else + return (a & 1) ? c : a; + case 1: /*Down*/ + return (int64_t)floor(b); + case 2: /*Up*/ + return (int64_t)ceil(b); + case 3: /*Chop*/ + return (int64_t)b; + } + + return 0; +} +static inline int FP_LOAD_REG_INT_W(int reg) +{ + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(reg); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(7); + } + addbyte(0xf3); /*MOVQ XMM0, ST[EAX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + + CALL_FUNC((uintptr_t)x87_fround); + + addbyte(0x93); /*XCHG EBX, EAX*/ + + return REG_EBX; +} +static inline int FP_LOAD_REG_INT(int reg) +{ + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(reg); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(7); + } + addbyte(0xf3); /*MOVQ XMM0, ST[EAX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + + CALL_FUNC((uintptr_t)x87_fround); + + addbyte(0x93); /*XCHG EBX, EAX*/ + + return REG_EBX; +} +static inline void FP_LOAD_REG_INT_Q(int reg, int *host_reg1, int *host_reg2) +{ + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(reg); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(7); + } + + if (codegen_fpu_loaded_iq[cpu_state.TOP] && (cpu_state.tag[cpu_state.TOP] & TAG_UINT64)) + { + /*If we know the register was loaded with FILDq in this block and + has not been modified, then we can skip most of the conversion + and just load the 64-bit integer representation directly */ + addbyte(0x48); /*MOV RAX, [ST_i64+EAX*8]*/ + addbyte(0x8b); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(MM)); + + addbyte(0x48); /*XCHG RBX, RAX*/ + addbyte(0x93); + + *host_reg1 = REG_EBX; + + return; + } + + addbyte(0xf6); /*TEST TAG[EAX], TAG_UINT64*/ + addbyte(0x44); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(TAG_UINT64); + + addbyte(0x74); /*JZ +*/ + addbyte(5+2); + + addbyte(0x48); /*MOV RAX, [ST_i64+EAX*8]*/ + addbyte(0x8b); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(MM)); + + addbyte(0xeb); /*JMP done*/ + addbyte(6+12); + + addbyte(0xf3); /*MOVQ XMM0, ST[EAX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + + CALL_FUNC((uintptr_t)x87_fround); + + addbyte(0x48); /*XCHG RBX, RAX*/ + addbyte(0x93); + + *host_reg1 = REG_EBX; +} + +#define FPU_ADD 0 +#define FPU_DIV 4 +#define FPU_DIVR 5 +#define FPU_MUL 1 +#define FPU_SUB 2 +#define FPU_SUBR 3 + +static inline void FP_OP_REG(int op, int dst, int src) +{ + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + if (dst) + { + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(dst); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(0x07); + } + if (src) + { + addbyte(0x83); /*ADD EBX, reg*/ + addbyte(0xc0 | REG_EBX); + addbyte(src); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe0 | REG_EBX); + addbyte(0x07); + } + addbyte(0x80); /*AND tag[EAX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(TAG_NOT_UINT64); + if (op == FPU_DIVR || op == FPU_SUBR) + { + addbyte(0xf3); /*MOVQ XMM0, ST[RBX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } + else + { + addbyte(0xf3); /*MOVQ XMM0, ST[RAX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + } + switch (op) + { + case FPU_ADD: + addbyte(0xf2); /*ADDSD XMM0, ST[RBX*8]*/ + addbyte(0x0f); + addbyte(0x58); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + break; + case FPU_DIV: + addbyte(0xf2); /*DIVSD XMM0, ST[RBX*8]*/ + addbyte(0x0f); + addbyte(0x5e); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + break; + case FPU_DIVR: + addbyte(0xf2); /*DIVSD XMM0, ST[RAX*8]*/ + addbyte(0x0f); + addbyte(0x5e); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + break; + case FPU_MUL: + addbyte(0xf2); /*MULSD XMM0, ST[RBX*8]*/ + addbyte(0x0f); + addbyte(0x59); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + break; + case FPU_SUB: + addbyte(0xf2); /*SUBSD XMM0, ST[RBX*8]*/ + addbyte(0x0f); + addbyte(0x5c); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + break; + case FPU_SUBR: + addbyte(0xf2); /*SUBSD XMM0, ST[RAX*8]*/ + addbyte(0x0f); + addbyte(0x5c); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + break; + } + addbyte(0x66); /*MOVQ [RSI+RAX*8], XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); +} + +static inline void FP_OP_MEM(int op) +{ + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xf3); /*MOVQ XMM0, ST[RAX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EAX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag)); + addbyte(TAG_NOT_UINT64); + + switch (op) + { + case FPU_ADD: + addbyte(0xf2); /*ADDSD XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0x58); + addbyte(0xc1); + break; + case FPU_DIV: + addbyte(0xf2); /*DIVSD XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0x5e); + addbyte(0xc1); + break; + case FPU_DIVR: + addbyte(0xf2); /*DIVSD XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0x5e); + addbyte(0xc8); + break; + case FPU_MUL: + addbyte(0xf2); /*MULSD XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0x59); + addbyte(0xc1); + break; + case FPU_SUB: + addbyte(0xf2); /*SUBSD XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0x5c); + addbyte(0xc1); + break; + case FPU_SUBR: + addbyte(0xf2); /*SUBSD XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0x5c); + addbyte(0xc8); + break; + } + if (op == FPU_DIVR || op == FPU_SUBR) + { + addbyte(0x66); /*MOVQ ST[RAX*8], XMM1*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x4c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + } + else + { + addbyte(0x66); /*MOVQ ST[RAX*8], XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + } +} + +static inline void FP_OP_S(int op) +{ + addbyte(0x66); /*MOVD XMM1, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc8); + addbyte(0xf3); /*CVTSS2SD XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x5a); + addbyte(0xc9); + FP_OP_MEM(op); +} +static inline void FP_OP_D(int op) +{ + addbyte(0x66); /*MOVQ XMM1, RAX*/ + addbyte(0x48); + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc8); + if (((cpu_state.npxc >> 10) & 3) && op == FPU_ADD) + { + addbyte(0x0f); /*STMXCSR [ESP+8]*/ + addbyte(0xae); + addbyte(0x5c); + addbyte(0x24); + addbyte(0x08); + addbyte(0x8b); /*MOV EAX, [ESP+8]*/ + addbyte(0x44); + addbyte(0x24); + addbyte(0x08); + addbyte(0x25); /*AND EAX, ~(3 << 13)*/ + addlong(~(3 << 10)); + addbyte(0x0d); /*OR EAX, (npxc & (3 << 10)) << 3*/ + addlong((cpu_state.npxc & (3 << 10)) << 3); + addbyte(0x89); /*MOV [RSP+12], EAX*/ + addbyte(0x44); + addbyte(0x24); + addbyte(0x0c); + addbyte(0x0f); /*LDMXCSR [RSP+12]*/ + addbyte(0xae); + addbyte(0x54); + addbyte(0x24); + addbyte(0x0c); + } + FP_OP_MEM(op); + if (((cpu_state.npxc >> 10) & 3) && op == FPU_ADD) + { + addbyte(0x0f); /*LDMXCSR [RSP+8]*/ + addbyte(0xae); + addbyte(0x54); + addbyte(0x24); + addbyte(0x08); + } +} +static inline void FP_OP_IW(int op) +{ + addbyte(0x0f); /*MOVSX EAX, AX*/ + addbyte(0xbf); + addbyte(0xc0); + addbyte(0xf2); /*CVTSI2SD XMM1, EAX*/ + addbyte(0x0f); + addbyte(0x2a); + addbyte(0xc8); + FP_OP_MEM(op); +} +static inline void FP_OP_IL(int op) +{ + addbyte(0xf2); /*CVTSI2SD XMM1, EAX*/ + addbyte(0x0f); + addbyte(0x2a); + addbyte(0xc8); + FP_OP_MEM(op); +} + +static inline void FP_COMPARE_REG(int dst, int src) +{ + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + if (src || dst) + { + addbyte(0x83); /*ADD EAX, 1*/ + addbyte(0xc0); + addbyte(src ? src : dst); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(7); + } + + addbyte(0x8a); /*MOV CL, [npxs+1]*/ + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0x80); /*AND CL, ~(C0|C2|C3)*/ + addbyte(0xe1); + addbyte((~(C0|C2|C3)) >> 8); + + if (src) + { + addbyte(0xf3); /*MOVQ XMM0, ST[RBX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x66); /*COMISD XMM0, ST[RAX*8]*/ + addbyte(0x0f); + addbyte(0x2f); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + } + else + { + addbyte(0xf3); /*MOVQ XMM0, ST[RAX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x66); /*COMISD XMM0, ST[RBX*8]*/ + addbyte(0x0f); + addbyte(0x2f); + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } + + addbyte(0x9f); /*LAHF*/ + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR CL, AH*/ + addbyte(0xe1); + addbyte(0x88); /*MOV [npxs+1], CL*/ + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); +} + +static inline void FP_COMPARE_MEM() +{ + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + + addbyte(0x8a); /*MOV CL, [npxs+1]*/ + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xf3); /*MOVQ XMM0, ST[RAX*8]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND CL, ~(C0|C2|C3)*/ + addbyte(0xe1); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0x66); /*COMISD XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0x2f); + addbyte(0xc1); + addbyte(0x9f); /*LAHF*/ + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR CL, AH*/ + addbyte(0xe1); + addbyte(0x88); /*MOV [npxs+1], CL*/ + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); +} +static inline void FP_COMPARE_S() +{ + addbyte(0x66); /*MOVD XMM1, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc8); + addbyte(0xf3); /*CVTSS2SD XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x5a); + addbyte(0xc9); + FP_COMPARE_MEM(); +} +static inline void FP_COMPARE_D() +{ + addbyte(0x66); /*MOVQ XMM1, RAX*/ + addbyte(0x48); + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc8); + FP_COMPARE_MEM(); +} +static inline void FP_COMPARE_IW() +{ + addbyte(0x0f); /*MOVSX EAX, AX*/ + addbyte(0xbf); + addbyte(0xc0); + addbyte(0xf2); /*CVTSI2SD XMM1, EAX*/ + addbyte(0x0f); + addbyte(0x2a); + addbyte(0xc8); + FP_COMPARE_MEM(); +} +static inline void FP_COMPARE_IL() +{ + addbyte(0xf2); /*CVTSI2SD XMM1, EAX*/ + addbyte(0x0f); + addbyte(0x2a); + addbyte(0xc8); + FP_COMPARE_MEM(); +} + +static inline void UPDATE_NPXC(int reg) +{ +} + +static inline void SET_BITS(uintptr_t addr, uint32_t val) +{ + if (IS_32_ADDR(addr)) + { + if (val & ~0xff) + { + addbyte(0x81); /*OR [addr], val*/ + addbyte(0x0c); + addbyte(0x25); + addlong(addr); + addlong(val); + } + else + { + addbyte(0x80); /*OR [addr], val*/ + addbyte(0x0c); + addbyte(0x25); + addlong(addr); + addbyte(val); + } + } + else + { + addbyte(0x48); /*MOV RAX, &addr*/ + addbyte(0xb8 | REG_EAX); + addquad(addr); + if (val & ~0xff) + { + addbyte(0x81); /*OR [RAX], val*/ + addbyte(0x08); + addlong(val); + } + else + { + addbyte(0x80); /*OR [RAX], val*/ + addbyte(0x08); + addbyte(val); + } + } +} + +static inline void CLEAR_BITS(uintptr_t addr, uint32_t val) +{ + if (IS_32_ADDR(addr)) + { + if (val & ~0xff) + { + addbyte(0x81); /*AND [addr], val*/ + addbyte(0x24); + addbyte(0x25); + addlong(addr); + addlong(~val); + } + else + { + addbyte(0x80); /*AND [addr], val*/ + addbyte(0x24); + addbyte(0x25); + addlong(addr); + addbyte(~val); + } + } + else + { + addbyte(0x48); /*MOV RAX, &addr*/ + addbyte(0xb8 | REG_EAX); + addquad(addr); + if (val & ~0xff) + { + addbyte(0x81); /*AND [RAX], val*/ + addbyte(0x20); + addlong(~val); + } + else + { + addbyte(0x80); /*AND [RAX], val*/ + addbyte(0x20); + addbyte(~val); + } + } +} + +#define LOAD_Q_REG_1 REG_EAX +#define LOAD_Q_REG_2 REG_EDX + +static inline void MMX_ENTER() +{ + if (codegen_mmx_entered) + return; + + if (IS_32_ADDR(&cr0)) + { + addbyte(0xf6); /*TEST cr0, 0xc*/ + addbyte(0x04); + addbyte(0x25); + addlong((uintptr_t)&cr0); + addbyte(0x0c); + } + else + { + addbyte(0x48); /*MOV RAX, &cr0*/ + addbyte(0xb8 | REG_EAX); + addquad((uint64_t)&cr0); + addbyte(0xf6); /*TEST [RAX], 0xc*/ + addbyte(0 | (REG_EAX << 3)); + addbyte(0x0c); + } + addbyte(0x74); /*JZ +*/ + addbyte(7+5+12+5); + addbyte(0xC7); /*MOVL [oldpc],op_old_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(oldpc)); + addlong(op_old_pc); + load_param_1_32(&codeblock[block_current], 7); + CALL_FUNC((uintptr_t)x86_int); + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + + + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + addbyte(0xc6); /*MOV ISMMX, 1*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ismmx)); + addbyte(1); + addbyte(0x89); /*MOV TOP, EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV tag, EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(0x89); /*MOV tag+4, EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[4])); + + codegen_mmx_entered = 1; +} + +extern int mmx_ebx_ecx_loaded; + +static inline int LOAD_MMX_D(int guest_reg) +{ + int host_reg = REG_EBX; + + addbyte(0x8b); /*MOV EBX, reg*/ + addbyte(0x44 | (host_reg << 3)); + addbyte(0x25); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[0])); + + return host_reg; +} +static inline void LOAD_MMX_Q(int guest_reg, int *host_reg1, int *host_reg2) +{ + int host_reg = REG_EBX; + + if (host_reg & 8) + addbyte(0x4c); + else + addbyte(0x48); + addbyte(0x8b); /*MOV RBX, reg*/ + addbyte(0x44 | ((host_reg & 7) << 3)); + addbyte(0x25); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].q)); + + *host_reg1 = host_reg; +} +static inline int LOAD_MMX_Q_MMX(int guest_reg) +{ + int dst_reg = find_host_xmm_reg(); + host_reg_xmm_mapping[dst_reg] = 100; + + addbyte(0xf3); /*MOV XMMx, reg*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x44 | ((dst_reg & 7) << 3)); + addbyte(0x25); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].q)); + + return dst_reg; +} + +static inline int LOAD_INT_TO_MMX(int src_reg1, int src_reg2) +{ + int dst_reg = find_host_xmm_reg(); + host_reg_xmm_mapping[dst_reg] = 100; + + addbyte(0x66); /*MOVQ host_reg, src_reg1*/ + if (src_reg1 & 8) + addbyte(0x49); + else + addbyte(0x48); + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0 | (dst_reg << 3) | (src_reg1 & 7)); + + return dst_reg; +} + +static inline void STORE_MMX_LQ(int guest_reg, int host_reg1) +{ + addbyte(0xC7); /*MOVL [reg],0*/ + addbyte(0x44); + addbyte(0x25); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[1])); + addlong(0); + if (host_reg1 & 8) + addbyte(0x44); + addbyte(0x89); /*MOVL [reg],host_reg*/ + addbyte(0x44 | ((host_reg1 & 7) << 3)); + addbyte(0x25); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[0])); +} +static inline void STORE_MMX_Q(int guest_reg, int host_reg1, int host_reg2) +{ + if (host_reg1 & 8) + addbyte(0x4c); + else + addbyte(0x48); + addbyte(0x89); /*MOV [reg],host_reg*/ + addbyte(0x44 | ((host_reg1 & 7) << 3)); + addbyte(0x25); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[0])); +} +static inline void STORE_MMX_Q_MMX(int guest_reg, int host_reg) +{ + addbyte(0x66); /*MOVQ [guest_reg],host_reg*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x44 | (host_reg << 3)); + addbyte(0x25); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].q)); +} + +#define MMX_x86_OP(name, opcode) \ +static inline void MMX_ ## name(int dst_reg, int src_reg) \ +{ \ + addbyte(0x66); /*op dst_reg, src_reg*/ \ + addbyte(0x0f); \ + addbyte(opcode); \ + addbyte(0xc0 | (dst_reg << 3) | src_reg); \ +} + +MMX_x86_OP(AND, 0xdb) +MMX_x86_OP(ANDN, 0xdf) +MMX_x86_OP(OR, 0xeb) +MMX_x86_OP(XOR, 0xef) + +MMX_x86_OP(ADDB, 0xfc) +MMX_x86_OP(ADDW, 0xfd) +MMX_x86_OP(ADDD, 0xfe) +MMX_x86_OP(ADDSB, 0xec) +MMX_x86_OP(ADDSW, 0xed) +MMX_x86_OP(ADDUSB, 0xdc) +MMX_x86_OP(ADDUSW, 0xdd) + +MMX_x86_OP(SUBB, 0xf8) +MMX_x86_OP(SUBW, 0xf9) +MMX_x86_OP(SUBD, 0xfa) +MMX_x86_OP(SUBSB, 0xe8) +MMX_x86_OP(SUBSW, 0xe9) +MMX_x86_OP(SUBUSB, 0xd8) +MMX_x86_OP(SUBUSW, 0xd9) + +MMX_x86_OP(PUNPCKLBW, 0x60); +MMX_x86_OP(PUNPCKLWD, 0x61); +MMX_x86_OP(PUNPCKLDQ, 0x62); +MMX_x86_OP(PCMPGTB, 0x64); +MMX_x86_OP(PCMPGTW, 0x65); +MMX_x86_OP(PCMPGTD, 0x66); + +MMX_x86_OP(PCMPEQB, 0x74); +MMX_x86_OP(PCMPEQW, 0x75); +MMX_x86_OP(PCMPEQD, 0x76); + +MMX_x86_OP(PSRLW, 0xd1); +MMX_x86_OP(PSRLD, 0xd2); +MMX_x86_OP(PSRLQ, 0xd3); +MMX_x86_OP(PSRAW, 0xe1); +MMX_x86_OP(PSRAD, 0xe2); +MMX_x86_OP(PSLLW, 0xf1); +MMX_x86_OP(PSLLD, 0xf2); +MMX_x86_OP(PSLLQ, 0xf3); + +MMX_x86_OP(PMULLW, 0xd5); +MMX_x86_OP(PMULHW, 0xe5); +MMX_x86_OP(PMADDWD, 0xf5); + +static inline void MMX_PACKSSWB(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PACKSSWB dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x63); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x08); +} +static inline void MMX_PACKUSWB(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PACKUSWB dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x08); +} +static inline void MMX_PACKSSDW(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PACKSSDW dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x08); +} +static inline void MMX_PUNPCKHBW(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PUNPCKLBW dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x0e); +} +static inline void MMX_PUNPCKHWD(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PUNPCKLWD dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x0e); +} +static inline void MMX_PUNPCKHDQ(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PUNPCKLDQ dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x0e); +} + +static inline void MMX_PSRLW_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRLW dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xc0 | dst_reg | 0x10); + addbyte(amount); +} +static inline void MMX_PSRAW_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRAW dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xc0 | dst_reg | 0x20); + addbyte(amount); +} +static inline void MMX_PSLLW_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSLLW dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xc0 | dst_reg | 0x30); + addbyte(amount); +} + +static inline void MMX_PSRLD_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRLD dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xc0 | dst_reg | 0x10); + addbyte(amount); +} +static inline void MMX_PSRAD_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRAD dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xc0 | dst_reg | 0x20); + addbyte(amount); +} +static inline void MMX_PSLLD_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSLLD dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xc0 | dst_reg | 0x30); + addbyte(amount); +} + +static inline void MMX_PSRLQ_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRLQ dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xc0 | dst_reg | 0x10); + addbyte(amount); +} +static inline void MMX_PSRAQ_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRAQ dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xc0 | dst_reg | 0x20); + addbyte(amount); +} +static inline void MMX_PSLLQ_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSLLQ dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xc0 | dst_reg | 0x30); + addbyte(amount); +} + + +static inline void SAVE_EA() +{ + addbyte(0x89); /*MOV [ESP+0x24], EAX*/ + addbyte(0x44); + addbyte(0x24); + addbyte(0x24); +} +static inline void LOAD_EA() +{ + addbyte(0x8b); /*MOV EAX, [ESP+0x24]*/ + addbyte(0x44); + addbyte(0x24); + addbyte(0x24); +} + +#define MEM_CHECK_WRITE_B MEM_CHECK_WRITE + +static inline void MEM_CHECK_WRITE(x86seg *seg) +{ + uint8_t *jump1, *jump2, *jump3 = NULL; + + CHECK_SEG_WRITE(seg); + + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOV ESI, seg->base*/ + addbyte(0x34); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ESI, [RSI]*/ + addbyte(0x36); + } + + + /*seg = ESI, addr = EAX*/ + + if (IS_32_ADDR(&cr0)) + { + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&cr0); + addbyte(0); + } + else + { + addbyte(0x48); /*MOV RDI, &cr0*/ + addbyte(0xbf); + addquad((uint64_t)&cr0); + addbyte(0x83); /*CMPL [RDI], 0*/ + addbyte(0x3f); + addbyte(0); + } + addbyte(0x67); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x8d); + addbyte(0x3c); + addbyte(0x30); + addbyte(0x79); /*JNS +*/ + jump1 = &codeblock[block_current].data[block_pos]; + addbyte(0); + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + if (!(seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) && !(seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + jump3 = &codeblock[block_current].data[block_pos]; + addbyte(0); + } + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x83); /*CMP writelookup2[RDI*8],-1*/ + addbyte(0x3c); + addbyte(0xfd); + addlong((uint32_t)(uintptr_t)writelookup2); + addbyte(-1); + } + else + { + addbyte(0x48); /*MOV RCX, writelookup2*/ + addbyte(0xb9); + addquad((uint64_t)writelookup2); + addbyte(0x83); /*CMP [RCX+RDI*8], -1*/ + addbyte(0x3c); + addbyte(0xf9); + addbyte(-1); + } + addbyte(0x75); /*JNE +*/ + jump2 = &codeblock[block_current].data[block_pos]; + addbyte(0); + + if (!(seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) && !(seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + *jump3 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump3 - 1; + /*slowpath:*/ + addbyte(0x67); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x8d); + addbyte(0x3c); + addbyte(0x30); + + + load_param_1_reg_32(REG_EDI); + load_param_2_32(&codeblock[block_current], 1); + + call(&codeblock[block_current], (uintptr_t)mmutranslatereal32); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong((uintptr_t)&codeblock[block_current].data[BLOCK_EXIT_OFFSET] - ((uintptr_t)(&codeblock[block_current].data[block_pos]) + 4)); + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; + *jump2 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump2 - 1; + + LOAD_EA(); +} + +static inline void MEM_CHECK_WRITE_W(x86seg *seg) +{ + uint8_t *jump1, *jump2, *jump3, *jump4 = NULL; + int jump_pos; + + CHECK_SEG_WRITE(seg); + + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOV ESI, seg->base*/ + addbyte(0x34); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ESI, [RSI]*/ + addbyte(0x36); + } + + + /*seg = ESI, addr = EAX*/ + + if (IS_32_ADDR(&cr0)) + { + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&cr0); + addbyte(0); + } + else + { + addbyte(0x48); /*MOV RDI, &cr0*/ + addbyte(0xbf); + addquad((uint64_t)&cr0); + addbyte(0x83); /*CMPL [RDI], 0*/ + addbyte(0x3f); + addbyte(0); + } + addbyte(0x67); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x8d); + addbyte(0x3c); + addbyte(0x30); + addbyte(0x79); /*JNS +*/ + jump1 = &codeblock[block_current].data[block_pos]; + addbyte(0); + if (!(seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) && !(seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + } + addbyte(0x8d); /*LEA ESI, 1[EDI]*/ + addbyte(0x77); + addbyte(0x01); + if (!(seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) && !(seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x74); /*JE slowpath*/ + jump4 = &codeblock[block_current].data[block_pos]; + addbyte(0); + } + addbyte(0x89); /*MOV EBX, EDI*/ + addbyte(0xfb); + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xee); + addbyte(12); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x83); /*CMP writelookup2[RDI*8],-1*/ + addbyte(0x3c); + addbyte(0xfd); + addlong((uint32_t)(uintptr_t)writelookup2); + addbyte(-1); + } + else + { + addbyte(0x48); /*MOV RAX, writelookup2*/ + addbyte(0xb8); + addquad((uint64_t)writelookup2); + addbyte(0x83); /*CMP [RAX+RDI*8], -1*/ + addbyte(0x3c); + addbyte(0xf8); + addbyte(-1); + } + addbyte(0x74); /*JE +*/ + jump2 = &codeblock[block_current].data[block_pos]; + addbyte(0); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x83); /*CMP writelookup2[RSI*8],-1*/ + addbyte(0x3c); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)writelookup2); + addbyte(-1); + } + else + { + addbyte(0x83); /*CMP [RAX+RSI*8], -1*/ + addbyte(0x3c); + addbyte(0xf0); + addbyte(-1); + } + addbyte(0x75); /*JNE +*/ + jump3 = &codeblock[block_current].data[block_pos]; + addbyte(0); + + /*slowpath:*/ + *jump2 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump2 - 1; + if (!(seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) && !(seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + *jump4 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump4 - 1; + jump_pos = block_pos; + load_param_1_reg_32(REG_EBX); + load_param_2_32(&codeblock[block_current], 1); + call(&codeblock[block_current], (uintptr_t)mmutranslatereal32); + addbyte(0x83); /*ADD EBX, 1*/ + addbyte(0xc3); + addbyte(1); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong((uintptr_t)&codeblock[block_current].data[BLOCK_EXIT_OFFSET] - ((uintptr_t)(&codeblock[block_current].data[block_pos]) + 4)); + /*If bits 0-11 of the address are now 0 then this crosses a page, so loop back*/ + addbyte(0xf7); /*TEST $fff, EBX*/ + addbyte(0xc3); + addlong(0xfff); + addbyte(0x74); /*JNE slowpath*/ + addbyte(jump_pos - block_pos - 1); + + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; + *jump3 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump3 - 1; + + LOAD_EA(); +} + +static inline void MEM_CHECK_WRITE_L(x86seg *seg) +{ + uint8_t *jump1, *jump2, *jump3, *jump4 = NULL; + int jump_pos; + + CHECK_SEG_WRITE(seg); + + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOV ESI, seg->base*/ + addbyte(0x34); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ESI, [RSI]*/ + addbyte(0x36); + } + + + /*seg = ESI, addr = EAX*/ + + if (IS_32_ADDR(&cr0)) + { + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&cr0); + addbyte(0); + } + else + { + addbyte(0x48); /*MOV RDI, &cr0*/ + addbyte(0xbf); + addquad((uint64_t)&cr0); + addbyte(0x83); /*CMPL [RDI], 0*/ + addbyte(0x3f); + addbyte(0); + } + addbyte(0x67); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x8d); + addbyte(0x3c); + addbyte(0x30); + addbyte(0x79); /*JNS +*/ + jump1 = &codeblock[block_current].data[block_pos]; + addbyte(0); + if (!(seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) && !(seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + } + addbyte(0x8d); /*LEA ESI, 3[EDI]*/ + addbyte(0x77); + addbyte(0x03); + if (!(seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) && !(seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x74); /*JE slowpath*/ + jump4 = &codeblock[block_current].data[block_pos]; + addbyte(0); + } + addbyte(0x89); /*MOV EBX, EDI*/ + addbyte(0xfb); + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xee); + addbyte(12); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x83); /*CMP writelookup2[RDI*8],-1*/ + addbyte(0x3c); + addbyte(0xfd); + addlong((uint32_t)(uintptr_t)writelookup2); + addbyte(-1); + } + else + { + addbyte(0x48); /*MOV RAX, writelookup2*/ + addbyte(0xb8); + addquad((uint64_t)writelookup2); + addbyte(0x83); /*CMP [RAX+RDI*8], -1*/ + addbyte(0x3c); + addbyte(0xf8); + addbyte(-1); + } + addbyte(0x74); /*JE slowpath*/ + jump2 = &codeblock[block_current].data[block_pos]; + addbyte(0); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x83); /*CMP writelookup2[RSI*8],-1*/ + addbyte(0x3c); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)writelookup2); + addbyte(-1); + } + else + { + addbyte(0x83); /*CMP [RAX+RSI*8], -1*/ + addbyte(0x3c); + addbyte(0xf0); + addbyte(-1); + } + addbyte(0x75); /*JNE +*/ + jump3 = &codeblock[block_current].data[block_pos]; + addbyte(0); + + /*slowpath:*/ + *jump2 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump2 - 1; + if (!(seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) && !(seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + *jump4 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump4 - 1; + jump_pos = block_pos; + load_param_1_reg_32(REG_EBX); + load_param_2_32(&codeblock[block_current], 1); + call(&codeblock[block_current], (uintptr_t)mmutranslatereal32); + addbyte(0x83); /*ADD EBX, 3*/ + addbyte(0xc3); + addbyte(3); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong((uintptr_t)&codeblock[block_current].data[BLOCK_EXIT_OFFSET] - ((uintptr_t)(&codeblock[block_current].data[block_pos]) + 4)); + /*If bits 0-11 of the address are now 0 then this crosses a page, so loop back*/ + addbyte(0xf7); /*TEST $ffc, EBX*/ + addbyte(0xc3); + addlong(0xffc); + addbyte(0x74); /*JE slowpath*/ + addbyte(jump_pos - block_pos - 1); + + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; + *jump3 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump3 - 1; + + LOAD_EA(); +} + +static inline int MEM_LOAD_ADDR_EA_B_NO_ABRT(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2); + addbyte(0x8b); /*MOV AL,[RDI+RSI]*/ + addbyte(0x04); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)readmemb386l); + addbyte(0x89); /*MOV ECX, EAX*/ + addbyte(0xc1); + /*done:*/ + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} +static inline int MEM_LOAD_ADDR_EA_W_NO_ABRT(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 1*/ + addbyte(0xc7); + addlong(1); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+4+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+2); + addbyte(0x66); /*MOV AX,[RDI+RSI]*/ + addbyte(0x8b); + addbyte(0x04); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)readmemwl); + addbyte(0x89); /*MOV ECX, EAX*/ + addbyte(0xc1); + /*done:*/ + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} +static inline int MEM_LOAD_ADDR_EA_L_NO_ABRT(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 3*/ + addbyte(0xc7); + addlong(3); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+3+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2); + addbyte(0x8b); /*MOV EAX,[RDI+RSI]*/ + addbyte(0x04); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)readmemll); + addbyte(0x89); /*MOV ECX, EAX*/ + addbyte(0xc1); + /*done:*/ + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} + +static inline void MEM_STORE_ADDR_EA_B_NO_ABRT(x86seg *seg, int host_reg) +{ + if (host_reg & 0x10) + { + /*Handle high byte of register*/ + if (host_reg & 8) + { + addbyte(0x45); /*MOVL R8, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 7) << 3)); + } + else + { + addbyte(0x41); /*MOVL R8, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 7) << 3)); + } + addbyte(0x66); /*SHR R8, 8*/ + addbyte(0x41); + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + host_reg = 8; + } + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL EBX, seg->base*/ + addbyte(0x1c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV EBX, [RSI]*/ + addbyte(0x1e); + } + addbyte(0x67); /*LEA ESI, (EAX,EBX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x18); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(((host_reg & 8) ? 4:3)+2); + if (host_reg & 8) + { + addbyte(0x44); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x88); + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + else + { + addbyte(0x88); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x04 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12); + /*slowpath:*/ + load_param_3_reg_32(host_reg); + load_param_1_reg_32(REG_EBX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)writememb386l); + /*done:*/ +} +static inline void MEM_STORE_ADDR_EA_W_NO_ABRT(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL EBX, seg->base*/ + addbyte(0x1c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV EBX, [RSI]*/ + addbyte(0x1e); + } + addbyte(0x67); /*LEA ESI, (EAX,EBX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x18); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 1*/ + addbyte(0xc7); + addlong(1); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+((host_reg & 8) ? 5:4)+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(((host_reg & 8) ? 5:4)+2); + if (host_reg & 8) + { + addbyte(0x66); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + else + { + addbyte(0x66); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x89); + addbyte(0x04 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12); + /*slowpath:*/ + load_param_3_reg_32(host_reg); + load_param_1_reg_32(REG_EBX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)writememwl); + /*done:*/ +} +static inline void MEM_STORE_ADDR_EA_L_NO_ABRT(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + else if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL EBX, seg->base*/ + addbyte(0x1c); + addbyte(0x25); + addlong((uint32_t)(uintptr_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV EBX, [RSI]*/ + addbyte(0x1e); + } + addbyte(0x67); /*LEA ESI, (EAX,EBX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x18); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 3*/ + addbyte(0xc7); + addlong(3); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)(uintptr_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+((host_reg & 8) ? 4:3)+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(((host_reg & 8) ? 4:3)+2); + if (host_reg & 8) + { + addbyte(0x44); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x89); + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + else + { + addbyte(0x89); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x04 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12); + /*slowpath:*/ + load_param_3_reg_32(host_reg); + load_param_1_reg_32(REG_EBX); + load_param_2_reg_32(REG_EAX); + call_long((uintptr_t)writememll); + /*done:*/ +} + +static inline void LOAD_SEG(int host_reg, void *seg) +{ + load_param_2_64(&codeblock[block_current], (uint64_t)seg); + load_param_1_reg_32(host_reg); + CALL_FUNC((uintptr_t)loadseg); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} diff --git a/src/codegen/codegen_ops_x86.h b/src/codegen/codegen_ops_x86.h new file mode 100644 index 000000000..5dcfdef87 --- /dev/null +++ b/src/codegen/codegen_ops_x86.h @@ -0,0 +1,3988 @@ +/*Register allocation : + EBX, ECX, EDX - emulated registers + EAX - work register, EA storage + ESI, EDI - work registers + EBP - points at emulated register array +*/ +#define HOST_REG_START 1 +#define HOST_REG_END 4 +#define HOST_REG_XMM_START 0 +#define HOST_REG_XMM_END 7 +static inline int find_host_reg() +{ + int c; + for (c = HOST_REG_START; c < HOST_REG_END; c++) + { + if (host_reg_mapping[c] == -1) + break; + } + + if (c == NR_HOST_REGS) + fatal("Out of host regs!\n"); + return c; +} +static inline int find_host_xmm_reg() +{ + int c; + for (c = HOST_REG_XMM_START; c < HOST_REG_XMM_END; c++) + { + if (host_reg_xmm_mapping[c] == -1) + break; + } + + if (c == HOST_REG_XMM_END) + fatal("Out of host XMM regs!\n"); + return c; +} + +#if 0 +static inline void STORE_IMM_ADDR_B(uintptr_t addr, uint8_t val) +{ + addbyte(0xC6); /*MOVB [addr],val*/ + addbyte(0x05); + addlong(addr); + addbyte(val); +} +static inline void STORE_IMM_ADDR_W(uintptr_t addr, uint16_t val) +{ + addbyte(0x66); /*MOVW [addr],val*/ + addbyte(0xC7); + addbyte(0x05); + addlong(addr); + addword(val); +} +#endif +static inline void STORE_IMM_ADDR_L(uintptr_t addr, uint32_t val) +{ + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + addbyte(0xC7); /*MOVL [addr],val*/ + addbyte(0x45); + addbyte(addr - (uint32_t)&cpu_state - 128); + addlong(val); + } + else + { + addbyte(0xC7); /*MOVL [addr],val*/ + addbyte(0x05); + addlong(addr); + addlong(val); + } +} + +static inline void STORE_IMM_REG_B(int reg, uint8_t val) +{ + addbyte(0xC6); /*MOVB [addr],val*/ + addbyte(0x45); + if (reg & 4) + addbyte((uint8_t)cpu_state_offset(regs[reg & 3].b.h)); + else + addbyte((uint8_t)cpu_state_offset(regs[reg & 3].b.l)); + addbyte(val); +} +static inline void STORE_IMM_REG_W(int reg, uint16_t val) +{ + addbyte(0x66); /*MOVW [addr],val*/ + addbyte(0xC7); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[reg & 7].w)); + addword(val); +} +static inline void STORE_IMM_REG_L(int reg, uint32_t val) +{ + addbyte(0xC7); /*MOVL [addr],val*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[reg & 7].l)); + addlong(val); +} + +static inline int LOAD_REG_B(int reg) +{ + int host_reg = find_host_reg(); + host_reg_mapping[host_reg] = reg; + + addbyte(0x0f); /*MOVZX B[reg],host_reg*/ + addbyte(0xb6); + addbyte(0x45 | (host_reg << 3)); + if (reg & 4) + addbyte((uint8_t)cpu_state_offset(regs[reg & 3].b.h)); + else + addbyte((uint8_t)cpu_state_offset(regs[reg & 3].b.l)); + + return host_reg; +} +static inline int LOAD_REG_W(int reg) +{ + int host_reg = find_host_reg(); + host_reg_mapping[host_reg] = reg; + + addbyte(0x0f); /*MOVZX W[reg],host_reg*/ + addbyte(0xb7); + addbyte(0x45 | (host_reg << 3)); + addbyte((uint8_t)cpu_state_offset(regs[reg & 7].w)); + + return host_reg; +} +static inline int LOAD_REG_L(int reg) +{ + int host_reg = find_host_reg(); + host_reg_mapping[host_reg] = reg; + + addbyte(0x8b); /*MOVL host_reg,[reg]*/ + addbyte(0x45 | (host_reg << 3)); + addbyte((uint8_t)cpu_state_offset(regs[reg & 7].l)); + + return host_reg; +} + +static inline int LOAD_VAR_W(uintptr_t addr) +{ + int host_reg = find_host_reg(); + host_reg_mapping[host_reg] = 0; + + addbyte(0x66); /*MOVL host_reg,[reg]*/ + addbyte(0x8b); + addbyte(0x05 | (host_reg << 3)); + addlong((uint32_t)addr); + + return host_reg; +} +static inline int LOAD_VAR_WL(uintptr_t addr) +{ + int host_reg = find_host_reg(); + host_reg_mapping[host_reg] = 0; + + addbyte(0x0f); /*MOVZX host_reg, [addr]*/ + addbyte(0xb7); + addbyte(0x05 | (host_reg << 3)); + addlong((uint32_t)addr); + + return host_reg; +} +static inline int LOAD_VAR_L(uintptr_t addr) +{ + int host_reg = find_host_reg(); + host_reg_mapping[host_reg] = 0; + + addbyte(0x8b); /*MOVL host_reg,[reg]*/ + addbyte(0x05 | (host_reg << 3)); + addlong((uint32_t)addr); + + return host_reg; +} + +static inline int LOAD_REG_IMM(uint32_t imm) +{ + int host_reg = find_host_reg(); + host_reg_mapping[host_reg] = 0; + + addbyte(0xc7); /*MOVL host_reg, imm*/ + addbyte(0xc0 | host_reg); + addlong(imm); + + return host_reg; +} + +static inline int LOAD_HOST_REG(int host_reg) +{ + int new_host_reg = find_host_reg(); + host_reg_mapping[new_host_reg] = 0; + + addbyte(0x89); /*MOV new_host_reg, host_reg*/ + addbyte(0xc0 | (host_reg << 3) | new_host_reg); + + return new_host_reg; +} + +static inline void STORE_REG_B_RELEASE(int host_reg) +{ + addbyte(0x88); /*MOVB [reg],host_reg*/ + addbyte(0x45 | (host_reg << 3)); + if (host_reg_mapping[host_reg] & 4) + addbyte((uint8_t)cpu_state_offset(regs[host_reg_mapping[host_reg] & 3].b.h)); + else + addbyte((uint8_t)cpu_state_offset(regs[host_reg_mapping[host_reg] & 3].b.l)); + host_reg_mapping[host_reg] = -1; +} +static inline void STORE_REG_W_RELEASE(int host_reg) +{ + addbyte(0x66); /*MOVW [reg],host_reg*/ + addbyte(0x89); + addbyte(0x45 | (host_reg << 3)); + addbyte((uint8_t)cpu_state_offset(regs[host_reg_mapping[host_reg]].w)); + host_reg_mapping[host_reg] = -1; +} +static inline void STORE_REG_L_RELEASE(int host_reg) +{ + addbyte(0x89); /*MOVL [reg],host_reg*/ + addbyte(0x45 | (host_reg << 3)); + addbyte((uint8_t)cpu_state_offset(regs[host_reg_mapping[host_reg]].l)); + host_reg_mapping[host_reg] = -1; +} + +static inline void STORE_REG_TARGET_B_RELEASE(int host_reg, int guest_reg) +{ + addbyte(0x88); /*MOVB [guest_reg],host_reg*/ + addbyte(0x45 | (host_reg << 3)); + if (guest_reg & 4) + addbyte((uint8_t)cpu_state_offset(regs[guest_reg & 3].b.h)); + else + addbyte((uint8_t)cpu_state_offset(regs[guest_reg & 3].b.l)); + host_reg_mapping[host_reg] = -1; +} +static inline void STORE_REG_TARGET_W_RELEASE(int host_reg, int guest_reg) +{ + addbyte(0x66); /*MOVW [guest_reg],host_reg*/ + addbyte(0x89); + addbyte(0x45 | (host_reg << 3)); + addbyte((uint8_t)cpu_state_offset(regs[guest_reg & 7].w)); + host_reg_mapping[host_reg] = -1; +} +static inline void STORE_REG_TARGET_L_RELEASE(int host_reg, int guest_reg) +{ + addbyte(0x89); /*MOVL [guest_reg],host_reg*/ + addbyte(0x45 | (host_reg << 3)); + addbyte((uint8_t)cpu_state_offset(regs[guest_reg & 7].l)); + host_reg_mapping[host_reg] = -1; +} + +static inline void RELEASE_REG(int host_reg) +{ + host_reg_mapping[host_reg] = -1; +} + +static inline void STORE_HOST_REG_ADDR_W(uintptr_t addr, int host_reg) +{ + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + addbyte(0x66); /*MOVW [addr],host_reg*/ + addbyte(0x89); + addbyte(0x45 | (host_reg << 3)); + addbyte((uint32_t)addr - (uint32_t)&cpu_state - 128); + } + else + { + addbyte(0x66); /*MOVL [reg],host_reg*/ + addbyte(0x89); + addbyte(0x05 | (host_reg << 3)); + addlong(addr); + } +} +static inline void STORE_HOST_REG_ADDR(uintptr_t addr, int host_reg) +{ + if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) + { + addbyte(0x89); /*MOVL [addr],host_reg*/ + addbyte(0x45 | (host_reg << 3)); + addbyte((uint32_t)addr - (uint32_t)&cpu_state - 128); + } + else + { + addbyte(0x89); /*MOVL [reg],host_reg*/ + addbyte(0x05 | (host_reg << 3)); + addlong(addr); + } +} +#define STORE_HOST_REG_ADDR_BL STORE_HOST_REG_ADDR +#define STORE_HOST_REG_ADDR_WL STORE_HOST_REG_ADDR + +static inline void ADD_HOST_REG_B(int dst_reg, int src_reg) +{ + addbyte(0x00); /*ADDB dst_reg, src_reg*/ + addbyte(0xc0 | dst_reg | (src_reg << 3)); +} +static inline void ADD_HOST_REG_W(int dst_reg, int src_reg) +{ + addbyte(0x66); /*ADDW dst_reg, src_reg*/ + addbyte(0x01); + addbyte(0xc0 | dst_reg | (src_reg << 3)); +} +static inline void ADD_HOST_REG_L(int dst_reg, int src_reg) +{ + addbyte(0x01); /*ADDL dst_reg, src_reg*/ + addbyte(0xc0 | dst_reg | (src_reg << 3)); +} +static inline void ADD_HOST_REG_IMM_B(int host_reg, uint8_t imm) +{ + addbyte(0x80); /*ADDB host_reg, imm*/ + addbyte(0xC0 | host_reg); + addbyte(imm); +} +static inline void ADD_HOST_REG_IMM_W(int host_reg, uint16_t imm) +{ + if (imm < 0x80 || imm >= 0xff80) + { + addbyte(0x66); /*ADDW host_reg, imm*/ + addbyte(0x83); + addbyte(0xC0 | host_reg); + addbyte(imm & 0xff); + } + else + { + addbyte(0x66); /*ADDW host_reg, imm*/ + addbyte(0x81); + addbyte(0xC0 | host_reg); + addword(imm); + } +} +static inline void ADD_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (imm < 0x80 || imm >= 0xffffff80) + { + addbyte(0x83); /*ADDL host_reg, imm*/ + addbyte(0xC0 | host_reg); + addbyte(imm & 0xff); + } + else + { + addbyte(0x81); /*ADDL host_reg, imm*/ + addbyte(0xC0 | host_reg); + addlong(imm); + } +} + +#define AND_HOST_REG_B AND_HOST_REG_L +#define AND_HOST_REG_W AND_HOST_REG_L +static inline void AND_HOST_REG_L(int dst_reg, int src_reg) +{ + addbyte(0x21); /*ANDL dst_reg, src_reg*/ + addbyte(0xc0 | dst_reg | (src_reg << 3)); +} +static inline void AND_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (imm < 0x80 || imm >= 0xffffff80) + { + addbyte(0x83); /*ANDL host_reg, imm*/ + addbyte(0xE0 | host_reg); + addbyte(imm & 0xff); + } + else + { + addbyte(0x81); /*ANDL host_reg, imm*/ + addbyte(0xE0 | host_reg); + addlong(imm); + } +} +static inline int TEST_HOST_REG_B(int dst_reg, int src_reg) +{ + AND_HOST_REG_B(dst_reg, src_reg); + + return dst_reg; +} +static inline int TEST_HOST_REG_W(int dst_reg, int src_reg) +{ + AND_HOST_REG_W(dst_reg, src_reg); + + return dst_reg; +} +static inline int TEST_HOST_REG_L(int dst_reg, int src_reg) +{ + AND_HOST_REG_L(dst_reg, src_reg); + + return dst_reg; +} +static inline int TEST_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + AND_HOST_REG_IMM(host_reg, imm); + + return host_reg; +} + +#define OR_HOST_REG_B OR_HOST_REG_L +#define OR_HOST_REG_W OR_HOST_REG_L +static inline void OR_HOST_REG_L(int dst_reg, int src_reg) +{ + addbyte(0x09); /*ORL dst_reg, src_reg*/ + addbyte(0xc0 | dst_reg | (src_reg << 3)); +} +static inline void OR_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (imm < 0x80 || imm >= 0xffffff80) + { + addbyte(0x83); /*ORL host_reg, imm*/ + addbyte(0xC8 | host_reg); + addbyte(imm & 0xff); + } + else + { + addbyte(0x81); /*ORL host_reg, imm*/ + addbyte(0xC8 | host_reg); + addlong(imm); + } +} + +static inline void NEG_HOST_REG_B(int reg) +{ + addbyte(0xf6); + addbyte(0xd8 | reg); +} +static inline void NEG_HOST_REG_W(int reg) +{ + addbyte(0x66); + addbyte(0xf7); + addbyte(0xd8 | reg); +} +static inline void NEG_HOST_REG_L(int reg) +{ + addbyte(0xf7); + addbyte(0xd8 | reg); +} + +static inline void SUB_HOST_REG_B(int dst_reg, int src_reg) +{ + addbyte(0x28); /*SUBB dst_reg, src_reg*/ + addbyte(0xc0 | dst_reg | (src_reg << 3)); +} +static inline void SUB_HOST_REG_W(int dst_reg, int src_reg) +{ + addbyte(0x66); /*SUBW dst_reg, src_reg*/ + addbyte(0x29); + addbyte(0xc0 | dst_reg | (src_reg << 3)); +} +static inline void SUB_HOST_REG_L(int dst_reg, int src_reg) +{ + addbyte(0x29); /*SUBL dst_reg, src_reg*/ + addbyte(0xc0 | dst_reg | (src_reg << 3)); +} +static inline void SUB_HOST_REG_IMM_B(int host_reg, uint8_t imm) +{ + addbyte(0x80); /*SUBB host_reg, imm*/ + addbyte(0xE8 | host_reg); + addbyte(imm); +} +static inline void SUB_HOST_REG_IMM_W(int host_reg, uint16_t imm) +{ + if (imm < 0x80 || imm >= 0xff80) + { + addbyte(0x66); /*SUBW host_reg, imm*/ + addbyte(0x83); + addbyte(0xE8 | host_reg); + addbyte(imm & 0xff); + } + else + { + addbyte(0x66); /*SUBW host_reg, imm*/ + addbyte(0x81); + addbyte(0xE8 | host_reg); + addword(imm); + } +} +static inline void SUB_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (imm < 0x80 || imm >= 0xffffff80) + { + addbyte(0x83); /*SUBL host_reg, imm*/ + addbyte(0xE8 | host_reg); + addbyte(imm); + } + else + { + addbyte(0x81); /*SUBL host_reg, imm*/ + addbyte(0xE8 | host_reg); + addlong(imm); + } +} + +static inline int CMP_HOST_REG_B(int dst_reg, int src_reg) +{ + SUB_HOST_REG_B(dst_reg, src_reg); + + return dst_reg; +} +static inline int CMP_HOST_REG_W(int dst_reg, int src_reg) +{ + SUB_HOST_REG_W(dst_reg, src_reg); + + return dst_reg; +} +static inline int CMP_HOST_REG_L(int dst_reg, int src_reg) +{ + SUB_HOST_REG_L(dst_reg, src_reg); + + return dst_reg; +} +static inline int CMP_HOST_REG_IMM_B(int host_reg, uint8_t imm) +{ + SUB_HOST_REG_IMM_B(host_reg, imm); + + return host_reg; +} +static inline int CMP_HOST_REG_IMM_W(int host_reg, uint16_t imm) +{ + SUB_HOST_REG_IMM_W(host_reg, imm); + + return host_reg; +} +static inline int CMP_HOST_REG_IMM_L(int host_reg, uint32_t imm) +{ + SUB_HOST_REG_IMM(host_reg, imm); + + return host_reg; +} + +#define XOR_HOST_REG_B XOR_HOST_REG_L +#define XOR_HOST_REG_W XOR_HOST_REG_L +static inline void XOR_HOST_REG_L(int dst_reg, int src_reg) +{ + addbyte(0x31); /*XORL dst_reg, src_reg*/ + addbyte(0xc0 | dst_reg | (src_reg << 3)); +} +static inline void XOR_HOST_REG_IMM(int host_reg, uint32_t imm) +{ + if (imm < 0x80 || imm >= 0xffffff80) + { + addbyte(0x83); /*XORL host_reg, imm*/ + addbyte(0xF0 | host_reg); + addbyte(imm & 0xff); + } + else + { + addbyte(0x81); /*XORL host_reg, imm*/ + addbyte(0xF0 | host_reg); + addlong(imm); + } +} + +static inline void CALL_FUNC(uintptr_t dest) +{ + addbyte(0xE8); /*CALL*/ + addlong(((uintptr_t)dest - (uintptr_t)(&codeblock[block_current].data[block_pos + 4]))); +} + +static inline void SHL_B_IMM(int reg, int count) +{ + addbyte(0xc0); /*SHL reg, count*/ + addbyte(0xc0 | reg | 0x20); + addbyte(count); +} +static inline void SHL_W_IMM(int reg, int count) +{ + addbyte(0x66); /*SHL reg, count*/ + addbyte(0xc1); + addbyte(0xc0 | reg | 0x20); + addbyte(count); +} +static inline void SHL_L_IMM(int reg, int count) +{ + addbyte(0xc1); /*SHL reg, count*/ + addbyte(0xc0 | reg | 0x20); + addbyte(count); +} +static inline void SHR_B_IMM(int reg, int count) +{ + addbyte(0xc0); /*SHR reg, count*/ + addbyte(0xc0 | reg | 0x28); + addbyte(count); +} +static inline void SHR_W_IMM(int reg, int count) +{ + addbyte(0x66); /*SHR reg, count*/ + addbyte(0xc1); + addbyte(0xc0 | reg | 0x28); + addbyte(count); +} +static inline void SHR_L_IMM(int reg, int count) +{ + addbyte(0xc1); /*SHR reg, count*/ + addbyte(0xc0 | reg | 0x28); + addbyte(count); +} +static inline void SAR_B_IMM(int reg, int count) +{ + addbyte(0xc0); /*SAR reg, count*/ + addbyte(0xc0 | reg | 0x38); + addbyte(count); +} +static inline void SAR_W_IMM(int reg, int count) +{ + addbyte(0x66); /*SAR reg, count*/ + addbyte(0xc1); + addbyte(0xc0 | reg | 0x38); + addbyte(count); +} +static inline void SAR_L_IMM(int reg, int count) +{ + addbyte(0xc1); /*SAR reg, count*/ + addbyte(0xc0 | reg | 0x38); + addbyte(count); +} + + +static inline void CHECK_SEG_READ(x86seg *seg) +{ + /*Segments always valid in real/V86 mode*/ + if (!(cr0 & 1) || (cpu_state.eflags & VM_FLAG)) + return; + /*CS and SS must always be valid*/ + if (seg == &cpu_state.seg_cs || seg == &cpu_state.seg_ss) + return; + if (seg->checked) + return; + if (seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) + return; + + addbyte(0x83); /*CMP seg->base, -1*/ + addbyte(0x05|0x38); + addlong((uint32_t)&seg->base); + addbyte(-1); + addbyte(0x0f); + addbyte(0x84); /*JE BLOCK_GPF_OFFSET*/ + addlong(BLOCK_GPF_OFFSET - (block_pos + 4)); + + seg->checked = 1; +} +static inline void CHECK_SEG_WRITE(x86seg *seg) +{ + /*Segments always valid in real/V86 mode*/ + if (!(cr0 & 1) || (cpu_state.eflags & VM_FLAG)) + return; + /*CS and SS must always be valid*/ + if (seg == &cpu_state.seg_cs || seg == &cpu_state.seg_ss) + return; + if (seg->checked) + return; + if (seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) + return; + + addbyte(0x83); /*CMP seg->base, -1*/ + addbyte(0x05|0x38); + addlong((uint32_t)&seg->base); + addbyte(-1); + addbyte(0x0f); + addbyte(0x84); /*JE BLOCK_GPF_OFFSET*/ + addlong(BLOCK_GPF_OFFSET - (block_pos + 4)); + + seg->checked = 1; +} +static inline void CHECK_SEG_LIMITS(x86seg *seg, int end_offset) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + return; + + addbyte(0x3b); /*CMP EAX, seg->limit_low*/ + addbyte(0x05); + addlong((uint32_t)&seg->limit_low); + addbyte(0x0f); /*JB BLOCK_GPF_OFFSET*/ + addbyte(0x82); + addlong(BLOCK_GPF_OFFSET - (block_pos + 4)); + if (end_offset) + { + addbyte(0x83); /*ADD EAX, end_offset*/ + addbyte(0xc0); + addbyte(end_offset); + addbyte(0x3b); /*CMP EAX, seg->limit_high*/ + addbyte(0x05); + addlong((uint32_t)&seg->limit_high); + addbyte(0x0f); /*JNBE BLOCK_GPF_OFFSET*/ + addbyte(0x87); + addlong(BLOCK_GPF_OFFSET - (block_pos + 4)); + addbyte(0x83); /*SUB EAX, end_offset*/ + addbyte(0xe8); + addbyte(end_offset); + } +} + +static inline void MEM_LOAD_ADDR_EA_B(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_load_addr_ea_b*/ + addlong(mem_load_addr_ea_b - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[0] = 8; +} +static inline int MEM_LOAD_ADDR_EA_B_NO_ABRT(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_load_addr_ea_b_no_abrt*/ + addlong(mem_load_addr_ea_b_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} +static inline void MEM_LOAD_ADDR_EA_W(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_load_addr_ea_w*/ + addlong(mem_load_addr_ea_w - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[0] = 8; +} +static inline void MEM_LOAD_ADDR_EA_W_OFFSET(x86seg *seg, int offset) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0x83); /*ADD EAX, offset*/ + addbyte(0xc0); + addbyte(offset); + addbyte(0xe8); /*CALL mem_load_addr_ea_w*/ + addlong(mem_load_addr_ea_w - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[0] = 8; +} +static inline int MEM_LOAD_ADDR_EA_W_NO_ABRT(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_load_addr_ea_w_no_abrt*/ + addlong(mem_load_addr_ea_w_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} +static inline void MEM_LOAD_ADDR_EA_L(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_load_addr_ea_l*/ + addlong(mem_load_addr_ea_l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + + host_reg_mapping[0] = 8; +} +static inline int MEM_LOAD_ADDR_EA_L_NO_ABRT(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_load_addr_ea_l_no_abrt*/ + addlong(mem_load_addr_ea_l_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} + +static inline void MEM_LOAD_ADDR_EA_Q(x86seg *seg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_load_addr_ea_q*/ + addlong(mem_load_addr_ea_q - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[0] = 8; +} + +static inline void MEM_LOAD_ADDR_IMM_B(x86seg *seg, uint32_t addr) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_LOAD_ADDR_EA_B(seg); +} +static inline void MEM_LOAD_ADDR_IMM_W(x86seg *seg, uint32_t addr) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_LOAD_ADDR_EA_W(seg); +} +static inline void MEM_LOAD_ADDR_IMM_L(x86seg *seg, uint32_t addr) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_LOAD_ADDR_EA_L(seg); +} + +static inline void MEM_STORE_ADDR_EA_B(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + if (host_reg != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg*/ + addbyte(0xc0 | REG_ECX | (host_reg << 3)); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_b*/ + addlong(mem_store_addr_ea_b - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} +static inline void MEM_STORE_ADDR_EA_B_NO_ABRT(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + if (host_reg != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg*/ + addbyte(0xc0 | REG_ECX | (host_reg << 3)); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_b_no_abrt*/ + addlong(mem_store_addr_ea_b_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} +static inline void MEM_STORE_ADDR_EA_W(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + if (host_reg != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg*/ + addbyte(0xc0 | REG_ECX | (host_reg << 3)); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_w*/ + addlong(mem_store_addr_ea_w - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} +static inline void MEM_STORE_ADDR_EA_W_NO_ABRT(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + if (host_reg != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg*/ + addbyte(0xc0 | REG_ECX | (host_reg << 3)); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_w_no_abrt*/ + addlong(mem_store_addr_ea_w_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} +static inline void MEM_STORE_ADDR_EA_L(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + if (host_reg != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg*/ + addbyte(0xc0 | REG_ECX | (host_reg << 3)); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_l*/ + addlong(mem_store_addr_ea_l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} +static inline void MEM_STORE_ADDR_EA_L_NO_ABRT(x86seg *seg, int host_reg) +{ + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + if (host_reg != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg*/ + addbyte(0xc0 | REG_ECX | (host_reg << 3)); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_l_no_abrt*/ + addlong(mem_store_addr_ea_l_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} +static inline void MEM_STORE_ADDR_EA_Q(x86seg *seg, int host_reg, int host_reg2) +{ + if (host_reg != REG_EBX) + { + addbyte(0x89); /*MOV EBX, host_reg*/ + addbyte(0xc0 | REG_EBX | (host_reg << 3)); + } + if (host_reg2 != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg2*/ + addbyte(0xc0 | REG_ECX | (host_reg2 << 3)); + } + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_q*/ + addlong(mem_store_addr_ea_q - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} + +static inline void MEM_STORE_ADDR_IMM_B(x86seg *seg, uint32_t addr, int host_reg) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_STORE_ADDR_EA_B(seg, host_reg); +} +static inline void MEM_STORE_ADDR_IMM_L(x86seg *seg, uint32_t addr, int host_reg) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_STORE_ADDR_EA_L(seg, host_reg); +} +static inline void MEM_STORE_ADDR_IMM_W(x86seg *seg, uint32_t addr, int host_reg) +{ + addbyte(0xb8); /*MOV EAX, addr*/ + addlong(addr); + MEM_STORE_ADDR_EA_W(seg, host_reg); +} + + +static inline x86seg *FETCH_EA_16(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc) +{ + int mod = (fetchdat >> 6) & 3; + int rm = fetchdat & 7; + if (!mod && rm == 6) + { + addbyte(0xb8); /*MOVL EAX, imm16*/ + addlong((fetchdat >> 8) & 0xffff); + (*op_pc) += 2; + } + else + { + switch (mod) + { + case 0: + addbyte(0xa1); /*MOVL EAX, *mod1add[0][rm]*/ + addlong((uint32_t)mod1add[0][rm]); + if (mod1add[1][rm] != &zero) + { + addbyte(0x03); /*ADDL EAX, *mod1add[1][rm]*/ + addbyte(0x05); + addlong((uint32_t)mod1add[1][rm]); + } + break; + case 1: + addbyte(0xa1); /*MOVL EAX, *mod1add[0][rm]*/ + addlong((uint32_t)mod1add[0][rm]); + addbyte(0x83); /*ADDL EAX, imm8*/ + addbyte(0xc0 | REG_EAX); + addbyte((int8_t)(rmdat >> 8)); + if (mod1add[1][rm] != &zero) + { + addbyte(0x03); /*ADDL EAX, *mod1add[1][rm]*/ + addbyte(0x05); + addlong((uint32_t)mod1add[1][rm]); + } + (*op_pc)++; + break; + case 2: + addbyte(0xb8); /*MOVL EAX, imm16*/ + addlong((fetchdat >> 8) & 0xffff); + addbyte(0x03); /*ADDL EAX, *mod1add[0][rm]*/ + addbyte(0x05); + addlong((uint32_t)mod1add[0][rm]); + if (mod1add[1][rm] != &zero) + { + addbyte(0x03); /*ADDL EAX, *mod1add[1][rm]*/ + addbyte(0x05); + addlong((uint32_t)mod1add[1][rm]); + } + (*op_pc) += 2; + break; + } + addbyte(0x25); /*ANDL EAX, 0xffff*/ + addlong(0xffff); + + if (mod1seg[rm] == &ss && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + } + return op_ea_seg; +} + +static inline x86seg *FETCH_EA_32(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc, int stack_offset) +{ + uint32_t new_eaaddr; + int mod = (fetchdat >> 6) & 3; + int rm = fetchdat & 7; + + if (rm == 4) + { + uint8_t sib = fetchdat >> 8; + (*op_pc)++; + + switch (mod) + { + case 0: + if ((sib & 7) == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xb8); /*MOVL EAX, imm32*/ + addlong(new_eaaddr); + (*op_pc) += 4; + } + else + { + addbyte(0x8b); /*MOVL EAX, regs[sib&7].l*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[sib & 7].l)); + } + break; + case 1: + addbyte(0x8b); /*MOVL EAX, regs[sib&7].l*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[sib & 7].l)); + addbyte(0x83); /*ADDL EAX, imm8*/ + addbyte(0xc0 | REG_EAX); + addbyte((int8_t)(rmdat >> 16)); + (*op_pc)++; + break; + case 2: + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xb8); /*MOVL EAX, new_eaaddr*/ + addlong(new_eaaddr); + addbyte(0x03); /*ADDL EAX, regs[sib&7].l*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[sib & 7].l)); + (*op_pc) += 4; + break; + } + if (stack_offset && (sib & 7) == 4 && (mod || (sib & 7) != 5)) /*ESP*/ + { + if (stack_offset < 0x80 || stack_offset >= 0xffffff80) + { + addbyte(0x83); + addbyte(0xc0 | REG_EAX); + addbyte(stack_offset); + } + else + { + addbyte(0x05); /*ADDL EAX, stack_offset*/ + addlong(stack_offset); + } + } + if (((sib & 7) == 4 || (mod && (sib & 7) == 5)) && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + if (((sib >> 3) & 7) != 4) + { + switch (sib >> 6) + { + case 0: + addbyte(0x03); /*ADDL EAX, regs[sib&7].l*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[(sib >> 3) & 7].l)); + break; + case 1: + addbyte(0x8B); addbyte(0x45 | (REG_EDI << 3)); addbyte((uint8_t)cpu_state_offset(regs[(sib >> 3) & 7].l)); /*MOVL EDI, reg*/ + addbyte(0x01); addbyte(0xc0 | REG_EAX | (REG_EDI << 3)); /*ADDL EAX, EDI*/ + addbyte(0x01); addbyte(0xc0 | REG_EAX | (REG_EDI << 3)); /*ADDL EAX, EDI*/ + break; + case 2: + addbyte(0x8B); addbyte(0x45 | (REG_EDI << 3)); addbyte((uint8_t)cpu_state_offset(regs[(sib >> 3) & 7].l)); /*MOVL EDI, reg*/ + addbyte(0xC1); addbyte(0xE0 | REG_EDI); addbyte(2); /*SHL EDI, 2*/ + addbyte(0x01); addbyte(0xc0 | REG_EAX | (REG_EDI << 3)); /*ADDL EAX, EDI*/ + break; + case 3: + addbyte(0x8B); addbyte(0x45 | (REG_EDI << 3)); addbyte((uint8_t)cpu_state_offset(regs[(sib >> 3) & 7].l)); /*MOVL EDI reg*/ + addbyte(0xC1); addbyte(0xE0 | REG_EDI); addbyte(3); /*SHL EDI, 3*/ + addbyte(0x01); addbyte(0xc0 | REG_EAX | (REG_EDI << 3)); /*ADDL EAX, EDI*/ + break; + } + } + } + else + { + if (!mod && rm == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xb8); /*MOVL EAX, imm32*/ + addlong(new_eaaddr); + (*op_pc) += 4; + return op_ea_seg; + } + addbyte(0x8b); /*MOVL EAX, regs[rm].l*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[rm].l)); + cpu_state.eaaddr = cpu_state.regs[rm].l; + if (mod) + { + if (rm == 5 && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + if (mod == 1) + { + addbyte(0x83); /*ADD EAX, imm8*/ + addbyte(0xc0 | REG_EAX); + addbyte((int8_t)(fetchdat >> 8)); + (*op_pc)++; + } + else + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0x05); /*ADD EAX, imm32*/ + addlong(new_eaaddr); + (*op_pc) += 4; + } + } + } + return op_ea_seg; +} + +static inline x86seg *FETCH_EA(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc, uint32_t op_32) +{ + if (op_32 & 0x200) + return FETCH_EA_32(op_ea_seg, fetchdat, op_ssegs, op_pc, 0); + return FETCH_EA_16(op_ea_seg, fetchdat, op_ssegs, op_pc); +} + + +static inline void LOAD_STACK_TO_EA(int off) +{ + if (stack32) + { + addbyte(0x8b); /*MOVL EAX,[ESP]*/ + addbyte(0x45 | (REG_EAX << 3)); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].l)); + if (off) + { + addbyte(0x83); /*ADD EAX, off*/ + addbyte(0xc0 | (0 << 3) | REG_EAX); + addbyte(off); + } + } + else + { + addbyte(0x0f); /*MOVZX EAX,W[ESP]*/ + addbyte(0xb7); + addbyte(0x45 | (REG_EAX << 3)); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].w)); + if (off) + { + addbyte(0x66); /*ADD AX, off*/ + addbyte(0x05); + addword(off); + } + } +} + +static inline void LOAD_EBP_TO_EA(int off) +{ + if (stack32) + { + addbyte(0x8b); /*MOVL EAX,[EBP]*/ + addbyte(0x45 | (REG_EAX << 3)); + addbyte((uint8_t)cpu_state_offset(regs[REG_EBP].l)); + if (off) + { + addbyte(0x83); /*ADD EAX, off*/ + addbyte(0xc0 | (0 << 3) | REG_EAX); + addbyte(off); + } + } + else + { + addbyte(0x0f); /*MOVZX EAX,W[EBP]*/ + addbyte(0xb7); + addbyte(0x45 | (REG_EAX << 3)); + addbyte((uint8_t)cpu_state_offset(regs[REG_EBP].w)); + if (off) + { + addbyte(0x66); /*ADD AX, off*/ + addbyte(0x05); + addword(off); + } + } +} + +static inline void SP_MODIFY(int off) +{ + if (stack32) + { + if (off < 0x80) + { + addbyte(0x83); /*ADD [ESP], off*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].l)); + addbyte(off); + } + else + { + addbyte(0x81); /*ADD [ESP], off*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].l)); + addlong(off); + } + } + else + { + if (off < 0x80) + { + addbyte(0x66); /*ADD [SP], off*/ + addbyte(0x83); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].w)); + addbyte(off); + } + else + { + addbyte(0x66); /*ADD [SP], off*/ + addbyte(0x81); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[REG_ESP].w)); + addword(off); + } + } +} + + +static inline void TEST_ZERO_JUMP_W(int host_reg, uint32_t new_pc, int taken_cycles) +{ + addbyte(0x66); /*CMPW host_reg, 0*/ + addbyte(0x83); + addbyte(0xc0 | 0x38 | host_reg); + addbyte(0); + addbyte(0x75); /*JNZ +*/ + addbyte(7+5+(taken_cycles ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(new_pc); + if (taken_cycles) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(taken_cycles); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} +static inline void TEST_ZERO_JUMP_L(int host_reg, uint32_t new_pc, int taken_cycles) +{ + addbyte(0x83); /*CMPW host_reg, 0*/ + addbyte(0xc0 | 0x38 | host_reg); + addbyte(0); + addbyte(0x75); /*JNZ +*/ + addbyte(7+5+(taken_cycles ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(new_pc); + if (taken_cycles) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(taken_cycles); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} + +static inline void TEST_NONZERO_JUMP_W(int host_reg, uint32_t new_pc, int taken_cycles) +{ + addbyte(0x66); /*CMPW host_reg, 0*/ + addbyte(0x83); + addbyte(0xc0 | 0x38 | host_reg); + addbyte(0); + addbyte(0x74); /*JZ +*/ + addbyte(7+5+(taken_cycles ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(new_pc); + if (taken_cycles) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(taken_cycles); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} +static inline void TEST_NONZERO_JUMP_L(int host_reg, uint32_t new_pc, int taken_cycles) +{ + addbyte(0x83); /*CMPW host_reg, 0*/ + addbyte(0xc0 | 0x38 | host_reg); + addbyte(0); + addbyte(0x74); /*JZ +*/ + addbyte(7+5+(taken_cycles ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(new_pc); + if (taken_cycles) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(taken_cycles); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} + +static inline void BRANCH_COND_BE(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + switch (codegen_flags_changed ? cpu_state.flags_op : FLAGS_UNKNOWN) + { + case FLAGS_SUB8: + addbyte(0x8a); /*MOV AL, flags_op1*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op1)); + addbyte(0x3a); /*CMP AL, flags_op2*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op2)); + if (not) + addbyte(0x76); /*JBE*/ + else + addbyte(0x77); /*JNBE*/ + break; + case FLAGS_SUB16: + addbyte(0x66); /*MOV AX, flags_op1*/ + addbyte(0x8b); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op1)); + addbyte(0x66); /*CMP AX, flags_op2*/ + addbyte(0x3b); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op2)); + if (not) + addbyte(0x76); /*JBE*/ + else + addbyte(0x77); /*JNBE*/ + break; + case FLAGS_SUB32: + addbyte(0x8b); /*MOV EAX, flags_op1*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op1)); + addbyte(0x3b); /*CMP EAX, flags_op2*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op2)); + if (not) + addbyte(0x76); /*JBE*/ + else + addbyte(0x77); /*JNBE*/ + break; + + default: + if (codegen_flags_changed && cpu_state.flags_op != FLAGS_UNKNOWN) + { + addbyte(0x83); /*CMP flags_res, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(flags_res)); + addbyte(0); + addbyte(0x74); /*JZ +*/ + } + else + { + CALL_FUNC((uintptr_t)ZF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x75); /*JNZ +*/ + } + if (not) + addbyte(5+2+2+7+5+(timing_bt ? 4 : 0)); + else + addbyte(5+2+2); + CALL_FUNC((uintptr_t)CF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + if (not) + addbyte(0x75); /*JNZ +*/ + else + addbyte(0x74); /*JZ +*/ + break; + } + addbyte(7+5+(timing_bt ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(op_pc+pc_offset+offset); + if (timing_bt) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(timing_bt); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} + +static inline void BRANCH_COND_L(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + switch (codegen_flags_changed ? cpu_state.flags_op : FLAGS_UNKNOWN) + { + case FLAGS_SUB8: + addbyte(0x8a); /*MOV AL, flags_op1*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op1)); + addbyte(0x3a); /*CMP AL, flags_op2*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op2)); + if (not) + addbyte(0x7c); /*JL*/ + else + addbyte(0x7d); /*JNL*/ + break; + case FLAGS_SUB16: + addbyte(0x66); /*MOV AX, flags_op1*/ + addbyte(0x8b); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op1)); + addbyte(0x66); /*CMP AX, flags_op2*/ + addbyte(0x3b); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op2)); + if (not) + addbyte(0x7c); /*JL*/ + else + addbyte(0x7d); /*JNL*/ + break; + case FLAGS_SUB32: + addbyte(0x8b); /*MOV EAX, flags_op1*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op1)); + addbyte(0x3b); /*CMP EAX, flags_op2*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op2)); + if (not) + addbyte(0x7c); /*JL*/ + else + addbyte(0x7d); /*JNL*/ + break; + + default: + CALL_FUNC((uintptr_t)NF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*SETNE BL*/ + addbyte(0x95); + addbyte(0xc3); + CALL_FUNC((uintptr_t)VF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*SETNE AL*/ + addbyte(0x95); + addbyte(0xc0); + addbyte(0x38); /*CMP AL, BL*/ + addbyte(0xd8); + if (not) + addbyte(0x75); /*JNZ +*/ + else + addbyte(0x74); /*JZ +*/ + break; + } + addbyte(7+5+(timing_bt ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(op_pc+pc_offset+offset); + if (timing_bt) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(timing_bt); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} + +static inline void BRANCH_COND_LE(int pc_offset, uint32_t op_pc, uint32_t offset, int not) +{ + switch (codegen_flags_changed ? cpu_state.flags_op : FLAGS_UNKNOWN) + { + case FLAGS_SUB8: + addbyte(0x8a); /*MOV AL, flags_op1*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op1)); + addbyte(0x3a); /*CMP AL, flags_op2*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op2)); + if (not) + addbyte(0x7e); /*JLE*/ + else + addbyte(0x7f); /*JNLE*/ + break; + case FLAGS_SUB16: + addbyte(0x66); /*MOV AX, flags_op1*/ + addbyte(0x8b); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op1)); + addbyte(0x66); /*CMP AX, flags_op2*/ + addbyte(0x3b); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op2)); + if (not) + addbyte(0x7e); /*JLE*/ + else + addbyte(0x7f); /*JNLE*/ + break; + case FLAGS_SUB32: + addbyte(0x8b); /*MOV EAX, flags_op1*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op1)); + addbyte(0x3b); /*CMP EAX, flags_op2*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(flags_op2)); + if (not) + addbyte(0x7e); /*JLE*/ + else + addbyte(0x7f); /*JNLE*/ + break; + + default: + if (codegen_flags_changed && cpu_state.flags_op != FLAGS_UNKNOWN) + { + addbyte(0x83); /*CMP flags_res, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(flags_res)); + addbyte(0); + addbyte(0x74); /*JZ +*/ + } + else + { + CALL_FUNC((uintptr_t)ZF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x75); /*JNZ +*/ + } + if (not) + addbyte(5+2+3+5+2+3+2+2+7+5+(timing_bt ? 4 : 0)); + else + addbyte(5+2+3+5+2+3+2+2); + + CALL_FUNC((uintptr_t)NF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*SETNE BL*/ + addbyte(0x95); + addbyte(0xc3); + CALL_FUNC((uintptr_t)VF_SET); + addbyte(0x85); /*TEST EAX,EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*SETNE AL*/ + addbyte(0x95); + addbyte(0xc0); + addbyte(0x38); /*CMP AL, BL*/ + addbyte(0xd8); + if (not) + addbyte(0x75); /*JNZ +*/ + else + addbyte(0x74); /*JZ +*/ + break; + } + addbyte(7+5+(timing_bt ? 4 : 0)); + addbyte(0xC7); /*MOVL [pc], new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(op_pc+pc_offset+offset); + if (timing_bt) + { + addbyte(0x83); /*SUB $codegen_block_cycles, cyclcs*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(_cycles)); + addbyte(timing_bt); + } + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} + + +static inline void FP_ENTER() +{ + if (codegen_fpu_entered) + return; + + addbyte(0xf6); /*TEST cr0, 0xc*/ + addbyte(0x05); + addlong((uintptr_t)&cr0); + addbyte(0xc); + addbyte(0x74); /*JZ +*/ + addbyte(7+7+5+5); + addbyte(0xC7); /*MOVL [oldpc],op_old_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(oldpc)); + addlong(op_old_pc); + addbyte(0xc7); /*MOV [ESP], 7*/ + addbyte(0x04); + addbyte(0x24); + addlong(7); + addbyte(0xe8); /*CALL x86_int*/ + addlong((uint32_t)x86_int - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + + codegen_fpu_entered = 1; +} + +static inline void FP_FLD(int reg) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xf3); /*MOVQ XMM0, ST[reg][EBP]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + reg) & 7])); + addbyte(0xc6); /*MOVB TOP[EBP], (TOP-1) & 7*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte((cpu_state.TOP - 1) & 7); + addbyte(0xf3); /*MOVQ XMM1, MM[reg][EBP]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(MM[(cpu_state.TOP + reg) & 7].q)); + addbyte(0x66); /*MOVQ ST[-1][EBP], XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP - 1) & 7])); + addbyte(0x8a); /*MOV AL, tag[reg][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP + reg) & 7])); + addbyte(0x66); /*MOVQ MM[-1][EBP], XMM1*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(MM[(cpu_state.TOP - 1) & 7].q)); + addbyte(0x88); /*MOV tag[-1][EBP], AL*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP - 1) & 7])); + } + else + { + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + if (reg) + { + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(reg); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(0x07); + } + else + { + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(0x01); + } + + addbyte(0xdd); /*FLD [ST+EAX*8]*/ + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(0x07); + addbyte(0x8b); /*MOV EDX, [ST_i64+EAX]*/ + addbyte(0x54); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x8b); /*MOV ECX, [ST_i64+4+EAX]*/ + addbyte(0x4c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(MM)+4); + addbyte(0x8a); /*MOV AL, [tag+EAX]*/ + addbyte(0x44); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x88); /*MOV [tag+EBX], AL*/ + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(0x89); /*MOV [ST_i64+EBX], EDX*/ + addbyte(0x54); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x89); /*MOV [ST_i64+EBX+4], ECX*/ + addbyte(0x4c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)+4); + + addbyte(0x89); /*MOV [TOP], EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + } +} + +static inline void FP_FST(int reg) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xf3); /*MOVQ XMM0, ST[0][EBP]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x8a); /*MOV AL, tag[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(0x66); /*MOVQ ST[reg][EBP], XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + reg) & 7])); + addbyte(0x88); /*MOV tag[reg][EBP], AL*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP + reg) & 7])); + } + else + { + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xdd); /*FLD [ST+EAX*8]*/ + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x8a); /*MOV BL, [tag+EAX]*/ + addbyte(0x5c); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + + if (reg) + { + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(reg); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(0x07); + } + + addbyte(0xdd); /*FSTP [ST+EAX*8]*/ + addbyte(0x5c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x88); /*MOV [tag+EAX], BL*/ + addbyte(0x5c); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + } +} + +static inline void FP_FXCH(int reg) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xf3); /*MOVQ XMM0, ST[0][EBP]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0xf3); /*MOVQ XMM1, ST[reg][EBP]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + reg) & 7])); + addbyte(0x66); /*MOVQ ST[reg][EBP], XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + reg) & 7])); + addbyte(0xf3); /*MOVQ XMM2, MM[0][EBP]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x55); + addbyte((uint8_t)cpu_state_offset(MM[cpu_state.TOP].q)); + addbyte(0x66); /*MOVQ ST[0][EBP], XMM1*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0xf3); /*MOVQ XMM3, MM[reg][EBP]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(MM[(cpu_state.TOP + reg) & 7].q)); + addbyte(0x66); /*MOVQ MM[reg][EBP], XMM2*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x55); + addbyte((uint8_t)cpu_state_offset(MM[(cpu_state.TOP + reg) & 7].q)); + addbyte(0x8a); /*MOV AL, tag[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(0x66); /*MOVQ MM[0][EBP], XMM3*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(MM[cpu_state.TOP].q)); + addbyte(0x8a); /*MOV AH, tag[reg][EBP]*/ + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP + reg) & 7])); + addbyte(0x88); /*MOV tag[reg][EBP], AL*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP + reg) & 7])); + addbyte(0x88); /*MOV tag[0][EBP], AH*/ + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + } + else + { + addbyte(0x8b); /*MOV EAX, [TOP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + addbyte(0x83); /*ADD EAX, reg*/ + addbyte(0xc0); + addbyte(reg); + + addbyte(0xdd); /*FLD [ST+EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(0x07); + addbyte(0xdd); /*FLD [ST+EAX*8]*/ + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0xdd); /*FSTP [ST+EAX*8]*/ + addbyte(0x5c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x8a); /*MOV CL, tag[EAX]*/ + addbyte(0x4c); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(0x8a); /*MOV DL, tag[EBX]*/ + addbyte(0x54); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(0x88); /*MOV tag[EBX], CL*/ + addbyte(0x4c); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(0x88); /*MOV tag[EAX], DL*/ + addbyte(0x54); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(0xbe); /*MOVL ESI, ST_int64*/ + addlong((uintptr_t)cpu_state.MM); + addbyte(0x8b); /*MOV ECX, ST_int64[EAX*8]*/ + addbyte(0x0c); + addbyte(0xc6); + addbyte(0x8b); /*MOV EDX, ST_int64[EBX*8]*/ + addbyte(0x14); + addbyte(0xde); + addbyte(0x89); /*MOV ST_int64[EBX*8], ECX*/ + addbyte(0x0c); + addbyte(0xde); + addbyte(0x89); /*MOV ST_int64[EAX*8], EDX*/ + addbyte(0x14); + addbyte(0xc6); + addbyte(0x8b); /*MOV ECX, ST_int64[EAX*8]+4*/ + addbyte(0x4c); + addbyte(0xc6); + addbyte(0x04); + addbyte(0x8b); /*MOV EDX, ST_int64[EBX*8]+4*/ + addbyte(0x54); + addbyte(0xde); + addbyte(0x04); + addbyte(0x89); /*MOV ST_int64[EBX*8]+4, ECX*/ + addbyte(0x4c); + addbyte(0xde); + addbyte(0x04); + addbyte(0x89); /*MOV ST_int64[EAX*8]+4, EDX*/ + addbyte(0x54); + addbyte(0xc6); + addbyte(0x04); + } +} + + +static inline void FP_LOAD_S() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0xd9); /*FLD [ESP]*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xc6); /*MOVB TOP[EBP], (TOP-1) & 7*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte((cpu_state.TOP - 1) & 7); + addbyte(0x0f); /*SETE tag[reg][EBP]*/ + addbyte(0x94); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP - 1) & 7])); + addbyte(0xdd); /*FSTP ST[reg][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP - 1) & 7])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(1); + addbyte(0xd9); /*FLD [ESP]*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0f); /*SETE [tag+EBX]*/ + addbyte(0x94); + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + } +} +static inline void FP_LOAD_D() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV ST[reg][EBP], EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP - 1) & 7])); + addbyte(0x09); /*OR EAX, EDX*/ + addbyte(0xd0); + addbyte(0x89); /*MOV ST[reg][EBP]+4, EDX*/ + addbyte(0x55); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP - 1) & 7]) + 4); + addbyte(0xc6); /*MOVB TOP[EBP], (TOP-1) & 7*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte((cpu_state.TOP - 1) & 7); + addbyte(0x0f); /*SETE tag[reg][EBP]*/ + addbyte(0x94); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP - 1) & 7])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x89); /*MOV [ESP+4], EDX*/ + addbyte(0x54); + addbyte(0x24); + addbyte(0x04); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(1); + addbyte(0x09); /*OR EAX, EDX*/ + addbyte(0xd0); + addbyte(0xdd); /*FLD [ESP]*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x83); /*CMP EAX, 0*/ + addbyte(0xf8); + addbyte(0); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0f); /*SETE [tag+EBX]*/ + addbyte(0x94); + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + } +} +static inline void FP_LOAD_IW() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x66); /*MOV [ESP], AX*/ + addbyte(0x89); + addbyte(0x04); + addbyte(0x24); + addbyte(0x66); /*TEST AX, AX*/ + addbyte(0x85); + addbyte(0xc0); + addbyte(0xdf); /*FILDw [ESP]*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xc6); /*MOVB TOP[EBP], (TOP-1) & 7*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte((cpu_state.TOP - 1) & 7); + addbyte(0x0f); /*SETE tag[reg][EBP]*/ + addbyte(0x94); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP - 1) & 7])); + addbyte(0xdd); /*FSTP ST[reg][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP - 1) & 7])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(1); + addbyte(0xdf); /*FILDw [ESP]*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x83); /*CMP EAX, 0*/ + addbyte(0xf8); + addbyte(0); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0f); /*SETE [tag+EBX]*/ + addbyte(0x94); + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + } +} +static inline void FP_LOAD_IL() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0xdb); /*FILDl [ESP]*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xc6); /*MOVB TOP[EBP], (TOP-1) & 7*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte((cpu_state.TOP - 1) & 7); + addbyte(0x0f); /*SETE tag[reg][EBP]*/ + addbyte(0x94); + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP - 1) & 7])); + addbyte(0xdd); /*FSTP ST[reg][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP - 1) & 7])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(1); + addbyte(0xdb); /*FILDl [ESP]*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x83); /*CMP EAX, 0*/ + addbyte(0xf8); + addbyte(0); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0f); /*SETE [tag+EBX]*/ + addbyte(0x94); + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + } +} +static inline void FP_LOAD_IQ() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV MM[reg][EBP], EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(MM[(cpu_state.TOP - 1) & 7].q)); + addbyte(0x09); /*OR EAX, EDX*/ + addbyte(0xd0); + addbyte(0x89); /*MOV MM[reg][EBP]+4, EDX*/ + addbyte(0x55); + addbyte((uint8_t)cpu_state_offset(MM[(cpu_state.TOP - 1) & 7].q) + 4); + addbyte(0x0f); /*SETE AL*/ + addbyte(0x94); + addbyte(0xc0); + addbyte(0xdf); /*FILDq MM[reg][EBP]*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(MM[(cpu_state.TOP - 1) & 7].q)); + addbyte(0x0c); /*OR AL, TAG_UINT64*/ + addbyte(TAG_UINT64); + addbyte(0xc6); /*MOVB TOP[EBP], (TOP-1) & 7*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte((cpu_state.TOP - 1) & 7); + addbyte(0x88); /*MOV tag[reg][EBP], AL*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP - 1) & 7])); + addbyte(0xdd); /*FSTP ST[reg][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP - 1) & 7])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(1); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0x89); /*MOV [ST_i64+EBX*8], EAX*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x09); /*OR EAX, EDX*/ + addbyte(0xd0); + addbyte(0x89); /*MOV [ST_i64+4+EBX*8], EDX*/ + addbyte(0x54); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)+4); + addbyte(0x83); /*CMP EAX, 0*/ + addbyte(0xf8); + addbyte(0); + addbyte(0xdf); /*FILDl [ST_i64+EBX*8]*/ + addbyte(0x6c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)); + addbyte(0x0f); /*SETE AL*/ + addbyte(0x94); + addbyte(0xc0); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x0c); /*OR AL, TAG_UINT64*/ + addbyte(TAG_UINT64); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x88); /*MOV [tag+EBX], AL*/ + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + } +} + +static inline void FP_LOAD_IMM_Q(uint64_t v) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xc7); /*MOV ST[reg][EBP], v*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP - 1) & 7])); + addlong(v & 0xffffffff); + addbyte(0xc7); /*MOV ST[reg][EBP]+4, v*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP - 1) & 7]) + 4); + addlong(v >> 32); + addbyte(0xc6); /*MOVB TOP[EBP], (TOP-1) & 7*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte((cpu_state.TOP - 1) & 7); + addbyte(0xc6); /*MOVB tag[reg][EBP], 1:0*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP - 1) & 7])); + addbyte(v ? 0 : 1); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x83); /*SUB EBX, 1*/ + addbyte(0xeb); + addbyte(1); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + addbyte(0xc7); /*MOV ST[EBP+EBX*8], v*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addlong(v & 0xffffffff); + addbyte(0xc7); /*MOV ST[EBP+EBX*8]+4, v*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST) + 4); + addlong(v >> 32); + addbyte(0xc6); /*MOVB tag[reg][EBP], 1:0*/ + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(v ? 0 : 1); + addbyte(0x89); /*MOV TOP, EBX*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + } +} + +static inline int FP_LOAD_REG(int reg) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xdd); /*FLD ST[reg][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + reg) & 7])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EBX, reg*/ + addbyte(0xc3); + addbyte(reg); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + } + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } + addbyte(0xd9); /*FSTP [ESP]*/ + addbyte(0x1c); + addbyte(0x24); + addbyte(0x8b); /*MOV EAX, [ESP]*/ + addbyte(0x04 | (REG_EBX << 3)); + addbyte(0x24); + + return REG_EBX; +} + +static inline void FP_LOAD_REG_D(int reg, int *host_reg1, int *host_reg2) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xdd); /*FLD ST[reg][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + reg) & 7])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EBX, reg*/ + addbyte(0xc3); + addbyte(reg); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + } + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } + addbyte(0xdd); /*FSTP [ESP]*/ + addbyte(0x1c); + addbyte(0x24); + addbyte(0x8b); /*MOV EBX, [ESP]*/ + addbyte(0x04 | (REG_EBX << 3)); + addbyte(0x24); + addbyte(0x8b); /*MOV ECX, [ESP+4]*/ + addbyte(0x44 | (REG_ECX << 3)); + addbyte(0x24); + addbyte(0x04); + + *host_reg1 = REG_EBX; + *host_reg2 = REG_ECX; +} + +static inline int FP_LOAD_REG_INT_W(int reg) +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EBX, reg*/ + addbyte(0xc3); + addbyte(reg); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + } + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + + addbyte(0xd9); /*FLDCW cpu_state.new_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(new_npxc)); + addbyte(0xdb); /*FISTP [ESP]*/ + addbyte(0x1c); + addbyte(0x24); + addbyte(0xd9); /*FLDCW cpu_state.old_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(old_npxc)); + addbyte(0x8b); /*MOV EBX, [ESP]*/ + addbyte(0x1c); + addbyte(0x24); + + return REG_EBX; +} +static inline int FP_LOAD_REG_INT(int reg) +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EBX, reg*/ + addbyte(0xc3); + addbyte(reg); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + } + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + + addbyte(0xd9); /*FLDCW cpu_state.new_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(new_npxc)); + addbyte(0xdb); /*FISTP [ESP]*/ + addbyte(0x1c); + addbyte(0x24); + addbyte(0xd9); /*FLDCW cpu_state.old_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(old_npxc)); + addbyte(0x8b); /*MOV EBX, [ESP]*/ + addbyte(0x1c); + addbyte(0x24); + + return REG_EBX; +} +static inline void FP_LOAD_REG_INT_Q(int reg, int *host_reg1, int *host_reg2) +{ + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + if (reg) + { + addbyte(0x83); /*ADD EBX, reg*/ + addbyte(0xc3); + addbyte(reg); + addbyte(0x83); /*AND EBX, 7*/ + addbyte(0xe3); + addbyte(7); + } + if (codegen_fpu_loaded_iq[cpu_state.TOP] && (cpu_state.tag[cpu_state.TOP] & TAG_UINT64)) + { + /*If we know the register was loaded with FILDq in this block and + has not been modified, then we can skip most of the conversion + and just load the 64-bit integer representation directly */ + addbyte(0x8b); /*MOV ECX, [ST_i64+EBX*8]*/ + addbyte(0x4c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)+4); + addbyte(0x8b); /*MOV EBX, [ST_i64+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)); + + return; + } + + addbyte(0xf6); /*TEST TAG[EBX], TAG_UINT64*/ + addbyte(0x44); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_UINT64); + addbyte(0x74); /*JZ +*/ + addbyte(4+4+2); + + addbyte(0x8b); /*MOV ECX, [ST_i64+EBX*8]*/ + addbyte(0x4c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)+4); + addbyte(0x8b); /*MOV EBX, [ST_i64+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(MM)); + + addbyte(0xeb); /*JMP done*/ + addbyte(4+3+3+3+3+4); + + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + + addbyte(0xd9); /*FLDCW cpu_state.new_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(new_npxc)); + addbyte(0xdf); /*FISTPQ [ESP]*/ + addbyte(0x3c); + addbyte(0x24); + addbyte(0xd9); /*FLDCW cpu_state.old_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(old_npxc)); + addbyte(0x8b); /*MOV EBX, [ESP]*/ + addbyte(0x1c); + addbyte(0x24); + addbyte(0x8b); /*MOV ECX, 4[ESP]*/ + addbyte(0x4c); + addbyte(0x24); + addbyte(4); + + *host_reg1 = REG_EBX; + *host_reg2 = REG_ECX; +} + +static inline void FP_POP() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xc6); /*MOVB tag[0][EBP], 3*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(3); + addbyte(0xc6); /*MOVB TOP[EBP], (TOP-1) & 7*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte((cpu_state.TOP + 1) & 7); + } + else + { + addbyte(0x8b); /*MOV EAX, TOP*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xc6); /*MOVB tag[EAX], 3*/ + addbyte(0x44); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(3); + addbyte(0x04); /*ADD AL, 1*/ + addbyte(1); + addbyte(0x24); /*AND AL, 7*/ + addbyte(7); + addbyte(0x88); /*MOV TOP, AL*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + } +} +static inline void FP_POP2() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xc6); /*MOVB tag[0][EBP], 3*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(3); + addbyte(0xc6); /*MOVB tag[1][EBP], 3*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP+1)&7])); + addbyte(3); + addbyte(0xc6); /*MOVB TOP[EBP], (TOP+2) & 7*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte((cpu_state.TOP + 2) & 7); + } + else + { + addbyte(0x8b); /*MOV EAX, TOP*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0xc6); /*MOVB tag[EAX], 3*/ + addbyte(0x44); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(3); + addbyte(0x04); /*ADD AL, 2*/ + addbyte(2); + addbyte(0x24); /*AND AL, 7*/ + addbyte(7); + addbyte(0x88); /*MOV TOP, AL*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + } +} + +#define FPU_ADD 0x00 +#define FPU_DIV 0x30 +#define FPU_DIVR 0x38 +#define FPU_MUL 0x08 +#define FPU_SUB 0x20 +#define FPU_SUBR 0x28 + +static inline void FP_OP_S(int op) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[dst][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x80); /*AND tag[dst][EBP], ~TAG_UINT64*/ + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(TAG_NOT_UINT64); + addbyte(0xd8); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP ST[dst][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EBX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_NOT_UINT64); + addbyte(0xd8); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } +} +static inline void FP_OP_D(int op) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x89); /*MOV [ESP+4], EDX*/ + addbyte(0x54); + addbyte(0x24); + addbyte(0x04); + if (((cpu_state.npxc >> 10) & 3) && op == FPU_ADD) + { + addbyte(0xd9); /*FLDCW cpu_state.new_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(new_npxc)); + } + addbyte(0xdd); /*FLD ST[dst][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x80); /*AND tag[dst][EBP], ~TAG_UINT64*/ + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(TAG_NOT_UINT64); + addbyte(0xdc); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP ST[dst][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + if (((cpu_state.npxc >> 10) & 3) && op == FPU_ADD) + { + addbyte(0xd9); /*FLDCW cpu_state.old_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(old_npxc)); + } + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + if (((cpu_state.npxc >> 10) & 3) && op == FPU_ADD) + { + addbyte(0xd9); /*FLDCW cpu_state.new_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(new_npxc)); + } + addbyte(0x89); /*MOV [ESP+4], EDX*/ + addbyte(0x54); + addbyte(0x24); + addbyte(0x04); + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EBX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_NOT_UINT64); + addbyte(0xdc); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + if (((cpu_state.npxc >> 10) & 3) && op == FPU_ADD) + { + addbyte(0xd9); /*FLDCW cpu_state.old_npxc*/ + addbyte(0x6d); + addbyte((uint8_t)cpu_state_offset(old_npxc)); + } + } +} +static inline void FP_OP_IW(int op) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x66); /*MOV [ESP], AX*/ + addbyte(0x89); + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x80); /*AND tag[0][EBP], ~TAG_UINT64*/ + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(TAG_NOT_UINT64); + addbyte(0xde); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP ST[0][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EBX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_NOT_UINT64); + addbyte(0xde); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } +} +static inline void FP_OP_IL(int op) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x80); /*AND tag[0][EBP], ~TAG_UINT64*/ + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(TAG_NOT_UINT64); + addbyte(0xda); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP ST[0][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EBX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_NOT_UINT64); + addbyte(0xda); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } +} +#if 0 +static inline void FP_OP_IQ(int op) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x89); /*MOV [ESP+4], EDX*/ + addbyte(0x54); + addbyte(0x24); + addbyte(0x04); + addbyte(0xdd); /*FLD ST[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x80); /*AND tag[0][EBP], ~TAG_UINT64*/ + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(TAG_NOT_UINT64); + addbyte(0xdc); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP ST[0][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x89); /*MOV [ESP+4], EDX*/ + addbyte(0x54); + addbyte(0x24); + addbyte(0x04); + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EBX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_NOT_UINT64); + addbyte(0xdc); /*FADD [ESP]*/ + addbyte(0x04 | op); + addbyte(0x24); + addbyte(0xdd); /*FSTP [ST+EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } +} +#endif + +static inline void FP_COMPARE_S() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x8a); /*MOV BL, [npxs+1]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0x80); /*AND BL, ~(C0|C2|C3)*/ + addbyte(0xe3); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0xd8); /*FCOMP [ESP]*/ + addbyte(0x04 | 0x18); + addbyte(0x24); + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR BL, AH*/ + addbyte(0xe3); + addbyte(0x88); /*MOV [npxs+1], BL*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x8a); /*MOV BL, [npxs+1]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0x80); /*AND BL, ~(C0|C2|C3)*/ + addbyte(0xe3); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0xd8); /*FCOMP [ESP]*/ + addbyte(0x04 | 0x18); + addbyte(0x24); + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR BL, AH*/ + addbyte(0xe3); + addbyte(0x88); /*MOV [npxs+1], BL*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } +} +static inline void FP_COMPARE_D() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x89); /*MOV [ESP+4], EDX*/ + addbyte(0x54); + addbyte(0x24); + addbyte(0x04); + addbyte(0xdd); /*FLD ST[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x8a); /*MOV BL, [npxs+1]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0x80); /*AND BL, ~(C0|C2|C3)*/ + addbyte(0xe3); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0xdc); /*FCOMP [ESP]*/ + addbyte(0x04 | 0x18); + addbyte(0x24); + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR BL, AH*/ + addbyte(0xe3); + addbyte(0x88); /*MOV [npxs+1], BL*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0x89); /*MOV [ESP+4], EDX*/ + addbyte(0x54); + addbyte(0x24); + addbyte(0x04); + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x8a); /*MOV BL, [npxs+1]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0x80); /*AND BL, ~(C0|C2|C3)*/ + addbyte(0xe3); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0xdc); /*FCOMP [ESP]*/ + addbyte(0x04 | 0x18); + addbyte(0x24); + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR BL, AH*/ + addbyte(0xe3); + addbyte(0x88); /*MOV [npxs+1], BL*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } +} +static inline void FP_COMPARE_IW() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x66); /*MOV [ESP], AX*/ + addbyte(0x89); + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x8a); /*MOV BL, [npxs+1]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0x80); /*AND BL, ~(C0|C2|C3)*/ + addbyte(0xe3); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0xde); /*FCOMP [ESP]*/ + addbyte(0x04 | 0x18); + addbyte(0x24); + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR BL, AH*/ + addbyte(0xe3); + addbyte(0x88); /*MOV [npxs+1], BL*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x8a); /*MOV BL, [npxs+1]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0x80); /*AND BL, ~(C0|C2|C3)*/ + addbyte(0xe3); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0xde); /*FCOMP [ESP]*/ + addbyte(0x04 | 0x18); + addbyte(0x24); + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR BL, AH*/ + addbyte(0xe3); + addbyte(0x88); /*MOV [npxs+1], BL*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } +} +static inline void FP_COMPARE_IL() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0x8a); /*MOV BL, [npxs+1]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0x80); /*AND BL, ~(C0|C2|C3)*/ + addbyte(0xe3); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0xda); /*FCOMP [ESP]*/ + addbyte(0x04 | 0x18); + addbyte(0x24); + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR BL, AH*/ + addbyte(0xe3); + addbyte(0x88); /*MOV [npxs+1], BL*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } + else + { + addbyte(0x8b); /*MOV EBX, TOP*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV [ESP], EAX*/ + addbyte(0x04); + addbyte(0x24); + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x8a); /*MOV BL, [npxs+1]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0x80); /*AND BL, ~(C0|C2|C3)*/ + addbyte(0xe3); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0xda); /*FCOMP [ESP]*/ + addbyte(0x04 | 0x18); + addbyte(0x24); + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR BL, AH*/ + addbyte(0xe3); + addbyte(0x88); /*MOV [npxs+1], BL*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } +} + +static inline void FP_OP_REG(int op, int dst, int src) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xdd); /*FLD ST[dst][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + dst) & 7])); + addbyte(0xdc); /*FADD ST[src][EBP]*/ + addbyte(0x45 | op); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + src) & 7])); + addbyte(0x80); /*AND tag[dst][EBP], ~TAG_UINT64*/ + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(tag[(cpu_state.TOP + dst) & 7])); + addbyte(TAG_NOT_UINT64); + addbyte(0xdd); /*FSTP ST[dst][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + dst) & 7])); + } + else + { + addbyte(0x8b); /*MOV EAX, TOP*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + if (src || dst) + { + addbyte(0x83); /*ADD EAX, 1*/ + addbyte(0xc0); + addbyte(src ? src : dst); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(7); + } + + if (src) + { + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EBX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x1d); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_NOT_UINT64); + addbyte(0xdc); /*FADD ST[EAX*8]*/ + addbyte(0x44 | op); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0xdd); /*FSTP ST[EBX*8]*/ + addbyte(0x5c); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } + else + { + addbyte(0xdd); /*FLD [ESI+EAX*8]*/ + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EAX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_NOT_UINT64); + addbyte(0xdc); /*FADD ST[EBX*8]*/ + addbyte(0x44 | op); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0xdd); /*FSTP ST[EAX*8]*/ + addbyte(0x5c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + } + } +} + +static inline void FP_COMPARE_REG(int dst, int src) +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0x8a); /*MOV CL, [npxs+1]*/ + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0xdd); /*FLD ST[dst][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + dst) & 7])); + addbyte(0x80); /*AND CL, ~(C0|C2|C3)*/ + addbyte(0xe1); + addbyte((~(C0|C2|C3)) >> 8); + addbyte(0xdc); /*FCOMP ST[src][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[(cpu_state.TOP + src) & 7])); + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR CL, AH*/ + addbyte(0xe1); + addbyte(0x88); /*MOV [npxs+1], CL*/ + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } + else + { + addbyte(0x8b); /*MOV EAX, TOP*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + if (src || dst) + { + addbyte(0x83); /*ADD EAX, 1*/ + addbyte(0xc0); + addbyte(src ? src : dst); + addbyte(0x83); /*AND EAX, 7*/ + addbyte(0xe0); + addbyte(7); + } + + addbyte(0x8a); /*MOV CL, [npxs+1]*/ + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + addbyte(0xdb); /*FCLEX*/ + addbyte(0xe2); + addbyte(0x80); /*AND CL, ~(C0|C2|C3)*/ + addbyte(0xe1); + addbyte((~(C0|C2|C3)) >> 8); + + if (src) + { + addbyte(0xdd); /*FLD ST[EBX*8]*/ + addbyte(0x44); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0xdc); /*FCOMP ST[EAX*8]*/ + addbyte(0x44 | 0x18); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + } + else + { + addbyte(0xdd); /*FLD [ESI+EAX*8]*/ + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0xdc); /*FCOMP ST[EBX*8]*/ + addbyte(0x44 | 0x18); + addbyte(0xdd); + addbyte((uint8_t)cpu_state_offset(ST)); + } + + addbyte(0xdf); /*FSTSW AX*/ + addbyte(0xe0); + addbyte(0x80); /*AND AH, (C0|C2|C3)*/ + addbyte(0xe4); + addbyte((C0|C2|C3) >> 8); + addbyte(0x08); /*OR CL, AH*/ + addbyte(0xe1); + addbyte(0x88); /*MOV [npxs+1], CL*/ + addbyte(0x4d); + addbyte((uint8_t)cpu_state_offset(npxs) + 1); + } +} + +static inline void FP_FCHS() +{ + if (codeblock[block_current].flags & CODEBLOCK_STATIC_TOP) + { + addbyte(0xdd); /*FLD ST[0][EBP]*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + addbyte(0xd9); /*FCHS*/ + addbyte(0xe0); + addbyte(0x80); /*AND tag[dst][EBP], ~TAG_UINT64*/ + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(tag[cpu_state.TOP])); + addbyte(TAG_NOT_UINT64); + addbyte(0xdd); /*FSTP ST[dst][EBP]*/ + addbyte(0x5d); + addbyte((uint8_t)cpu_state_offset(ST[cpu_state.TOP])); + } + else + { + addbyte(0x8b); /*MOV EAX, TOP*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + + addbyte(0xdd); /*FLD [ESI+EAX*8]*/ + addbyte(0x44); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + addbyte(0x80); /*AND tag[EAX], ~TAG_UINT64*/ + addbyte(0x64); + addbyte(0x05); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(TAG_NOT_UINT64); + addbyte(0xd9); /*FCHS*/ + addbyte(0xe0); + addbyte(0xdd); /*FSTP ST[EAX*8]*/ + addbyte(0x5c); + addbyte(0xc5); + addbyte((uint8_t)cpu_state_offset(ST)); + } +} + +static inline void UPDATE_NPXC(int reg) +{ + addbyte(0x66); /*AND cpu_state.new_npxc, ~0xc00*/ + addbyte(0x81); + addbyte(0x65); + addbyte((uint8_t)cpu_state_offset(new_npxc)); + addword(~0xc00); + if (reg) + { + addbyte(0x66); /*AND reg, 0xc00*/ + addbyte(0x81); + addbyte(0xe0 | reg); + addword(0xc00); + } + else + { + addbyte(0x66); /*AND AX, 0xc00*/ + addbyte(0x25); + addword(0xc00); + } + addbyte(0x66); /*OR cpu_state.new_npxc, reg*/ + addbyte(0x09); + addbyte(0x45 | (reg << 3)); + addbyte((uint8_t)cpu_state_offset(new_npxc)); +} + +static inline int ZERO_EXTEND_W_B(int reg) +{ + addbyte(0x0f); /*MOVZX regl, regb*/ + addbyte(0xb6); + addbyte(0xc0 | reg | (reg << 3)); + return reg; +} +static inline int ZERO_EXTEND_L_B(int reg) +{ + addbyte(0x0f); /*MOVZX regl, regb*/ + addbyte(0xb6); + addbyte(0xc0 | reg | (reg << 3)); + return reg; +} +static inline int ZERO_EXTEND_L_W(int reg) +{ + addbyte(0x0f); /*MOVZX regl, regw*/ + addbyte(0xb7); + addbyte(0xc0 | reg | (reg << 3)); + return reg; +} + +static inline int SIGN_EXTEND_W_B(int reg) +{ + addbyte(0x0f); /*MOVSX regl, regb*/ + addbyte(0xbe); + addbyte(0xc0 | reg | (reg << 3)); + return reg; +} +static inline int SIGN_EXTEND_L_B(int reg) +{ + addbyte(0x0f); /*MOVSX regl, regb*/ + addbyte(0xbe); + addbyte(0xc0 | reg | (reg << 3)); + return reg; +} +static inline int SIGN_EXTEND_L_W(int reg) +{ + addbyte(0x0f); /*MOVSX regl, regw*/ + addbyte(0xbf); + addbyte(0xc0 | reg | (reg << 3)); + return reg; +} + +static inline int COPY_REG(int src_reg) +{ + return src_reg; +} + +static inline void SET_BITS(uintptr_t addr, uint32_t val) +{ + if (val & ~0xff) + { + addbyte(0x81); + addbyte(0x0d); + addlong(addr); + addlong(val); + } + else + { + addbyte(0x80); + addbyte(0x0d); + addlong(addr); + addbyte(val); + } +} +static inline void CLEAR_BITS(uintptr_t addr, uint32_t val) +{ + if (val & ~0xff) + { + addbyte(0x81); + addbyte(0x25); + addlong(addr); + addlong(~val); + } + else + { + addbyte(0x80); + addbyte(0x25); + addlong(addr); + addbyte(~val); + } +} + +#define LOAD_Q_REG_1 REG_EAX +#define LOAD_Q_REG_2 REG_EDX + +static inline void MMX_ENTER() +{ + if (codegen_mmx_entered) + return; + + addbyte(0xf6); /*TEST cr0, 0xc*/ + addbyte(0x05); + addlong((uintptr_t)&cr0); + addbyte(0xc); + addbyte(0x74); /*JZ +*/ + addbyte(7+7+5+5); + addbyte(0xC7); /*MOVL [oldpc],op_old_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(oldpc)); + addlong(op_old_pc); + addbyte(0xc7); /*MOV [ESP], 7*/ + addbyte(0x04); + addbyte(0x24); + addlong(7); + addbyte(0xe8); /*CALL x86_int*/ + addlong((uint32_t)x86_int - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0xe9); /*JMP end*/ + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); + + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + addbyte(0xc6); /*MOV ISMMX, 1*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ismmx)); + addbyte(1); + addbyte(0x89); /*MOV TOP, EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(TOP)); + addbyte(0x89); /*MOV tag, EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[0])); + addbyte(0x89); /*MOV tag+4, EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(tag[4])); + + codegen_mmx_entered = 1; +} + +extern int mmx_ebx_ecx_loaded; + +static inline int LOAD_MMX_D(int guest_reg) +{ + int host_reg = find_host_reg(); + host_reg_mapping[host_reg] = 100; + + addbyte(0x8b); /*MOV EBX, reg*/ + addbyte(0x45 | (host_reg << 3)); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[0])); + + return host_reg; +} +static inline void LOAD_MMX_Q(int guest_reg, int *host_reg1, int *host_reg2) +{ + if (!mmx_ebx_ecx_loaded) + { + *host_reg1 = REG_EBX; + *host_reg2 = REG_ECX; + mmx_ebx_ecx_loaded = 1; + } + else + { + *host_reg1 = REG_EAX; + *host_reg2 = REG_EDX; + } + + addbyte(0x8b); /*MOV EBX, reg*/ + addbyte(0x45 | ((*host_reg1) << 3)); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[0])); + addbyte(0x8b); /*MOV ECX, reg+4*/ + addbyte(0x45 | ((*host_reg2) << 3)); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[1])); +} +static inline int LOAD_MMX_Q_MMX(int guest_reg) +{ + int dst_reg = find_host_xmm_reg(); + host_reg_xmm_mapping[dst_reg] = guest_reg; + + addbyte(0xf3); /*MOVQ dst_reg,[reg]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x45 | (dst_reg << 3)); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].q)); + + return dst_reg; +} + +static inline int LOAD_INT_TO_MMX(int src_reg1, int src_reg2) +{ + int dst_reg = find_host_xmm_reg(); + host_reg_xmm_mapping[dst_reg] = 100; + + addbyte(0x66); /*MOVD dst_reg, src_reg1*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0 | (dst_reg << 3) | src_reg1); + addbyte(0x66); /*MOVD XMM7, src_reg2*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0 | (7 << 3) | src_reg2); + addbyte(0x66); /*PUNPCKLDQ dst_reg, XMM7*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc0 | 7 | (dst_reg << 3)); + + return dst_reg; +} + +static inline void STORE_MMX_LQ(int guest_reg, int host_reg1) +{ + addbyte(0xC7); /*MOVL [reg],0*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[1])); + addlong(0); + addbyte(0x89); /*MOVL [reg],host_reg*/ + addbyte(0x45 | (host_reg1 << 3)); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[0])); +} +static inline void STORE_MMX_Q(int guest_reg, int host_reg1, int host_reg2) +{ + addbyte(0x89); /*MOVL [reg],host_reg*/ + addbyte(0x45 | (host_reg1 << 3)); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[0])); + addbyte(0x89); /*MOVL [reg],host_reg*/ + addbyte(0x45 | (host_reg2 << 3)); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].l[1])); +} +static inline void STORE_MMX_Q_MMX(int guest_reg, int host_reg) +{ + addbyte(0x66); /*MOVQ [guest_reg],host_reg*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x45 | (host_reg << 3)); + addbyte((uint8_t)cpu_state_offset(MM[guest_reg].q)); +} + +#define MMX_x86_OP(name, opcode) \ +static inline void MMX_ ## name(int dst_reg, int src_reg) \ +{ \ + addbyte(0x66); /*op dst_reg, src_reg*/ \ + addbyte(0x0f); \ + addbyte(opcode); \ + addbyte(0xc0 | (dst_reg << 3) | src_reg); \ +} + +MMX_x86_OP(AND, 0xdb) +MMX_x86_OP(ANDN, 0xdf) +MMX_x86_OP(OR, 0xeb) +MMX_x86_OP(XOR, 0xef) + +MMX_x86_OP(ADDB, 0xfc) +MMX_x86_OP(ADDW, 0xfd) +MMX_x86_OP(ADDD, 0xfe) +MMX_x86_OP(ADDSB, 0xec) +MMX_x86_OP(ADDSW, 0xed) +MMX_x86_OP(ADDUSB, 0xdc) +MMX_x86_OP(ADDUSW, 0xdd) + +MMX_x86_OP(SUBB, 0xf8) +MMX_x86_OP(SUBW, 0xf9) +MMX_x86_OP(SUBD, 0xfa) +MMX_x86_OP(SUBSB, 0xe8) +MMX_x86_OP(SUBSW, 0xe9) +MMX_x86_OP(SUBUSB, 0xd8) +MMX_x86_OP(SUBUSW, 0xd9) + +MMX_x86_OP(PUNPCKLBW, 0x60); +MMX_x86_OP(PUNPCKLWD, 0x61); +MMX_x86_OP(PUNPCKLDQ, 0x62); +MMX_x86_OP(PCMPGTB, 0x64); +MMX_x86_OP(PCMPGTW, 0x65); +MMX_x86_OP(PCMPGTD, 0x66); + +MMX_x86_OP(PCMPEQB, 0x74); +MMX_x86_OP(PCMPEQW, 0x75); +MMX_x86_OP(PCMPEQD, 0x76); + +MMX_x86_OP(PSRLW, 0xd1); +MMX_x86_OP(PSRLD, 0xd2); +MMX_x86_OP(PSRLQ, 0xd3); +MMX_x86_OP(PSRAW, 0xe1); +MMX_x86_OP(PSRAD, 0xe2); +MMX_x86_OP(PSLLW, 0xf1); +MMX_x86_OP(PSLLD, 0xf2); +MMX_x86_OP(PSLLQ, 0xf3); + +MMX_x86_OP(PMULLW, 0xd5); +MMX_x86_OP(PMULHW, 0xe5); +MMX_x86_OP(PMADDWD, 0xf5); + +static inline void MMX_PACKSSWB(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PACKSSWB dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x63); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x08); +} +static inline void MMX_PACKUSWB(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PACKUSWB dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x08); +} +static inline void MMX_PACKSSDW(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PACKSSDW dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x08); +} +static inline void MMX_PUNPCKHBW(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PUNPCKLBW dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x0e); +} +static inline void MMX_PUNPCKHWD(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PUNPCKLWD dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x0e); +} +static inline void MMX_PUNPCKHDQ(int dst_reg, int src_reg) +{ + addbyte(0x66); /*PUNPCKLDQ dst_reg, src_reg*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc0 | (dst_reg << 3) | src_reg); + addbyte(0x66); /*PSHUFD dst_reg, dst_reg*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0 | (dst_reg << 3) | dst_reg); + addbyte(0x0e); +} + +static inline void MMX_PSRLW_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRLW dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xc0 | dst_reg | 0x10); + addbyte(amount); +} +static inline void MMX_PSRAW_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRAW dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xc0 | dst_reg | 0x20); + addbyte(amount); +} +static inline void MMX_PSLLW_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSLLW dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xc0 | dst_reg | 0x30); + addbyte(amount); +} + +static inline void MMX_PSRLD_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRLD dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xc0 | dst_reg | 0x10); + addbyte(amount); +} +static inline void MMX_PSRAD_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRAD dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xc0 | dst_reg | 0x20); + addbyte(amount); +} +static inline void MMX_PSLLD_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSLLD dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xc0 | dst_reg | 0x30); + addbyte(amount); +} + +static inline void MMX_PSRLQ_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRLQ dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xc0 | dst_reg | 0x10); + addbyte(amount); +} +static inline void MMX_PSRAQ_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSRAQ dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xc0 | dst_reg | 0x20); + addbyte(amount); +} +static inline void MMX_PSLLQ_imm(int dst_reg, int amount) +{ + addbyte(0x66); /*PSLLQ dst_reg, amount*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xc0 | dst_reg | 0x30); + addbyte(amount); +} + + +static inline void SAVE_EA() +{ + addbyte(0x89); /*MOV [ESP+12], EAX*/ + addbyte(0x44); + addbyte(0x24); + addbyte(12); +} +static inline void LOAD_EA() +{ + addbyte(0x8b); /*MOV EAX, [ESP+12]*/ + addbyte(0x44); + addbyte(0x24); + addbyte(12); +} + +#define MEM_CHECK_WRITE_B MEM_CHECK_WRITE +static inline void MEM_CHECK_WRITE(x86seg *seg) +{ + CHECK_SEG_WRITE(seg); + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_check_write*/ + addlong(mem_check_write - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + LOAD_EA(); +} +static inline void MEM_CHECK_WRITE_W(x86seg *seg) +{ + CHECK_SEG_WRITE(seg); + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_check_write_w*/ + addlong(mem_check_write_w - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + LOAD_EA(); +} +static inline void MEM_CHECK_WRITE_L(x86seg *seg) +{ + CHECK_SEG_WRITE(seg); + if ((seg == &cpu_state.seg_ds && codegen_flat_ds && !(cpu_cur_status & CPU_STATUS_NOTFLATDS)) || (seg == &cpu_state.seg_ss && codegen_flat_ss && !(cpu_cur_status & CPU_STATUS_NOTFLATSS))) + { + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); + } + else + { + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + } + addbyte(0xe8); /*CALL mem_check_write_l*/ + addlong(mem_check_write_l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + LOAD_EA(); +} + +static inline void LOAD_SEG(int host_reg, void *seg) +{ + addbyte(0xc7); /*MOV [ESP+4], seg*/ + addbyte(0x44); + addbyte(0x24); + addbyte(4); + addlong((uint32_t)seg); + addbyte(0x89); /*MOV [ESP], host_reg*/ + addbyte(0x04 | (host_reg << 3)); + addbyte(0x24); + CALL_FUNC((uintptr_t)loadseg); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE end*/ + addbyte(0x85); + addlong(BLOCK_EXIT_OFFSET - (block_pos + 4)); +} diff --git a/src/codegen/codegen_ops_xchg.h b/src/codegen/codegen_ops_xchg.h new file mode 100644 index 000000000..597d26e1b --- /dev/null +++ b/src/codegen/codegen_ops_xchg.h @@ -0,0 +1,93 @@ +#define OP_XCHG_AX_(reg) \ + static uint32_t ropXCHG_AX_ ## reg(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int ax_reg, host_reg, temp_reg; \ + \ + ax_reg = LOAD_REG_W(REG_AX); \ + host_reg = LOAD_REG_W(REG_ ## reg); \ + temp_reg = COPY_REG(host_reg); \ + STORE_REG_TARGET_W_RELEASE(ax_reg, REG_ ## reg); \ + STORE_REG_TARGET_W_RELEASE(temp_reg, REG_AX); \ + \ + return op_pc; \ + } + +OP_XCHG_AX_(BX) +OP_XCHG_AX_(CX) +OP_XCHG_AX_(DX) +OP_XCHG_AX_(SI) +OP_XCHG_AX_(DI) +OP_XCHG_AX_(SP) +OP_XCHG_AX_(BP) + +#define OP_XCHG_EAX_(reg) \ + static uint32_t ropXCHG_EAX_ ## reg(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ + { \ + int eax_reg, host_reg, temp_reg; \ + \ + eax_reg = LOAD_REG_L(REG_EAX); \ + host_reg = LOAD_REG_L(REG_ ## reg); \ + temp_reg = COPY_REG(host_reg); \ + STORE_REG_TARGET_L_RELEASE(eax_reg, REG_ ## reg); \ + STORE_REG_TARGET_L_RELEASE(temp_reg, REG_EAX); \ + \ + return op_pc; \ + } + +OP_XCHG_EAX_(EBX) +OP_XCHG_EAX_(ECX) +OP_XCHG_EAX_(EDX) +OP_XCHG_EAX_(ESI) +OP_XCHG_EAX_(EDI) +OP_XCHG_EAX_(ESP) +OP_XCHG_EAX_(EBP) + +static uint32_t ropXCHG_b(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ +/* #ifdef __amd64__ + return 0; +#else */ + int src_reg, dst_reg, temp_reg; + + if ((fetchdat & 0xc0) != 0xc0) + return 0; + + dst_reg = LOAD_REG_B(fetchdat & 7); + src_reg = LOAD_REG_B((fetchdat >> 3) & 7); + temp_reg = COPY_REG(src_reg); + STORE_REG_TARGET_B_RELEASE(dst_reg, (fetchdat >> 3) & 7); + STORE_REG_TARGET_B_RELEASE(temp_reg, fetchdat & 7); + + return op_pc + 1; +/* #endif */ +} +static uint32_t ropXCHG_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg, temp_reg; + + if ((fetchdat & 0xc0) != 0xc0) + return 0; + + dst_reg = LOAD_REG_W(fetchdat & 7); + src_reg = LOAD_REG_W((fetchdat >> 3) & 7); + temp_reg = COPY_REG(src_reg); + STORE_REG_TARGET_W_RELEASE(dst_reg, (fetchdat >> 3) & 7); + STORE_REG_TARGET_W_RELEASE(temp_reg, fetchdat & 7); + + return op_pc + 1; +} +static uint32_t ropXCHG_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) +{ + int src_reg, dst_reg, temp_reg; + + if ((fetchdat & 0xc0) != 0xc0) + return 0; + + dst_reg = LOAD_REG_L(fetchdat & 7); + src_reg = LOAD_REG_L((fetchdat >> 3) & 7); + temp_reg = COPY_REG(src_reg); + STORE_REG_TARGET_L_RELEASE(dst_reg, (fetchdat >> 3) & 7); + STORE_REG_TARGET_L_RELEASE(temp_reg, fetchdat & 7); + + return op_pc + 1; +} diff --git a/src/codegen/codegen_timing_486.c b/src/codegen/codegen_timing_486.c new file mode 100644 index 000000000..a3859ecce --- /dev/null +++ b/src/codegen/codegen_timing_486.c @@ -0,0 +1,421 @@ +#include +#include +#include +#include +#include <86box/86box.h> +#include <86box/mem.h> +#include "cpu.h" +#include "x86.h" +#include "x86_ops.h" +#include "x87.h" +#include "codegen.h" +#include "codegen_ops.h" +#include "codegen_timing_common.h" + +#define CYCLES(c) (int *)c +#define CYCLES2(c16, c32) (int *)((-1 & ~0xffff) | c16 | (c32 << 8)) + +static int *opcode_timings[256] = +{ +/*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, +/*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), +/*20*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), +/*30*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(2), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(2), + +/*40*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, +/*50*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*60*/ CYCLES(11), CYCLES(9), CYCLES(7), CYCLES(9), CYCLES(4), CYCLES(4), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES2(17,25), CYCLES(1), CYCLES2(17,20), CYCLES(17), CYCLES(17), CYCLES(17), CYCLES(17), +/*70*/ &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, + +/*80*/ &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm, &timing_rm, CYCLES(5), CYCLES(5), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(1), CYCLES(5), CYCLES(6), +/*90*/ CYCLES(1), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(0), CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(2), CYCLES(3), +/*a0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(7), CYCLES(7), CYCLES(8), CYCLES(8), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), +/*b0*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, + +/*c0*/ CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(1), CYCLES(1), CYCLES(14), CYCLES(5), CYCLES(0), CYCLES(0), &timing_int, &timing_int, CYCLES(3), CYCLES(0), +/*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), +/*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL +}; + +static int *opcode_timings_mod3[256] = +{ +/*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, +/*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), +/*20*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), +/*30*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(2), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(2), + +/*40*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, +/*50*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*60*/ CYCLES(11), CYCLES(9), CYCLES(7), CYCLES(9), CYCLES(4), CYCLES(4), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES2(14,25), CYCLES(1), CYCLES2(17,20), CYCLES(17), CYCLES(17), CYCLES(17), CYCLES(17), +/*70*/ &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, + +/*80*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(5), CYCLES(5), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(1), CYCLES(2), CYCLES(1), +/*90*/ CYCLES(1), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(0), CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(2), CYCLES(3), +/*a0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(7), CYCLES(7), CYCLES(8), CYCLES(8), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), +/*b0*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, + +/*c0*/ CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(1), CYCLES(1), CYCLES(14), CYCLES(5), CYCLES(0), CYCLES(0), &timing_int, &timing_int, CYCLES(3), CYCLES(0), +/*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), +/*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL +}; + +static int *opcode_timings_0f[256] = +{ +/*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ CYCLES(9), CYCLES(1), CYCLES(9), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, +/*70*/ NULL, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, CYCLES(100), NULL, NULL, NULL, NULL, NULL, NULL, &timing_rm, &timing_rm, + +/*80*/ &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, +/*90*/ CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), +/*a0*/ CYCLES(3), CYCLES(3), CYCLES(14), CYCLES(8), CYCLES(3), CYCLES(4), NULL, NULL, CYCLES(3), CYCLES(3), NULL, CYCLES(13), CYCLES(3), CYCLES(3), NULL, CYCLES2(18,30), +/*b0*/ CYCLES(10), CYCLES(10), CYCLES(6), CYCLES(13), CYCLES(6), CYCLES(6), CYCLES(3), CYCLES(3), NULL, NULL, CYCLES(6), CYCLES(13), CYCLES(7), CYCLES(7), CYCLES(3), CYCLES(3), + +/*c0*/ CYCLES(4), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*d0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, +/*e0*/ NULL, &timing_rm, &timing_rm, NULL, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, +/*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, +}; +static int *opcode_timings_0f_mod3[256] = +{ +/*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ CYCLES(9), CYCLES(1), CYCLES(9), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, +/*70*/ NULL, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(100), NULL, NULL, NULL, NULL, NULL, NULL, &timing_rr, &timing_rr, + +/*80*/ &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, +/*90*/ CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), +/*a0*/ CYCLES(3), CYCLES(3), CYCLES(14), CYCLES(8), CYCLES(3), CYCLES(4), NULL, NULL, CYCLES(3), CYCLES(3), NULL, CYCLES(13), CYCLES(3), CYCLES(3), NULL, CYCLES2(18,30), +/*b0*/ CYCLES(10), CYCLES(10), CYCLES(6), CYCLES(13), CYCLES(6), CYCLES(6), CYCLES(3), CYCLES(3), NULL, NULL, CYCLES(6), CYCLES(13), CYCLES(7), CYCLES(7), CYCLES(3), CYCLES(3), + +/*c0*/ CYCLES(4), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*d0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, +/*e0*/ NULL, &timing_rr, &timing_rr, NULL, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, +/*f0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, +}; + +static int *opcode_timings_shift[8] = +{ + CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) +}; +static int *opcode_timings_shift_mod3[8] = +{ + CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) +}; + +static int *opcode_timings_f6[8] = +{ + &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) +}; +static int *opcode_timings_f6_mod3[8] = +{ + &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) +}; +static int *opcode_timings_f7[8] = +{ + &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) +}; +static int *opcode_timings_f7_mod3[8] = +{ + &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) +}; +static int *opcode_timings_ff[8] = +{ + &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL +}; +static int *opcode_timings_ff_mod3[8] = +{ + &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL +}; + +static int *opcode_timings_d8[8] = +{ +/* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ + CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) +}; +static int *opcode_timings_d8_mod3[8] = +{ +/* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ + CYCLES(8), CYCLES(16), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) +}; + +static int *opcode_timings_d9[8] = +{ +/* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ + CYCLES(3), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) +}; +static int *opcode_timings_d9_mod3[64] = +{ + /*FLD*/ + CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), + /*FXCH*/ + CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), + /*FNOP*/ + CYCLES(3), NULL, NULL, NULL, NULL, NULL, NULL, NULL, + /*FSTP*/ + CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), +/* opFCHS opFABS opFTST opFXAM*/ + CYCLES(6), CYCLES(3), NULL, NULL, CYCLES(4), CYCLES(8), NULL, NULL, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI opFLDEG2 opFLDLN2 opFLDZ*/ + CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(8), CYCLES(8), CYCLES(8), CYCLES(4), NULL, +/* opF2XM1 opFYL2X opFPTAN opFPATAN opFDECSTP opFINCSTP,*/ + CYCLES(140), CYCLES(196), CYCLES(200), CYCLES(218), NULL, NULL, CYCLES(3), CYCLES(3), +/* opFPREM opFSQRT opFSINCOS opFRNDINT opFSCALE opFSIN opFCOS*/ + CYCLES(70), NULL, CYCLES(83), CYCLES(292), CYCLES(21), CYCLES(30), CYCLES(257), CYCLES(257) +}; + +static int *opcode_timings_da[8] = +{ +/* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ + CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) +}; +static int *opcode_timings_da_mod3[8] = +{ + NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL +}; + + +static int *opcode_timings_db[8] = +{ +/* FLDil FSTil FSTPil FLDe FSTPe*/ + CYCLES(9), NULL, CYCLES(28), CYCLES(28), NULL, CYCLES(5), NULL, CYCLES(6) +}; +static int *opcode_timings_db_mod3[64] = +{ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/* opFNOP opFCLEX opFINIT opFNOP opFNOP*/ + NULL, CYCLES(3), CYCLES(7), CYCLES(17), CYCLES(3), CYCLES(3), NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +static int *opcode_timings_dc[8] = +{ +/* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ + CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) +}; +static int *opcode_timings_dc_mod3[8] = +{ +/* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + CYCLES(8), CYCLES(16), NULL, NULL, CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) +}; + +static int *opcode_timings_dd[8] = +{ +/* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ + CYCLES(3), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(3) +}; +static int *opcode_timings_dd_mod3[8] = +{ +/* FFFREE FST FSTP FUCOM FUCOMP*/ + CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL +}; + +static int *opcode_timings_de[8] = +{ +/* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ + CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) +}; +static int *opcode_timings_de_mod3[8] = +{ +/* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ + CYCLES(8), CYCLES(16), NULL, CYCLES(5), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) +}; + +static int *opcode_timings_df[8] = +{ +/* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ + CYCLES(13), NULL, CYCLES(29), CYCLES(29), NULL, CYCLES(10), CYCLES(172), CYCLES(28) +}; +static int *opcode_timings_df_mod3[8] = +{ +/* FFREE FST FSTP FUCOM FUCOMP*/ + CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL +}; + +static int *opcode_timings_8x[8] = +{ + &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm +}; +static int *opcode_timings_8x_mod3[8] = +{ + &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm +}; +static int *opcode_timings_81[8] = +{ + &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm +}; +static int *opcode_timings_81_mod3[8] = +{ + &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm +}; + +static int timing_count; +static uint8_t last_prefix; +static uint32_t regmask_modified; + +static inline int COUNT(int *c, int op_32) +{ + if ((uintptr_t)c <= 10000) + return (int)(uintptr_t)c; + if (((uintptr_t)c & ~0xffff) == (-1 & ~0xffff)) + { + if (op_32 & 0x100) + return ((uintptr_t)c >> 8) & 0xff; + return (uintptr_t)c & 0xff; + } + return *c; +} + +void codegen_timing_486_block_start() +{ + regmask_modified = 0; +} + +void codegen_timing_486_start() +{ + timing_count = 0; + last_prefix = 0; +} + +void codegen_timing_486_prefix(uint8_t prefix, uint32_t fetchdat) +{ + timing_count += COUNT(opcode_timings[prefix], 0); + last_prefix = prefix; +} + +void codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + int **timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); + + switch (last_prefix) + { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; + + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) + { + case 0x80: case 0x82: case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; + + default: + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } + + timing_count += COUNT(timings[opcode], op_32); + if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) + timing_count++; /*AGI stall*/ + codegen_block_cycles += timing_count; + + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); +} + +void codegen_timing_486_block_end() +{ +} + +codegen_timing_t codegen_timing_486 = +{ + codegen_timing_486_start, + codegen_timing_486_prefix, + codegen_timing_486_opcode, + codegen_timing_486_block_start, + codegen_timing_486_block_end, + NULL +}; diff --git a/src/codegen/codegen_timing_686.c b/src/codegen/codegen_timing_686.c new file mode 100644 index 000000000..ff7ff54d2 --- /dev/null +++ b/src/codegen/codegen_timing_686.c @@ -0,0 +1,1057 @@ +/*Elements taken into account : + - X/Y pairing + - FPU/FXCH pairing + - Prefix decode delay + - AGI stalls + Elements not taken into account : + - Branch prediction (beyond most simplistic approximation) + - FPU queue + - Out of order execution (beyond most simplistic approximation) +*/ +#include +#include +#include +#include +#include <86box/86box.h> +#include <86box/mem.h> +#include "cpu.h" +#include "x86.h" +#include "x86_ops.h" +#include "x87.h" +#include "codegen.h" +#include "codegen_timing_common.h" + +/*Instruction has different execution time for 16 and 32 bit data. Does not pair */ +#define CYCLES_HAS_MULTI (1 << 31) + +#define CYCLES_MULTI(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) + +/*Instruction lasts given number of cycles. Does not pair*/ +#define CYCLES(c) (c) + +/*Instruction follows either register timing, read-modify, or read-modify-write. + May be pairable*/ +#define CYCLES_REG (1 << 0) +#define CYCLES_RM (1 << 0) +#define CYCLES_RMW (1 << 0) +#define CYCLES_BRANCH (1 << 0) + +#define CYCLES_MASK ((1 << 7) - 1) + +/*Instruction does not pair*/ +#define PAIR_NP (0 << 29) +/*Instruction pairs in X pipe only*/ +#define PAIR_X (1 << 29) +/*Instruction pairs in X pipe only, and can not pair with a following instruction*/ +#define PAIR_X_BRANCH (2 << 29) +/*Instruction pairs in both X and Y pipes*/ +#define PAIR_XY (3 << 29) + +#define PAIR_MASK (3 << 29) + +#define INVALID 0 + +static int prev_full; +static uint32_t prev_opcode; +static uint32_t *prev_timings; +static uint32_t prev_op_32; +static uint32_t prev_regmask; +static uint64_t *prev_deps; +static uint32_t prev_fetchdat; + +static uint32_t last_regmask_modified; +static uint32_t regmask_modified; + +static uint32_t opcode_timings[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, +/* ADD ADD PUSH ES POP ES*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), +/* OR OR OR OR*/ + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, +/* OR OR PUSH CS */ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(1), INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, +/* ADC ADC PUSH SS POP SS*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), +/* SBB SBB SBB SBB*/ + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, +/* SBB SBB PUSH DS POP DS*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), + +/* AND AND AND AND*/ +/*20*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, +/* AND AND DAA*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, INVALID, PAIR_NP | CYCLES(7), +/* SUB SUB SUB SUB*/ + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, +/* SUB SUB DAS*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, INVALID, PAIR_NP | CYCLES(7), + +/* XOR XOR XOR XOR*/ +/*30*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, +/* XOR XOR AAA*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, INVALID, PAIR_NP | CYCLES(7), +/* CMP CMP CMP CMP*/ + PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, +/* CMP CMP AAS*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, INVALID, PAIR_NP | CYCLES(7), + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* INC ESP INC EBP INC ESI INC EDI*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* POP EAX POP ECX POP EDX POP EBX*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* POP ESP POP EBP POP ESI POP EDI*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(9), + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(10), PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(10), +/* INSB INSW OUTSB OUTSW*/ + PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), + +/* Jxx*/ +/*70*/ PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), +/* MOV MOV MOV MOV*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* MOV from seg LEA MOV to seg POP*/ + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES_REG, CYCLES(3), PAIR_XY | CYCLES(1), + +/* NOP XCHG XCHG XCHG*/ +/*90*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), +/* XCHG XCHG XCHG XCHG*/ + PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), +/* CBW CWD CALL far WAIT*/ + PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(2), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), +/* PUSHF POPF SAHF LAHF*/ + PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(2), + +/* MOV MOV MOV MOV*/ +/*a0*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* MOVSB MOVSW CMPSB CMPSW*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), +/* TEST TEST STOSB STOSW*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), +/* LODSB LODSW SCASB SCASW*/ + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + +/* MOV*/ +/*b0*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, PAIR_X_BRANCH | CYCLES(3), PAIR_X_BRANCH | CYCLES(2), +/* LES LDS MOV MOV*/ + PAIR_XY | CYCLES(4), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* ENTER LEAVE RETF RETF*/ + PAIR_XY | CYCLES(10), PAIR_XY | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), +/* INT3 INT INTO IRET*/ + PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(16), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(10), + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + PAIR_XY | CYCLES(18), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(4), + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ PAIR_X_BRANCH| CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, +/* IN AL IN AX OUT_AL OUT_AX*/ + PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), +/* CALL JMP JMP JMP*/ + PAIR_X_BRANCH | CYCLES_REG, PAIR_X_BRANCH | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_X_BRANCH | CYCLES_REG, +/* IN AL IN AX OUT_AL OUT_AX*/ + PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), +/* HLT CMC*/ + PAIR_NP | CYCLES(5), PAIR_XY | CYCLES(2), INVALID, INVALID, +/* CLC STC CLI STI*/ + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), +/* CLD STD INCDEC*/ + PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES_RMW, INVALID +}; + +static uint32_t opcode_timings_mod3[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* ADD ADD PUSH ES POP ES*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), +/* OR OR OR OR*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* OR OR PUSH CS */ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(1), INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* ADC ADC PUSH SS POP SS*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), +/* SBB SBB SBB SBB*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* SBB SBB PUSH DS POP DS*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), + +/* AND AND AND AND*/ +/*20*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* AND AND DAA*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, INVALID, PAIR_NP | CYCLES(7), +/* SUB SUB SUB SUB*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* SUB SUB DAS*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, INVALID, PAIR_NP | CYCLES(7), + +/* XOR XOR XOR XOR*/ +/*30*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* XOR XOR AAA*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, INVALID, PAIR_NP | CYCLES(7), +/* CMP CMP CMP CMP*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* CMP CMP AAS*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, INVALID, PAIR_NP | CYCLES(7), + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* INC ESP INC EBP INC ESI INC EDI*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* POP EAX POP ECX POP EDX POP EBX*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* POP ESP POP EBP POP ESI POP EDI*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(9), + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(10), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(10), +/* INSB INSW OUTSB OUTSW*/ + PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), + +/* Jxx*/ +/*70*/ PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + +/*80*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* TEST TEST XCHG XCHG*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), +/* MOV MOV MOV MOV*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* MOV from seg LEA MOV to seg POP*/ + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_XY | CYCLES(1), + +/* NOP XCHG XCHG XCHG*/ +/*90*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), +/* XCHG XCHG XCHG XCHG*/ + PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), +/* CBW CWD CALL far WAIT*/ + PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(2), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), +/* PUSHF POPF SAHF LAHF*/ + PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(2), + +/* MOV MOV MOV MOV*/ +/*a0*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* MOVSB MOVSW CMPSB CMPSW*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), +/* TEST TEST STOSB STOSW*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), +/* LODSB LODSW SCASB SCASW*/ + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + +/* MOV*/ +/*b0*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, PAIR_X_BRANCH | CYCLES(3), PAIR_X_BRANCH | CYCLES(2), +/* LES LDS MOV MOV*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +/* ENTER LEAVE RETF RETF*/ + PAIR_XY | CYCLES(13), PAIR_XY | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), +/* INT3 INT INTO IRET*/ + PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(16), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(10), + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + PAIR_XY | CYCLES(18), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(4), + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ PAIR_X_BRANCH| CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, +/* IN AL IN AX OUT_AL OUT_AX*/ + PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), +/* CALL JMP JMP JMP*/ + PAIR_X_BRANCH | CYCLES_REG, PAIR_X_BRANCH | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_X_BRANCH | CYCLES_REG, +/* IN AL IN AX OUT_AL OUT_AX*/ + PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(14), + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), +/* HLT CMC*/ + PAIR_NP | CYCLES(4), PAIR_XY | CYCLES(2), INVALID, INVALID, +/* CLC STC CLI STI*/ + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), +/* CLD STD INCDEC*/ + PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES_REG, INVALID +}; + +static uint32_t opcode_timings_0f[256] = +{ +/*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), + INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, + PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), + PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ PAIR_NP | CYCLES(9), CYCLES(1), PAIR_NP | CYCLES(9), INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, + PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, + PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, + INVALID, INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, + +/*70*/ INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, + PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES(1), + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, + +/*80*/ PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + +/*90*/ PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + +/*a0*/ PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(12), PAIR_XY | CYCLES(5), + PAIR_XY | CYCLES(4), PAIR_XY | CYCLES(5), INVALID, INVALID, + PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(1), INVALID, PAIR_XY | CYCLES(5), + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), INVALID, PAIR_NP | CYCLES(10), + +/*b0*/ PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(4), PAIR_XY | CYCLES(5), + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + INVALID, INVALID, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(5), + PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(1), INVALID, + +/*c0*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + PAIR_XY | CYCLES(4), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES(4), + PAIR_XY | CYCLES(4), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES(4), + +/*d0*/ INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, + INVALID, PAIR_X | CYCLES_RM, INVALID, INVALID, + PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, PAIR_X | CYCLES_RM, + PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, PAIR_X | CYCLES_RM, + +/*e0*/ INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, + INVALID, PAIR_X | CYCLES_RM, INVALID, INVALID, + PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, PAIR_X | CYCLES_RM, + PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, PAIR_X | CYCLES_RM, + +/*f0*/ INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, + INVALID, PAIR_X | CYCLES_RM, INVALID, INVALID, + PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, + PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, +}; +static uint32_t opcode_timings_0f_mod3[256] = +{ +/*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), + INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, + PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), + PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ PAIR_NP | CYCLES(9), CYCLES(1), PAIR_NP | CYCLES(9), INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, + PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, + PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, + INVALID, INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, + +/*70*/ INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, + PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES(1), + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, + +/*80*/ PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, PAIR_X_BRANCH | CYCLES_BRANCH, + +/*90*/ PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + +/*a0*/ PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(12), PAIR_XY | CYCLES(5), + PAIR_XY | CYCLES(4), PAIR_XY | CYCLES(5), INVALID, INVALID, + PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(1), INVALID, PAIR_XY | CYCLES(5), + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), INVALID, PAIR_NP | CYCLES(10), + +/*b0*/ PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(4), PAIR_XY | CYCLES(5), + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), + INVALID, INVALID, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(5), + PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(1), INVALID, +/*c0*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), + PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), + +/*d0*/ INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, + INVALID, PAIR_X | CYCLES_REG, INVALID, INVALID, + PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, PAIR_X | CYCLES_REG, + PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, PAIR_X | CYCLES_REG, + +/*e0*/ INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, + INVALID, PAIR_X | CYCLES_REG, INVALID, INVALID, + PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, PAIR_X | CYCLES_REG, + PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, PAIR_X | CYCLES_REG, + +/*f0*/ INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, + INVALID, PAIR_X | CYCLES_REG, INVALID, INVALID, + PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, + PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, +}; + +static uint32_t opcode_timings_shift[8] = +{ + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, +}; +static uint32_t opcode_timings_shift_mod3[8] = +{ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +}; +static uint32_t opcode_timings_shift_imm[8] = +{ + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, +}; +static uint32_t opcode_timings_shift_imm_mod3[8] = +{ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, +}; +static uint32_t opcode_timings_shift_cl[8] = +{ + PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), + PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), +}; +static uint32_t opcode_timings_shift_cl_mod3[8] = +{ + PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), + PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), +}; + +static uint32_t opcode_timings_f6[8] = +{ +/* TST NOT NEG*/ + PAIR_XY | CYCLES_RM, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), +/* MUL IMUL DIV IDIV*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(18) +}; +static uint32_t opcode_timings_f6_mod3[8] = +{ +/* TST NOT NEG*/ + PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), +/* MUL IMUL DIV IDIV*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(18) +}; +static uint32_t opcode_timings_f7[8] = +{ +/* TST NOT NEG*/ + PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), +/* MUL IMUL DIV IDIV*/ + PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) +}; +static uint32_t opcode_timings_f7_mod3[8] = +{ +/* TST NOT NEG*/ + PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), +/* MUL IMUL DIV IDIV*/ + PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) +}; +static uint32_t opcode_timings_ff[8] = +{ +/* INC DEC CALL CALL far*/ + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), +/* JMP JMP far PUSH*/ + PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), PAIR_XY | CYCLES(1), INVALID +}; +static uint32_t opcode_timings_ff_mod3[8] = +{ +/* INC DEC CALL CALL far*/ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_X_BRANCH | CYCLES(1), PAIR_XY | CYCLES(5), +/* JMP JMP far PUSH*/ + PAIR_X_BRANCH | CYCLES(1), PAIR_XY | CYCLES(5), PAIR_XY | CYCLES(2), INVALID +}; + +static uint32_t opcode_timings_d8[8] = +{ +/* FADDs FMULs FCOMs FCOMPs*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), +/* FSUBs FSUBRs FDIVs FDIVRs*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) +}; +static uint32_t opcode_timings_d8_mod3[8] = +{ +/* FADD FMUL FCOM FCOMP*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), +/* FSUB FSUBR FDIV FDIVR*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) +}; + +static uint32_t opcode_timings_d9[8] = +{ +/* FLDs FSTs FSTPs*/ + PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), +/* FLDENV FLDCW FSTENV FSTCW*/ + PAIR_X | CYCLES(30), PAIR_X | CYCLES(4), PAIR_X | CYCLES(24), PAIR_X | CYCLES(5) +}; +static uint32_t opcode_timings_d9_mod3[64] = +{ + /*FLD*/ + PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), + PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), + /*FXCH*/ + PAIR_X | CYCLES(3), PAIR_X | CYCLES(3), PAIR_X | CYCLES(3), PAIR_X | CYCLES(3), + PAIR_X | CYCLES(3), PAIR_X | CYCLES(3), PAIR_X | CYCLES(3), PAIR_X | CYCLES(3), + /*FNOP*/ + PAIR_X | CYCLES(2), INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + /*FSTP*/ + PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), + PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), +/* opFCHS opFABS*/ + PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), INVALID, INVALID, +/* opFTST opFXAM (oddly low) */ + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), INVALID, INVALID, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), +/* opFLDEG2 opFLDLN2 opFLDZ*/ + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), INVALID, +/* opF2XM1 opFYL2X opFPTAN opFPATAN*/ + PAIR_X | CYCLES(92), PAIR_X | CYCLES(170), PAIR_X | CYCLES(129), PAIR_X | CYCLES(161), +/* opFDECSTP opFINCSTP,*/ + INVALID, INVALID, PAIR_X | CYCLES(4), PAIR_X | CYCLES(2), +/* opFPREM opFSQRT opFSINCOS*/ + PAIR_X | CYCLES(91), INVALID, PAIR_X | CYCLES(60), PAIR_X | CYCLES(161), +/* opFRNDINT opFSCALE opFSIN opFCOS*/ + PAIR_X | CYCLES(20), PAIR_X | CYCLES(14), PAIR_X | CYCLES(140), PAIR_X | CYCLES(141) +}; + +static uint32_t opcode_timings_da[8] = +{ +/* FIADDl FIMULl FICOMl FICOMPl*/ + PAIR_X | CYCLES(12), PAIR_X | CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), +/* FISUBl FISUBRl FIDIVl FIDIVRl*/ + PAIR_X | CYCLES(29), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(48) +}; +static uint32_t opcode_timings_da_mod3[8] = +{ + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), + INVALID, PAIR_X | CYCLES(5), INVALID, INVALID +}; + + +static uint32_t opcode_timings_db[8] = +{ +/* FLDil FSTil FSTPil*/ + PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), +/* FLDe FSTPe*/ + INVALID, PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2) +}; +static uint32_t opcode_timings_db_mod3[64] = +{ + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), + + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), + + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), + + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), + +/* opFNOP opFCLEX opFINIT*/ + INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(5), PAIR_X | CYCLES(8), +/* opFNOP opFNOP*/ + PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +}; + +static uint32_t opcode_timings_dc[8] = +{ +/* FADDd FMULd FCOMd FCOMPd*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), +/* FSUBd FSUBRd FDIVd FDIVRd*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) +}; +static uint32_t opcode_timings_dc_mod3[8] = +{ +/* opFADDr opFMULr*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, INVALID, +/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) +}; + +static uint32_t opcode_timings_dd[8] = +{ +/* FLDd FSTd FSTPd*/ + PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), +/* FRSTOR FSAVE FSTSW*/ + PAIR_X | CYCLES(72), INVALID, PAIR_X | CYCLES(67), PAIR_X | CYCLES(2) +}; +static uint32_t opcode_timings_dd_mod3[8] = +{ +/* FFFREE FST FSTP*/ + PAIR_X | CYCLES(3), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), +/* FUCOM FUCOMP*/ + PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), INVALID, INVALID +}; + +static uint32_t opcode_timings_de[8] = +{ +/* FIADDw FIMULw FICOMw FICOMPw*/ + PAIR_X | CYCLES(12), PAIR_X | CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), +/* FISUBw FISUBRw FIDIVw FIDIVRw*/ + PAIR_X | CYCLES(27), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(38) +}; +static uint32_t opcode_timings_de_mod3[8] = +{ +/* FADD FMUL FCOMPP*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, PAIR_X | CYCLES(7), +/* FSUB FSUBR FDIV FDIVR*/ + PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) +}; + +static uint32_t opcode_timings_df[8] = +{ +/* FILDiw FISTiw FISTPiw*/ + PAIR_X | CYCLES(8), INVALID, PAIR_X | CYCLES(10), PAIR_X | CYCLES(13), +/* FILDiq FBSTP FISTPiq*/ + INVALID, PAIR_X | CYCLES(8), PAIR_X | CYCLES(63), PAIR_X | CYCLES(13) +}; +static uint32_t opcode_timings_df_mod3[8] = +{ + INVALID, INVALID, INVALID, INVALID, +/* FSTSW AX*/ + PAIR_X | CYCLES(6), INVALID, INVALID, INVALID +}; + +static uint32_t opcode_timings_8x[8] = +{ + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM +}; +static uint32_t opcode_timings_8x_mod3[8] = +{ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG +}; +static uint32_t opcode_timings_81[8] = +{ + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, + PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM +}; +static uint32_t opcode_timings_81_mod3[8] = +{ + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG +}; + +static int decode_delay; +static uint8_t last_prefix; + +static inline int COUNT(uint32_t c, int op_32) +{ + if (c & CYCLES_HAS_MULTI) + { + if (op_32 & 0x100) + return ((uintptr_t)c >> 8) & 0xff; + return (uintptr_t)c & 0xff; + } + if (!(c & PAIR_MASK)) + return c & 0xffff; + + return c & CYCLES_MASK; +} + +void codegen_timing_686_block_start() +{ + prev_full = decode_delay = 0; + regmask_modified = last_regmask_modified = 0; +} + +void codegen_timing_686_start() +{ + decode_delay = 0; + last_prefix = 0; +} + +void codegen_timing_686_prefix(uint8_t prefix, uint32_t fetchdat) +{ + if ((prefix & 0xf8) == 0xd8) + { + last_prefix = prefix; + return; + } + if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) + { + /*0fh prefix is 'free' when used on conditional jumps*/ + last_prefix = prefix; + return; + } + + /*6x86 can decode 1 prefix per instruction per clock with no penalty. If + either instruction has more than one prefix then decode is delayed by + one cycle for each additional prefix*/ + decode_delay++; + last_prefix = prefix; +} + +static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) +{ + uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); + + if (addr_regmask & IMPL_ESP) + addr_regmask |= (1 << REG_ESP); + + if (regmask_modified & addr_regmask) + { + regmask_modified = 0; + return 2; + } + + if (last_regmask_modified & addr_regmask) + return 1; + + return 0; +} + +void codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + uint32_t *timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); + + switch (last_prefix) + { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; + + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) + { + case 0x80: case 0x82: case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: case 0xc1: + timings = mod3 ? opcode_timings_shift_imm_mod3 : opcode_timings_shift_imm; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xd0: case 0xd1: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xd2: case 0xd3: + timings = mod3 ? opcode_timings_shift_cl_mod3 : opcode_timings_shift_cl; + deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; + + default: + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } + + /*One prefix per instruction is free*/ + decode_delay--; + if (decode_delay < 0) + decode_delay = 0; + + if (prev_full) + { + uint32_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, op_32); + int agi_stall = 0; + + if (regmask & IMPL_ESP) + regmask |= SRCDEP_ESP | DSTDEP_ESP; + + agi_stall = check_agi(prev_deps, prev_opcode, prev_fetchdat, prev_op_32); + + /*Second instruction in the pair*/ + if ((timings[opcode] & PAIR_MASK) == PAIR_NP) + { + /*Instruction can not pair with previous*/ + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; + prev_full = 0; + last_regmask_modified = regmask_modified; + regmask_modified = prev_regmask; + } + else if (((timings[opcode] & PAIR_MASK) == PAIR_X || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) + && (prev_timings[opcode] & PAIR_MASK) == PAIR_X) + { + /*Instruction can not pair with previous*/ + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; + prev_full = 0; + last_regmask_modified = regmask_modified; + regmask_modified = prev_regmask; + } + else if (prev_regmask & regmask) + { + /*Instruction can not pair with previous*/ + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; + prev_full = 0; + last_regmask_modified = regmask_modified; + regmask_modified = prev_regmask; + } + else + { + int t1 = COUNT(prev_timings[prev_opcode], prev_op_32); + int t2 = COUNT(timings[opcode], op_32); + int t_pair = (t1 > t2) ? t1 : t2; + + if (!t_pair) + fatal("Pairable 0 cycles! %02x %02x\n", opcode, prev_opcode); + + agi_stall = check_agi(deps, opcode, fetchdat, op_32); + + codegen_block_cycles += t_pair + agi_stall; + decode_delay = (-t_pair) + 1 + agi_stall; + + last_regmask_modified = regmask_modified; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | prev_regmask; + prev_full = 0; + return; + } + } + + if (!prev_full) + { + /*First instruction in the pair*/ + if ((timings[opcode] & PAIR_MASK) == PAIR_NP || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) + { + /*Instruction not pairable*/ + int agi_stall = 0; + + agi_stall = check_agi(deps, opcode, fetchdat, op_32); + + codegen_block_cycles += COUNT(timings[opcode], op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(timings[opcode], op_32)) + 1 + agi_stall; + last_regmask_modified = regmask_modified; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + } + else + { + /*Instruction might pair with next*/ + prev_full = 1; + prev_opcode = opcode; + prev_timings = timings; + prev_op_32 = op_32; + prev_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); + if (prev_regmask & IMPL_ESP) + prev_regmask |= SRCDEP_ESP | DSTDEP_ESP; + prev_deps = deps; + prev_fetchdat = fetchdat; + return; + } + } +} + +void codegen_timing_686_block_end() +{ + if (prev_full) + { + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay; + prev_full = 0; + } +} + +codegen_timing_t codegen_timing_686 = +{ + codegen_timing_686_start, + codegen_timing_686_prefix, + codegen_timing_686_opcode, + codegen_timing_686_block_start, + codegen_timing_686_block_end, + NULL +}; diff --git a/src/codegen/codegen_timing_common.c b/src/codegen/codegen_timing_common.c new file mode 100644 index 000000000..7d4a37453 --- /dev/null +++ b/src/codegen/codegen_timing_common.c @@ -0,0 +1,850 @@ +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/mem.h> + +#include "codegen_timing_common.h" + +uint64_t opcode_deps[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, +/* ADD ADD PUSH ES POP ES*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, IMPL_ESP, IMPL_ESP, +/* OR OR OR OR*/ + SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, +/* OR OR PUSH CS*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, IMPL_ESP, 0, + +/* ADC ADC ADC ADC*/ +/*10*/ SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, +/* ADC ADC PUSH SS POP SS*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, IMPL_ESP, IMPL_ESP, +/* SBB SBB SBB SBB*/ + SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, +/* SBB SBB PUSH DS POP DS*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, IMPL_ESP, IMPL_ESP, + +/* AND AND AND AND*/ +/*20*/ SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, +/* AND AND DAA*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX | MODRM, 0, SRCDEP_EAX | DSTDEP_EAX, +/* SUB SUB SUB SUB*/ + SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, +/* SUB SUB DAS*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX | MODRM, 0, SRCDEP_EAX | DSTDEP_EAX, + +/* XOR XOR XOR XOR*/ +/*30*/ SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, +/* XOR XOR AAA*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX | MODRM, 0, SRCDEP_EAX | DSTDEP_EAX, +/* CMP CMP CMP CMP*/ + SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, +/* CMP CMP AAS*/ + SRCDEP_EAX, SRCDEP_EAX, 0, SRCDEP_EAX | DSTDEP_EAX, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ SRCDEP_EAX | DSTDEP_EAX, SRCDEP_ECX | DSTDEP_ECX, SRCDEP_EDX | DSTDEP_EDX, SRCDEP_EBX | DSTDEP_EBX, +/* INC ESP INC EBP INC ESI INC EDI*/ + SRCDEP_ESP | DSTDEP_ESP, SRCDEP_EBP | DSTDEP_EBP, SRCDEP_ESI | DSTDEP_ESI, SRCDEP_EDI | DSTDEP_EDI, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_ECX | DSTDEP_ECX, SRCDEP_EDX | DSTDEP_EDX, SRCDEP_EBX | DSTDEP_EBX, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + SRCDEP_ESP | DSTDEP_ESP, SRCDEP_EBP | DSTDEP_EBP, SRCDEP_ESI | DSTDEP_ESI, SRCDEP_EDI | DSTDEP_EDI, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ SRCDEP_EAX | IMPL_ESP, SRCDEP_ECX | IMPL_ESP, SRCDEP_EDX | IMPL_ESP, SRCDEP_EBX | IMPL_ESP, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + SRCDEP_ESP | IMPL_ESP, SRCDEP_EBP | IMPL_ESP, SRCDEP_ESI | IMPL_ESP, SRCDEP_EDI | IMPL_ESP, +/* POP EAX POP ECX POP EDX POP EBX*/ + DSTDEP_EAX | IMPL_ESP, DSTDEP_ECX | IMPL_ESP, DSTDEP_EDX | IMPL_ESP, DSTDEP_EBX | IMPL_ESP, +/* POP ESP POP EBP POP ESI POP EDI*/ + DSTDEP_ESP | IMPL_ESP, DSTDEP_EBP | IMPL_ESP, DSTDEP_ESI | IMPL_ESP, DSTDEP_EDI | IMPL_ESP, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ IMPL_ESP, IMPL_ESP, 0, 0, + 0, 0, 0, 0, +/* PUSH imm IMUL PUSH imm IMUL*/ + IMPL_ESP, DSTDEP_REG | MODRM, IMPL_ESP, DSTDEP_REG | MODRM, +/* INSB INSW OUTSB OUTSW*/ + 0, 0, 0, 0, + +/* Jxx*/ +/*70*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +/*80*/ 0, 0, 0, 0, +/* TEST TEST XCHG XCHG*/ + SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, +/* MOV MOV MOV MOV*/ + SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, DSTDEP_REG | MODRM, DSTDEP_REG | MODRM, +/* MOV from seg LEA MOV to seg POP*/ + MODRM, DSTDEP_REG | MODRM, MODRM, IMPL_ESP | MODRM, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ 0, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_ECX | DSTDEP_ECX, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_EDX | DSTDEP_EDX, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_EBX | DSTDEP_EBX, +/* XCHG XCHG XCHG XCHG*/ + SRCDEP_EAX | DSTDEP_EAX | SRCDEP_ESP | DSTDEP_ESP, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_EBP | DSTDEP_EBP, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_ESI | DSTDEP_ESI, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_EDI | DSTDEP_EDI, +/* CBW CWD CALL far WAIT*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EDX, 0, 0, +/* PUSHF POPF SAHF LAHF*/ + IMPL_ESP, IMPL_ESP, SRCDEP_EAX, DSTDEP_EAX, + +/* MOV MOV MOV MOV*/ +/*a0*/ DSTDEP_EAX, DSTDEP_EAX, SRCDEP_EAX, SRCDEP_EAX, +/* MOVSB MOVSW CMPSB CMPSW*/ + 0, 0, 0, 0, +/* TEST TEST STOSB STOSW*/ + SRCDEP_EAX, SRCDEP_EAX, 0, 0, +/* LODSB LODSW SCASB SCASW*/ + 0, 0, 0, 0, + +/* MOV*/ +/*b0*/ DSTDEP_EAX, DSTDEP_ECX, DSTDEP_EDX, DSTDEP_EBX, + DSTDEP_EAX, DSTDEP_ECX, DSTDEP_EDX, DSTDEP_EBX, + DSTDEP_EAX, DSTDEP_ECX, DSTDEP_EDX, DSTDEP_EBX, + DSTDEP_ESP, DSTDEP_EBP, DSTDEP_ESI, DSTDEP_EDI, + +/* RET imm RET*/ +/*c0*/ 0, 0, SRCDEP_ESP | DSTDEP_ESP, IMPL_ESP, +/* LES LDS MOV MOV*/ + DSTDEP_REG | MODRM, DSTDEP_REG | MODRM, MODRM, MODRM, +/* ENTER LEAVE RETF RETF*/ + IMPL_ESP, IMPL_ESP, IMPL_ESP, IMPL_ESP, +/* INT3 INT INTO IRET*/ + 0, 0, 0, 0, + + +/*d0*/ 0, 0, 0, 0, +/* AAM AAD SETALC XLAT*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX, SRCDEP_EAX | SRCDEP_EBX, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ SRCDEP_ECX | DSTDEP_ECX, SRCDEP_ECX | DSTDEP_ECX, SRCDEP_ECX | DSTDEP_ECX, SRCDEP_ECX, +/* IN AL IN AX OUT_AL OUT_AX*/ + DSTDEP_EAX, DSTDEP_EAX, SRCDEP_EAX, SRCDEP_EAX, +/* CALL JMP JMP JMP*/ + IMPL_ESP, 0, 0, 0, +/* IN AL IN AX OUT_AL OUT_AX*/ + SRCDEP_EDX | DSTDEP_EAX, SRCDEP_EDX | DSTDEP_EAX, SRCDEP_EDX | SRCDEP_EAX, SRCDEP_EDX | SRCDEP_EAX, + +/* REPNE REPE*/ +/*f0*/ 0, 0, 0, 0, +/* HLT CMC*/ + 0, 0, 0, 0, +/* CLC STC CLI STI*/ + 0, 0, 0, 0, +/* CLD STD INCDEC*/ + 0, 0, MODRM, 0 +}; + +uint64_t opcode_deps_mod3[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, +/* ADD ADD PUSH ES POP ES*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, IMPL_ESP, IMPL_ESP, +/* OR OR OR OR*/ + SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, +/* OR OR PUSH CS*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, IMPL_ESP, 0, + +/* ADC ADC ADC ADC*/ +/*10*/ SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, +/* ADC ADC PUSH SS POP SS*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, IMPL_ESP, IMPL_ESP, +/* SBB SBB SBB SBB*/ + SRCDEP_REG |SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, +/* SBB SBB PUSH DS POP DS*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, IMPL_ESP, IMPL_ESP, + +/* AND AND AND AND*/ +/*20*/ SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, +/* AND AND DAA*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX | MODRM, 0, SRCDEP_EAX | DSTDEP_EAX, +/* SUB SUB SUB SUB*/ + SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, +/* SUB SUB DAS*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX | MODRM, 0, SRCDEP_EAX | DSTDEP_EAX, + +/* XOR XOR XOR XOR*/ +/*30*/ SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, +/* XOR XOR AAA*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX | MODRM, 0, SRCDEP_EAX | DSTDEP_EAX, +/* CMP CMP CMP CMP*/ + SRCDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | MODRM, +/* CMP CMP AAS*/ + SRCDEP_EAX, SRCDEP_EAX, 0, SRCDEP_EAX | DSTDEP_EAX, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ SRCDEP_EAX | DSTDEP_EAX, SRCDEP_ECX | DSTDEP_ECX, SRCDEP_EDX | DSTDEP_EDX, SRCDEP_EBX | DSTDEP_EBX, +/* INC ESP INC EBP INC ESI INC EDI*/ + SRCDEP_ESP | DSTDEP_ESP, SRCDEP_EBP | DSTDEP_EBP, SRCDEP_ESI | DSTDEP_ESI, SRCDEP_EDI | DSTDEP_EDI, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_ECX | DSTDEP_ECX, SRCDEP_EDX | DSTDEP_EDX, SRCDEP_EBX | DSTDEP_EBX, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + SRCDEP_ESP | DSTDEP_ESP, SRCDEP_EBP | DSTDEP_EBP, SRCDEP_ESI | DSTDEP_ESI, SRCDEP_EDI | DSTDEP_EDI, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ SRCDEP_EAX | IMPL_ESP, SRCDEP_ECX | IMPL_ESP, SRCDEP_EDX | IMPL_ESP, SRCDEP_EBX | IMPL_ESP, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + SRCDEP_ESP | IMPL_ESP, SRCDEP_EBP | IMPL_ESP, SRCDEP_ESI | IMPL_ESP, SRCDEP_EDI | IMPL_ESP, +/* POP EAX POP ECX POP EDX POP EBX*/ + DSTDEP_EAX | IMPL_ESP, DSTDEP_ECX | IMPL_ESP, DSTDEP_EDX | IMPL_ESP, DSTDEP_EBX | IMPL_ESP, +/* POP ESP POP EBP POP ESI POP EDI*/ + DSTDEP_ESP | IMPL_ESP, DSTDEP_EBP | IMPL_ESP, DSTDEP_ESI | IMPL_ESP, DSTDEP_EDI | IMPL_ESP, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ IMPL_ESP, IMPL_ESP, 0, 0, + 0, 0, 0, 0, +/* PUSH imm IMUL PUSH imm IMUL*/ + IMPL_ESP, DSTDEP_REG | SRCDEP_RM | MODRM, IMPL_ESP, DSTDEP_REG | SRCDEP_RM | MODRM, +/* INSB INSW OUTSB OUTSW*/ + 0, 0, 0, 0, + +/* Jxx*/ +/*70*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +/*80*/ 0, 0, 0, 0, +/* TEST TEST XCHG XCHG*/ + SRCDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, +/* MOV MOV MOV MOV*/ + SRCDEP_REG | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_REG | MODRM, SRCDEP_RM | DSTDEP_REG | MODRM, +/* MOV from seg LEA MOV to seg POP*/ + DSTDEP_RM | MODRM, DSTDEP_REG | MODRM, SRCDEP_RM | MODRM, IMPL_ESP | DSTDEP_RM | MODRM, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ 0, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_ECX | DSTDEP_ECX, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_EDX | DSTDEP_EDX, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_EBX | DSTDEP_EBX, +/* XCHG XCHG XCHG XCHG*/ + SRCDEP_EAX | DSTDEP_EAX | SRCDEP_ESP | DSTDEP_ESP, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_EBP | DSTDEP_EBP, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_ESI | DSTDEP_ESI, SRCDEP_EAX | DSTDEP_EAX | SRCDEP_EDI | DSTDEP_EDI, +/* CBW CWD CALL far WAIT*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EDX, 0, 0, +/* PUSHF POPF SAHF LAHF*/ + IMPL_ESP, IMPL_ESP, SRCDEP_EAX, DSTDEP_EAX, + +/* MOV MOV MOV MOV*/ +/*a0*/ DSTDEP_EAX, DSTDEP_EAX, SRCDEP_EAX, SRCDEP_EAX, +/* MOVSB MOVSW CMPSB CMPSW*/ + 0, 0, 0, 0, +/* TEST TEST STOSB STOSW*/ + SRCDEP_EAX, SRCDEP_EAX, 0, 0, +/* LODSB LODSW SCASB SCASW*/ + 0, 0, 0, 0, + +/* MOV*/ +/*b0*/ DSTDEP_EAX, DSTDEP_ECX, DSTDEP_EDX, DSTDEP_EBX, + DSTDEP_EAX, DSTDEP_ECX, DSTDEP_EDX, DSTDEP_EBX, + DSTDEP_EAX, DSTDEP_ECX, DSTDEP_EDX, DSTDEP_EBX, + DSTDEP_ESP, DSTDEP_EBP, DSTDEP_ESI, DSTDEP_EDI, + +/* RET imm RET*/ +/*c0*/ 0, 0, SRCDEP_ESP | DSTDEP_ESP, IMPL_ESP, +/* LES LDS MOV MOV*/ + DSTDEP_REG | MODRM, DSTDEP_REG | MODRM, DSTDEP_RM | MODRM, DSTDEP_RM | MODRM, +/* ENTER LEAVE RETF RETF*/ + IMPL_ESP, IMPL_ESP, IMPL_ESP, IMPL_ESP, +/* INT3 INT INTO IRET*/ + 0, 0, 0, 0, + + +/*d0*/ 0, 0, 0, 0, +/* AAM AAD SETALC XLAT*/ + SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX | DSTDEP_EAX, SRCDEP_EAX, SRCDEP_EAX | SRCDEP_EBX, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ SRCDEP_ECX | DSTDEP_ECX, SRCDEP_ECX | DSTDEP_ECX, SRCDEP_ECX | DSTDEP_ECX, SRCDEP_ECX, +/* IN AL IN AX OUT_AL OUT_AX*/ + DSTDEP_EAX, DSTDEP_EAX, SRCDEP_EAX, SRCDEP_EAX, +/* CALL JMP JMP JMP*/ + IMPL_ESP, 0, 0, 0, +/* IN AL IN AX OUT_AL OUT_AX*/ + SRCDEP_EDX | DSTDEP_EAX, SRCDEP_EDX | DSTDEP_EAX, SRCDEP_EDX | SRCDEP_EAX, SRCDEP_EDX | SRCDEP_EAX, + +/* REPNE REPE*/ +/*f0*/ 0, 0, 0, 0, +/* HLT CMC*/ + 0, 0, 0, 0, +/* CLC STC CLI STI*/ + 0, 0, 0, 0, +/* CLD STD INCDEC*/ + 0, 0, SRCDEP_RM | DSTDEP_RM | MODRM, 0 +}; + +uint64_t opcode_deps_0f[256] = +{ +/*00*/ MODRM, MODRM, MODRM, MODRM, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, MODRM, 0, MODRM, + +/*10*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*20*/ MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*30*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*40*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*50*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*60*/ MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + MODRM, MODRM, MODRM, MODRM, + MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + 0, 0, MODRM, MODRM, + +/*70*/ 0, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + MODRM, MODRM, MODRM, 0, + 0, 0, 0, 0, + 0, 0, MODRM, MODRM, + +/*80*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*90*/ MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + +/*a0*/ MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, 0, 0, + MODRM, MODRM, 0, MODRM, + MODRM, MODRM, 0, MODRM, + +/*b0*/ MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + 0, 0, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + +/*c0*/ MODRM, MODRM, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*d0*/ 0, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + 0, MODRM | MMX_MULTIPLY, 0, 0, + MODRM, MODRM, 0, MODRM, + MODRM, MODRM, 0, MODRM, + +/*e0*/ 0, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, 0, + 0, MODRM | MMX_MULTIPLY, 0, 0, + MODRM, MODRM, 0, MODRM, + MODRM, MODRM, 0, MODRM, + +/*f0*/ 0, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + 0, MODRM | MMX_MULTIPLY, 0, 0, + MODRM, MODRM, MODRM, 0, + MODRM, MODRM, MODRM, 0, +}; +uint64_t opcode_deps_0f_mod3[256] = +{ +/*00*/ MODRM, MODRM, MODRM, MODRM, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, MODRM, 0, MODRM, + +/*10*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*20*/ MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*30*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*40*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*50*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*60*/ MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + MODRM, MODRM, MODRM, MODRM, + MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + 0, 0, MODRM, MODRM, + +/*70*/ 0, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + MODRM, MODRM, MODRM, 0, + 0, 0, 0, 0, + 0, 0, MODRM, MODRM, + +/*80*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*90*/ MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + +/*a0*/ MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, 0, 0, + MODRM, MODRM, 0, MODRM, + MODRM, MODRM, 0, MODRM, + +/*b0*/ MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + 0, 0, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, + +/*c0*/ MODRM, MODRM, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*d0*/ 0, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + 0, MODRM | MMX_MULTIPLY, 0, 0, + MODRM, MODRM, 0, MODRM, + MODRM, MODRM, 0, MODRM, + +/*e0*/ 0, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, 0, + 0, MODRM | MMX_MULTIPLY, 0, 0, + MODRM, MODRM, 0, MODRM, + MODRM, MODRM, 0, MODRM, + +/*f0*/ 0, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, MODRM | MMX_SHIFTPACK, + 0, MODRM | MMX_MULTIPLY, 0, 0, + MODRM, MODRM, MODRM, 0, + MODRM, MODRM, MODRM, 0, +}; + +uint64_t opcode_deps_0f0f[256] = +{ +/*00*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, MODRM, 0, 0, + +/*10*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, MODRM, 0, 0, + +/*20*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*30*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*40*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*50*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*60*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*70*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*80*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*90*/ MODRM, 0, 0, 0, + MODRM, 0, MODRM, MODRM, + 0, 0, MODRM, 0, + 0, 0, MODRM, 0, + +/*a0*/ MODRM, 0, 0, 0, + MODRM, 0, MODRM, MODRM, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*b0*/ MODRM, 0, 0, 0, + MODRM, 0, MODRM, MODRM, + 0, 0, 0, 0, + 0, 0, 0, MODRM, + +/*c0*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*d0*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*e0*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*f0*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, +}; +uint64_t opcode_deps_0f0f_mod3[256] = +{ +/*00*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, MODRM, 0, 0, + +/*10*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, MODRM, 0, 0, + +/*20*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*30*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*40*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*50*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*60*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*70*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*80*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*90*/ MODRM, 0, 0, 0, + MODRM, 0, MODRM, MODRM, + 0, 0, MODRM, 0, + 0, 0, MODRM, 0, + +/*a0*/ MODRM, 0, 0, 0, + MODRM, 0, MODRM, MODRM, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*b0*/ MODRM, 0, 0, 0, + MODRM, 0, MODRM, MODRM, + 0, 0, 0, 0, + 0, 0, 0, MODRM, + +/*c0*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*d0*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*e0*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + +/*f0*/ 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, +}; + +uint64_t opcode_deps_shift[8] = +{ + MODRM, MODRM, MODRM, MODRM, + MODRM, MODRM, MODRM, MODRM, +}; +uint64_t opcode_deps_shift_mod3[8] = +{ + SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, + SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, +}; + +uint64_t opcode_deps_shift_cl[8] = +{ + MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, + MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, +}; +uint64_t opcode_deps_shift_cl_mod3[8] = +{ + SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, + SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, +}; + +uint64_t opcode_deps_f6[8] = +{ +/* TST NOT NEG*/ + MODRM, 0, MODRM, MODRM, +/* MUL IMUL DIV IDIV*/ + SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM +}; +uint64_t opcode_deps_f6_mod3[8] = +{ +/* TST NOT NEG*/ + SRCDEP_RM | MODRM, 0, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, +/* MUL IMUL DIV IDIV*/ + SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM +}; +uint64_t opcode_deps_f7[8] = +{ +/* TST NOT NEG*/ + MODRM, 0, MODRM, MODRM, +/* MUL IMUL DIV IDIV*/ + SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM +}; +uint64_t opcode_deps_f7_mod3[8] = +{ +/* TST NOT NEG*/ + SRCDEP_RM | MODRM, 0, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, +/* MUL IMUL DIV IDIV*/ + SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM +}; +uint64_t opcode_deps_ff[8] = +{ +/* INC DEC CALL CALL far*/ + MODRM, MODRM, MODRM | IMPL_ESP, MODRM, +/* JMP JMP far PUSH*/ + MODRM, MODRM, MODRM | IMPL_ESP, 0 +}; +uint64_t opcode_deps_ff_mod3[8] = +{ +/* INC DEC CALL CALL far*/ + SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | MODRM | IMPL_ESP, MODRM, +/* JMP JMP far PUSH*/ + SRCDEP_RM | MODRM, MODRM, SRCDEP_RM | MODRM | IMPL_ESP, 0 +}; + +uint64_t opcode_deps_d8[8] = +{ +/* FADDs FMULs FCOMs FCOMPs*/ + FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_POP | FPU_READ_ST0 | MODRM, +/* FSUBs FSUBRs FDIVs FDIVRs*/ + FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM +}; +uint64_t opcode_deps_d8_mod3[8] = +{ +/* FADD FMUL FCOM FCOMP*/ + FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG, FPU_READ_ST0 | FPU_READ_STREG, FPU_POP | FPU_READ_ST0 | FPU_READ_STREG, +/* FSUB FSUBR FDIV FDIVR*/ + FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG +}; + +uint64_t opcode_deps_d9[8] = +{ +/* FLDs FSTs FSTPs*/ + FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_POP | MODRM, +/* FLDENV FLDCW FSTENV FSTCW*/ + MODRM, MODRM, MODRM, MODRM +}; +uint64_t opcode_deps_d9_mod3[64] = +{ + /*FLD*/ + FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, + FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, + /*FXCH*/ + FPU_FXCH, FPU_FXCH, FPU_FXCH, FPU_FXCH, + FPU_FXCH, FPU_FXCH, FPU_FXCH, FPU_FXCH, + /*FNOP*/ + 0, 0, 0, 0, 0, 0, 0, 0, + /*FSTP*/ + FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, + FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, +/* opFCHS opFABS*/ + 0, 0, 0, 0, +/* opFTST opFXAM*/ + 0, 0, 0, 0, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ + FPU_PUSH, FPU_PUSH, FPU_PUSH, FPU_PUSH, +/* opFLDEG2 opFLDLN2 opFLDZ*/ + FPU_PUSH, FPU_PUSH, FPU_PUSH, 0, +/* opF2XM1 opFYL2X opFPTAN opFPATAN*/ + 0, 0, 0, 0, +/* opFDECSTP opFINCSTP,*/ + 0, 0, 0, 0, +/* opFPREM opFSQRT opFSINCOS*/ + 0, 0, 0, 0, +/* opFRNDINT opFSCALE opFSIN opFCOS*/ + 0, 0, 0, 0 +}; + +uint64_t opcode_deps_da[8] = +{ +/* FIADDl FIMULl FICOMl FICOMPl*/ + FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, +/* FISUBl FISUBRl FIDIVl FIDIVRl*/ + FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM +}; +uint64_t opcode_deps_da_mod3[8] = +{ + 0, 0, 0, 0, +/* FCOMPP*/ + 0, FPU_POP2, 0, 0 +}; + + +uint64_t opcode_deps_db[8] = +{ +/* FLDil FSTil FSTPil*/ + FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, +/* FLDe FSTPe*/ + 0, FPU_PUSH | MODRM, 0, FPU_READ_ST0 | FPU_POP | MODRM +}; +uint64_t opcode_deps_db_mod3[64] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + + + 0, 0, 0, 0, 0, 0, 0, 0, + +/* opFNOP opFCLEX opFINIT*/ + 0, 0, 0, 0, +/* opFNOP opFNOP*/ + 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, +}; + +uint64_t opcode_deps_dc[8] = +{ +/* FADDd FMULd FCOMd FCOMPd*/ + FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, +/* FSUBd FSUBRd FDIVd FDIVRd*/ + FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM +}; +uint64_t opcode_deps_dc_mod3[8] = +{ +/* opFADDr opFMULr*/ + FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG, 0, 0, +/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG +}; + +uint64_t opcode_deps_dd[8] = +{ +/* FLDd FSTd FSTPd*/ + FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, +/* FRSTOR FSAVE FSTSW*/ + MODRM, 0, MODRM, MODRM +}; +uint64_t opcode_deps_dd_mod3[8] = +{ +/* FFFREE FST FSTP*/ + 0, 0, FPU_READ_ST0 | FPU_WRITE_STREG, FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, +/* FUCOM FUCOMP*/ + FPU_READ_ST0 | FPU_READ_STREG, FPU_READ_ST0 | FPU_READ_STREG | FPU_POP, 0, 0 +}; + +uint64_t opcode_deps_de[8] = +{ +/* FIADDw FIMULw FICOMw FICOMPw*/ + FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, +/* FISUBw FISUBRw FIDIVw FIDIVRw*/ + FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM +}; +uint64_t opcode_deps_de_mod3[8] = +{ +/* FADDP FMULP FCOMPP*/ + FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, 0, FPU_READ_ST0 | FPU_READ_ST1 | FPU_POP2, +/* FSUBP FSUBRP FDIVP FDIVRP*/ + FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP +}; + +uint64_t opcode_deps_df[8] = +{ +/* FILDiw FISTiw FISTPiw*/ + FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, +/* FILDiq FBSTP FISTPiq*/ + 0, FPU_PUSH | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, FPU_READ_ST0 | FPU_POP | MODRM +}; +uint64_t opcode_deps_df_mod3[8] = +{ + 0, 0, 0, 0, +/* FSTSW AX*/ + 0, 0, 0, 0 +}; + +uint64_t opcode_deps_81[8] = +{ + MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, + MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632 +}; +uint64_t opcode_deps_81_mod3[8] = +{ + SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, + SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | MODRM | HAS_IMM1632 +}; +uint64_t opcode_deps_8x[8] = +{ + MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, + MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8 +}; +uint64_t opcode_deps_8x_mod3[8] = +{ + SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, + SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | MODRM | HAS_IMM8 +}; diff --git a/src/codegen/codegen_timing_common.h b/src/codegen/codegen_timing_common.h new file mode 100644 index 000000000..0ec5adcac --- /dev/null +++ b/src/codegen/codegen_timing_common.h @@ -0,0 +1,232 @@ +#include "codegen_ops.h" + +/*Instruction has input dependency on register in REG field*/ +#define SRCDEP_REG (1ull << 0) +/*Instruction has input dependency on register in R/M field*/ +#define SRCDEP_RM (1ull << 1) +/*Instruction modifies register in REG field*/ +#define DSTDEP_REG (1ull << 2) +/*Instruction modifies register in R/M field*/ +#define DSTDEP_RM (1ull << 3) + +#define SRCDEP_SHIFT 4 +#define DSTDEP_SHIFT 12 + +/*Instruction has input dependency on given register*/ +#define SRCDEP_EAX (1ull << 4) +#define SRCDEP_ECX (1ull << 5) +#define SRCDEP_EDX (1ull << 6) +#define SRCDEP_EBX (1ull << 7) +#define SRCDEP_ESP (1ull << 8) +#define SRCDEP_EBP (1ull << 9) +#define SRCDEP_ESI (1ull << 10) +#define SRCDEP_EDI (1ull << 11) + +/*Instruction modifies given register*/ +#define DSTDEP_EAX (1ull << 12) +#define DSTDEP_ECX (1ull << 13) +#define DSTDEP_EDX (1ull << 14) +#define DSTDEP_EBX (1ull << 15) +#define DSTDEP_ESP (1ull << 16) +#define DSTDEP_EBP (1ull << 17) +#define DSTDEP_ESI (1ull << 18) +#define DSTDEP_EDI (1ull << 19) + +/*Instruction has ModR/M byte*/ +#define MODRM (1ull << 20) +/*Instruction implicitly uses ESP*/ +#define IMPL_ESP (1ull << 21) + +/*Instruction is MMX shift or pack/unpack instruction*/ +#define MMX_SHIFTPACK (1ull << 22) +/*Instruction is MMX multiply instruction*/ +#define MMX_MULTIPLY (1ull << 23) + +/*Instruction pops the FPU stack*/ +#define FPU_POP (1ull << 24) +/*Instruction pops the FPU stack twice*/ +#define FPU_POP2 (1ull << 25) +/*Instruction pushes onto the FPU stack*/ +#define FPU_PUSH (1ull << 26) + +/*Instruction writes to ST(0)*/ +#define FPU_WRITE_ST0 (1ull << 27) +/*Instruction reads from ST(0)*/ +#define FPU_READ_ST0 (1ull << 28) +/*Instruction reads from and writes to ST(0)*/ +#define FPU_RW_ST0 (3ull << 27) + +/*Instruction reads from ST(1)*/ +#define FPU_READ_ST1 (1ull << 29) +/*Instruction writes to ST(1)*/ +#define FPU_WRITE_ST1 (1ull << 30) +/*Instruction reads from and writes to ST(1)*/ +#define FPU_RW_ST1 (3ull << 29) + +/*Instruction reads from ST(reg)*/ +#define FPU_READ_STREG (1ull << 31) +/*Instruction writes to ST(reg)*/ +#define FPU_WRITE_STREG (1ull << 32) +/*Instruction reads from and writes to ST(reg)*/ +#define FPU_RW_STREG (3ull << 30) + +#define FPU_FXCH (1ull << 33) + + +#define HAS_IMM8 (1ull << 34) +#define HAS_IMM1632 (1ull << 35) + + +#define REGMASK_IMPL_ESP (1 << 8) +#define REGMASK_SHIFTPACK (1 << 9) +#define REGMASK_MULTIPLY (1 << 9) + + +extern uint64_t opcode_deps[256]; +extern uint64_t opcode_deps_mod3[256]; +extern uint64_t opcode_deps_0f[256]; +extern uint64_t opcode_deps_0f_mod3[256]; +extern uint64_t opcode_deps_0f0f[256]; +extern uint64_t opcode_deps_0f0f_mod3[256]; +extern uint64_t opcode_deps_shift[8]; +extern uint64_t opcode_deps_shift_mod3[8]; +extern uint64_t opcode_deps_shift_cl[8]; +extern uint64_t opcode_deps_shift_cl_mod3[8]; +extern uint64_t opcode_deps_f6[8]; +extern uint64_t opcode_deps_f6_mod3[8]; +extern uint64_t opcode_deps_f7[8]; +extern uint64_t opcode_deps_f7_mod3[8]; +extern uint64_t opcode_deps_ff[8]; +extern uint64_t opcode_deps_ff_mod3[8]; +extern uint64_t opcode_deps_d8[8]; +extern uint64_t opcode_deps_d8_mod3[8]; +extern uint64_t opcode_deps_d9[8]; +extern uint64_t opcode_deps_d9_mod3[64]; +extern uint64_t opcode_deps_da[8]; +extern uint64_t opcode_deps_da_mod3[8]; +extern uint64_t opcode_deps_db[8]; +extern uint64_t opcode_deps_db_mod3[64]; +extern uint64_t opcode_deps_dc[8]; +extern uint64_t opcode_deps_dc_mod3[8]; +extern uint64_t opcode_deps_dd[8]; +extern uint64_t opcode_deps_dd_mod3[8]; +extern uint64_t opcode_deps_de[8]; +extern uint64_t opcode_deps_de_mod3[8]; +extern uint64_t opcode_deps_df[8]; +extern uint64_t opcode_deps_df_mod3[8]; +extern uint64_t opcode_deps_81[8]; +extern uint64_t opcode_deps_81_mod3[8]; +extern uint64_t opcode_deps_8x[8]; +extern uint64_t opcode_deps_8x_mod3[8]; + + + +static inline uint32_t get_addr_regmask(uint64_t data, uint32_t fetchdat, int op_32) +{ + uint32_t addr_regmask = 0; + + if (data & MODRM) + { + uint8_t modrm = fetchdat & 0xff; + + if ((modrm & 0xc0) != 0xc0) + { + if (op_32 & 0x200) + { + if ((modrm & 0x7) == 4) + { + uint8_t sib = (fetchdat >> 8) & 0xff; + + if ((modrm & 0xc0) != 0xc0 && (sib & 7) != 5) + { + addr_regmask = 1 << (sib & 7); + if ((sib & 0x38) != 0x20) + addr_regmask |= 1 << ((sib >> 3) & 7); + } + } + else if ((modrm & 0xc7) != 5) + { + addr_regmask = 1 << (modrm & 7); + } + } + else + { + if ((modrm & 0xc7) != 0x06) + { + switch (modrm & 7) + { + case 0: addr_regmask = REG_BX | REG_SI; break; + case 1: addr_regmask = REG_BX | REG_DI; break; + case 2: addr_regmask = REG_BP | REG_SI; break; + case 3: addr_regmask = REG_BP | REG_DI; break; + case 4: addr_regmask = REG_SI; break; + case 5: addr_regmask = REG_DI; break; + case 6: addr_regmask = REG_BP; break; + case 7: addr_regmask = REG_BX; break; + } + } + } + } + } + + if (data & IMPL_ESP) + addr_regmask |= REGMASK_IMPL_ESP; + + return addr_regmask; +} + +static inline uint32_t get_srcdep_mask(uint64_t data, uint32_t fetchdat, int bit8, int op_32) +{ + uint32_t mask = 0; + if (data & SRCDEP_REG) + { + int reg = (fetchdat >> 3) & 7; + if (bit8) + reg &= 3; + mask |= (1 << reg); + } + if (data & SRCDEP_RM) + { + int reg = fetchdat & 7; + if (bit8) + reg &= 3; + mask |= (1 << reg); + } + mask |= ((data >> SRCDEP_SHIFT) & 0xff); + if (data & MMX_SHIFTPACK) + mask |= REGMASK_SHIFTPACK; + if (data & MMX_MULTIPLY) + mask |= REGMASK_MULTIPLY; + + mask |= get_addr_regmask(data, fetchdat, op_32); + + return mask; +} + +static inline uint32_t get_dstdep_mask(uint64_t data, uint32_t fetchdat, int bit8) +{ + uint32_t mask = 0; + if (data & DSTDEP_REG) + { + int reg = (fetchdat >> 3) & 7; + if (bit8) + reg &= 3; + mask |= (1 << reg); + } + if (data & DSTDEP_RM) + { + int reg = fetchdat & 7; + if (bit8) + reg &= 3; + mask |= (1 << reg); + } + mask |= ((data >> DSTDEP_SHIFT) & 0xff); + if (data & MMX_SHIFTPACK) + mask |= REGMASK_SHIFTPACK; + if (data & MMX_MULTIPLY) + mask |= REGMASK_MULTIPLY; + if (data & IMPL_ESP) + mask |= REGMASK_IMPL_ESP | (1 << REG_ESP); + + return mask; +} diff --git a/src/codegen/codegen_timing_k6.c b/src/codegen/codegen_timing_k6.c new file mode 100644 index 000000000..a9b1aca13 --- /dev/null +++ b/src/codegen/codegen_timing_k6.c @@ -0,0 +1,2353 @@ +/*Most of the vector instructions here are a total guess. + Some of the timings are based on http://users.atw.hu/instlatx64/AuthenticAMD0000562_K6_InstLatX86.txt*/ +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/mem.h> +#include <86box/machine.h> + +#include "x86.h" +#include "x86_ops.h" +#include "x87.h" +#include "386_common.h" +#include "codegen.h" +#include "codegen_ops.h" +#include "codegen_timing_common.h" + +typedef enum uop_type_t +{ + UOP_ALU = 0, /*Executes in Integer X or Y units*/ + UOP_ALUX, /*Executes in Integer X unit*/ + UOP_LOAD, /*Executes in Load unit*/ + UOP_STORE, /*Executes in Store unit*/ + UOP_FLOAD, /*Executes in Load unit*/ + UOP_FSTORE, /*Executes in Store unit*/ + UOP_MLOAD, /*Executes in Load unit*/ + UOP_MSTORE, /*Executes in Store unit*/ + UOP_FLOAT, /*Executes in Floating Point unit*/ + UOP_MEU, /*Executes in Multimedia unit*/ + UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ + UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX/3DNow multiplier*/ + UOP_MEU_3DN, /*Executes in Multimedia unit or ALU X/Y. Uses 3DNow ALU*/ + UOP_BRANCH, /*Executes in Branch unit*/ + UOP_LIMM /*Does not require an execution unit*/ +} uop_type_t; + +typedef enum decode_type_t +{ + DECODE_SHORT, + DECODE_LONG, + DECODE_VECTOR +} decode_type_t; + +#define MAX_UOPS 10 + +typedef struct risc86_uop_t +{ + uop_type_t type; + int throughput; + int latency; +} risc86_uop_t; + +typedef struct risc86_instruction_t +{ + int nr_uops; + decode_type_t decode_type; + risc86_uop_t uop[MAX_UOPS]; +} risc86_instruction_t; + +static const risc86_instruction_t alu_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t alux_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_alu_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_alux_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t alu_store_op = +{ + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t alux_store_op = +{ + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t branch_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t limm_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t load_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} +}; + +static const risc86_instruction_t store_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; + + +static const risc86_instruction_t bswap_op = +{ + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t leave_op = +{ + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t lods_op = +{ + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t loop_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mov_reg_seg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, +}; +static const risc86_instruction_t movs_op = +{ + .nr_uops = 4, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t pop_reg_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t pop_mem_op = +{ + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t push_imm_op = +{ + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, +}; +static const risc86_instruction_t push_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t push_seg_op = +{ + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t stos_op = +{ + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_reg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_reg_b_op = +{ + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_mem_imm_op = +{ + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t test_mem_imm_b_op = +{ + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t xchg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t m3dn_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mmx_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mmx_mul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t mmx_shift_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_3dn_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_mmx_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_MEU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t load_mmx_mul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t load_mmx_shift_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t mload_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} +}; + +static const risc86_instruction_t mstore_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t pmul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t pmul_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +}; + +static const risc86_instruction_t float_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t load_float_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t fstore_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t fdiv_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} +}; +static const risc86_instruction_t fdiv_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} +}; +static const risc86_instruction_t fsin_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} +}; +static const risc86_instruction_t fsqrt_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 41, .latency = 41} +}; + +static const risc86_instruction_t vector_fldcw_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} +}; +static const risc86_instruction_t vector_float_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t vector_float_l_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} +}; +static const risc86_instruction_t vector_flde_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[2] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +}; +static const risc86_instruction_t vector_fste_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, + .uop[1] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +}; + +static const risc86_instruction_t vector_alu1_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu2_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu3_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu6_op = +{ + .nr_uops = 6, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[4] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[5] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux1_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux3_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux6_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[4] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[5] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alu_store_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_alux_store_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_arpl_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_bound_op = +{ + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_bsx_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_call_far_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_cli_sti_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} +}; +static const risc86_instruction_t vector_cmps_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_cmpsb_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_cmpxchg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, +}; +static const risc86_instruction_t vector_cmpxchg_b_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, +}; +static const risc86_instruction_t vector_cpuid_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} +}; +static const risc86_instruction_t vector_div16_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_div16_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_div32_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} +}; +static const risc86_instruction_t vector_div32_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} +}; +static const risc86_instruction_t vector_emms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} +}; +static const risc86_instruction_t vector_enter_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_femms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} +}; +static const risc86_instruction_t vector_in_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} +}; +static const risc86_instruction_t vector_ins_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_int_op = +{ + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_iret_op = +{ + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[3] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_invd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} +}; +static const risc86_instruction_t vector_jmp_far_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_load_alu_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_load_alux_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_loop_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_lss_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_mov_mem_seg_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mov_seg_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_mov_seg_reg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +}; +static const risc86_instruction_t vector_mul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mul_mem_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mul64_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_mul64_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_out_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} +}; +static const risc86_instruction_t vector_outs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_STORE, .throughput = 10, .latency = 10}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_pusha_op = +{ + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[4] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[5] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[6] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[7] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_popa_op = +{ + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[3] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[4] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[5] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[6] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[7] = {.type = UOP_LOAD, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_popf_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 17, .latency = 17} +}; +static const risc86_instruction_t vector_push_mem_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_pushf_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_ret_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_retf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_scas_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_scasb_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_setcc_mem_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_setcc_reg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_test_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_test_mem_b_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_xchg_mem_op = +{ + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +}; +static const risc86_instruction_t vector_xlat_op = +{ + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} +}; +static const risc86_instruction_t vector_wbinvd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} +}; + +#define INVALID NULL + +static const risc86_instruction_t *opcode_timings[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* ADD ADD PUSH ES POP ES*/ + &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, +/* OR OR OR OR*/ + &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* OR OR PUSH CS */ + &alux_op, &alu_op, &push_seg_op, INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/* ADC ADC PUSH SS POP SS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, +/* SBB SBB SBB SBB*/ +/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op, +/* SBB SBB PUSH DS POP DS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + +/* AND AND AND AND*/ +/*20*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* AND AND DAA*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, +/* SUB SUB SUB SUB*/ + &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* SUB SUB DAS*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, + +/* XOR XOR XOR XOR*/ +/*30*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, +/* XOR XOR AAA*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, +/* CMP CMP CMP CMP*/ + &load_alux_op, &load_alu_op, &load_alux_op, &load_alu_op, +/* CMP CMP AAS*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ &alu_op, &alu_op, &alu_op, &alu_op, +/* INC ESP INC EBP INC ESI INC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ &store_op, &store_op, &store_op, &store_op, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + &store_op, &store_op, &store_op, &store_op, +/* POP EAX POP ECX POP EDX POP EBX*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, +/* POP ESP POP EBP POP ESI POP EDI*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, +/* INSB INSW OUTSB OUTSW*/ + &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + +/* Jxx*/ +/*70*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + &vector_test_mem_b_op, &vector_test_mem_op, &vector_xchg_mem_op, &vector_xchg_mem_op, +/* MOV MOV MOV MOV*/ + &store_op, &store_op, &load_op, &load_op, +/* MOV from seg LEA MOV to seg POP*/ + &vector_mov_mem_seg_op, &store_op, &vector_mov_seg_mem_op, &pop_mem_op, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, +/* XCHG XCHG XCHG XCHG*/ + &xchg_op, &xchg_op, &xchg_op, &xchg_op, +/* CBW CWD CALL far WAIT*/ + &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, +/* PUSHF POPF SAHF LAHF*/ + &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + +/* MOV MOV MOV MOV*/ +/*a0*/ &load_op, &load_op, &store_op, &store_op, +/* MOVSB MOVSW CMPSB CMPSW*/ + &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, +/* TEST TEST STOSB STOSW*/ + &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, +/* LODSB LODSW SCASB SCASW*/ + &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + +/* MOV*/ +/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/* LES LDS MOV MOV*/ + &vector_lss_op, &vector_lss_op, &store_op, &store_op, +/* ENTER LEAVE RETF RETF*/ + &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, +/* INT3 INT INTO IRET*/ + &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, +/* CALL JMP JMP JMP*/ + &store_op, &branch_op, &vector_jmp_far_op, &branch_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, INVALID, INVALID, +/* HLT CMC*/ + &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, +/* CLC STC CLI STI*/ + &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, +/* CLD STD INCDEC*/ + &vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID +}; + +static const risc86_instruction_t *opcode_timings_mod3[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ &alux_op, &alu_op, &alux_op, &alu_op, +/* ADD ADD PUSH ES POP ES*/ + &alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op, +/* OR OR OR OR*/ + &alux_op, &alu_op, &alux_op, &alu_op, +/* OR OR PUSH CS */ + &alux_op, &alu_op, &push_seg_op, INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/* ADC ADC PUSH SS POP SS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, +/* SBB SBB SBB SBB*/ + &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op, +/* SBB SBB PUSH DS POP DS*/ + &vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op, + +/* AND AND AND AND*/ +/*20*/ &alux_op, &alu_op, &alux_op, &alu_op, +/* AND AND DAA*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, +/* SUB SUB SUB SUB*/ + &alux_op, &alu_op, &alux_op, &alu_op, +/* SUB SUB DAS*/ + &alux_op, &alu_op, INVALID, &vector_alux1_op, + +/* XOR XOR XOR XOR*/ +/*30*/ &alux_op, &alu_op, &alux_op, &alu_op, +/* XOR XOR AAA*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, +/* CMP CMP CMP CMP*/ + &alux_op, &alu_op, &alux_op, &alu_op, +/* CMP CMP AAS*/ + &alux_op, &alu_op, INVALID, &vector_alux6_op, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ &alu_op, &alu_op, &alu_op, &alu_op, +/* INC ESP INC EBP INC ESI INC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ &store_op, &store_op, &store_op, &store_op, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + &store_op, &store_op, &store_op, &store_op, +/* POP EAX POP ECX POP EDX POP EBX*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, +/* POP ESP POP EBP POP ESI POP EDI*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op, + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + &push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op, +/* INSB INSW OUTSB OUTSW*/ + &vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op, + +/* Jxx*/ +/*70*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + &vector_alu1_op, &vector_alu1_op, &vector_alu3_op, &vector_alu3_op, +/* MOV MOV MOV MOV*/ + &store_op, &store_op, &load_op, &load_op, +/* MOV from seg LEA MOV to seg POP*/ + &mov_reg_seg_op, &store_op, &vector_mov_seg_reg_op, &pop_reg_op, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op, +/* XCHG XCHG XCHG XCHG*/ + &xchg_op, &xchg_op, &xchg_op, &xchg_op, +/* CBW CWD CALL far WAIT*/ + &vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op, +/* PUSHF POPF SAHF LAHF*/ + &vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op, + +/* MOV MOV MOV MOV*/ +/*a0*/ &load_op, &load_op, &store_op, &store_op, +/* MOVSB MOVSW CMPSB CMPSW*/ + &movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op, +/* TEST TEST STOSB STOSW*/ + &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, +/* LODSB LODSW SCASB SCASW*/ + &lods_op, &lods_op, &vector_scasb_op, &vector_scas_op, + +/* MOV*/ +/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + &limm_op, &limm_op, &limm_op, &limm_op, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op, +/* LES LDS MOV MOV*/ + &vector_lss_op, &vector_lss_op, &store_op, &store_op, +/* ENTER LEAVE RETF RETF*/ + &vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op, +/* INT3 INT INTO IRET*/ + &vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op, + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + &vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, +/* CALL JMP JMP JMP*/ + &store_op, &branch_op, &vector_jmp_far_op, &branch_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op, + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, INVALID, INVALID, +/* HLT CMC*/ + &vector_alux1_op, &vector_alu2_op, INVALID, INVALID, +/* CLC STC CLI STI*/ + &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, +/* CLD STD INCDEC*/ + &vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID +}; + +static const risc86_instruction_t *opcode_timings_0f[256] = +{ +/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, + &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, + INVALID, &load_op, &vector_femms_op, &load_3dn_op, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + INVALID, INVALID, &mload_op, &mload_op, + +/*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &vector_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mstore_op, &mstore_op, + +/*80*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*90*/ &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, + +/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_load_alu_op, + &vector_alu_store_op, &vector_alu_store_op, INVALID, INVALID, + &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_load_alu_op, + &vector_alu_store_op, &vector_alu_store_op, INVALID, &vector_mul_op, + +/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_load_alu_op, + &vector_lss_op, &vector_lss_op, &load_alux_op, &load_alu_op, + INVALID, INVALID, &vector_load_alu_op, &vector_load_alu_op, + &vector_bsx_op, &vector_bsx_op, &load_alux_op, &load_alu_op, + +/*c0*/ &vector_alux_store_op, &vector_alu_store_op, INVALID, INVALID, + INVALID, INVALID, INVALID, &vector_cmpxchg_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + +/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &load_mmx_mul_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + +/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + +/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, +}; +static const risc86_instruction_t *opcode_timings_0f_mod3[256] = +{ +/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, + &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, + INVALID, INVALID, &vector_femms_op, &m3dn_op, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, + &vector_alu6_op, &vector_alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + INVALID, INVALID, &mmx_op, &mmx_op, + +/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + &mmx_op, &mmx_op, &mmx_op, &vector_emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mmx_op, &mmx_op, + +/*80*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*90*/ &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, + +/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_alu1_op, + &vector_alu1_op, &vector_alu1_op, INVALID, INVALID, + &push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_alu1_op, + &vector_alu1_op, &vector_alu1_op, INVALID, &vector_mul_op, + +/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_alu1_op, + &vector_lss_op, &vector_lss_op, &alux_op, &alu_op, + INVALID, INVALID, &vector_alu1_op, &vector_alu1_op, + &vector_bsx_op, &vector_bsx_op, &alux_op, &alu_op, + +/*c0*/ &vector_alux1_op, &vector_alu1_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + +/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &mmx_mul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, + +/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, + +/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, +}; + +static const risc86_instruction_t *opcode_timings_0f0f[256] = +{ +/*00*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &load_3dn_op, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &load_3dn_op, INVALID, INVALID, + +/*20*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*70*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*80*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*90*/ &load_3dn_op, INVALID, INVALID, INVALID, + &load_3dn_op, INVALID, &load_3dn_op, &load_3dn_op, + INVALID, INVALID, &load_3dn_op, INVALID, + INVALID, INVALID, &load_3dn_op, INVALID, + +/*a0*/ &load_3dn_op, INVALID, INVALID, INVALID, + &load_3dn_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, + INVALID, INVALID, &load_3dn_op, INVALID, + INVALID, INVALID, &load_3dn_op, INVALID, + +/*b0*/ &load_3dn_op, INVALID, INVALID, INVALID, + &load_mmx_mul_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, &load_mmx_op, + +/*c0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*d0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*e0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*f0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +}; +static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = +{ +/*00*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &m3dn_op, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, &m3dn_op, INVALID, INVALID, + +/*20*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*70*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*80*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*90*/ &m3dn_op, INVALID, INVALID, INVALID, + &m3dn_op, INVALID, &m3dn_op, &m3dn_op, + INVALID, INVALID, &m3dn_op, INVALID, + INVALID, INVALID, &m3dn_op, INVALID, + +/*a0*/ &m3dn_op, INVALID, INVALID, INVALID, + &m3dn_op, INVALID, &mmx_mul_op, &mmx_mul_op, + INVALID, INVALID, &m3dn_op, INVALID, + INVALID, INVALID, &m3dn_op, INVALID, + +/*b0*/ &m3dn_op, INVALID, INVALID, INVALID, + &mmx_mul_op, INVALID, &mmx_mul_op, &mmx_mul_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, &mmx_op, + +/*c0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*d0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*e0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*f0*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +}; + +static const risc86_instruction_t *opcode_timings_shift[8] = +{ + &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, + &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op +}; +static const risc86_instruction_t *opcode_timings_shift_b[8] = +{ + &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, + &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op +}; +static const risc86_instruction_t *opcode_timings_shift_mod3[8] = +{ + &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, + &alu_op, &alu_op, &alu_op, &alu_op +}; +static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = +{ + &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, + &alux_op, &alux_op, &alux_op, &alux_op +}; + +static const risc86_instruction_t *opcode_timings_80[8] = +{ + &alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op, + &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, +}; +static const risc86_instruction_t *opcode_timings_80_mod3[8] = +{ + &alux_op, &alux_op, &alux_store_op, &alux_store_op, + &alux_op, &alux_op, &alux_op, &alux_op, +}; +static const risc86_instruction_t *opcode_timings_8x[8] = +{ + &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, +}; +static const risc86_instruction_t *opcode_timings_8x_mod3[8] = +{ + &alu_op, &alu_op, &alu_store_op, &alu_store_op, + &alu_op, &alu_op, &alu_op, &alu_op, +}; + +static const risc86_instruction_t *opcode_timings_f6[8] = +{ +/* TST NOT NEG*/ + &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, +}; +static const risc86_instruction_t *opcode_timings_f6_mod3[8] = +{ +/* TST NOT NEG*/ + &test_reg_b_op, INVALID, &alux_op, &alux_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, +}; +static const risc86_instruction_t *opcode_timings_f7[8] = +{ +/* TST NOT NEG*/ + &test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, +}; +static const risc86_instruction_t *opcode_timings_f7_mod3[8] = +{ +/* TST NOT NEG*/ + &test_reg_op, INVALID, &alu_op, &alu_op, +/* MUL IMUL DIV IDIV*/ + &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, +}; +static const risc86_instruction_t *opcode_timings_ff[8] = +{ +/* INC DEC CALL CALL far*/ + &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, +/* JMP JMP far PUSH*/ + &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID +}; +static const risc86_instruction_t *opcode_timings_ff_mod3[8] = +{ +/* INC DEC CALL CALL far*/ + &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, +/* JMP JMP far PUSH*/ + &branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID +}; + +static const risc86_instruction_t *opcode_timings_d8[8] = +{ +/* FADDs FMULs FCOMs FCOMPs*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FSUBs FSUBRs FDIVs FDIVRs*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, +}; +static const risc86_instruction_t *opcode_timings_d8_mod3[8] = +{ +/* FADD FMUL FCOM FCOMP*/ + &float_op, &float_op, &float_op, &float_op, +/* FSUB FSUBR FDIV FDIVR*/ + &float_op, &float_op, &fdiv_op, &fdiv_op, +}; + +static const risc86_instruction_t *opcode_timings_d9[8] = +{ +/* FLDs FSTs FSTPs*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FLDENV FLDCW FSTENV FSTCW*/ + &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op +}; +static const risc86_instruction_t *opcode_timings_d9_mod3[64] = +{ + /*FLD*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, + /*FXCH*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, + /*FNOP*/ + &float_op, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + /*FSTP*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, +/* opFCHS opFABS*/ + &float_op, &float_op, INVALID, INVALID, +/* opFTST opFXAM*/ + &float_op, &float_op, INVALID, INVALID, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ + &float_op, &float_op, &float_op, &float_op, +/* opFLDEG2 opFLDLN2 opFLDZ*/ + &float_op, &float_op, &float_op, INVALID, +/* opF2XM1 opFYL2X opFPTAN opFPATAN*/ + &fsin_op, &fsin_op, &fsin_op, &fsin_op, +/* opFDECSTP opFINCSTP,*/ + INVALID, INVALID, &float_op, &float_op, +/* opFPREM opFSQRT opFSINCOS*/ + &fdiv_op, INVALID, &fsqrt_op, &fsin_op, +/* opFRNDINT opFSCALE opFSIN opFCOS*/ + &float_op, &fdiv_op, &fsin_op, &fsin_op +}; + +static const risc86_instruction_t *opcode_timings_da[8] = +{ +/* FIADDl FIMULl FICOMl FICOMPl*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FISUBl FISUBRl FIDIVl FIDIVRl*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, +}; +static const risc86_instruction_t *opcode_timings_da_mod3[8] = +{ + INVALID, INVALID, INVALID, INVALID, +/* FCOMPP*/ + INVALID, &float_op, INVALID, INVALID +}; + +static const risc86_instruction_t *opcode_timings_db[8] = +{ +/* FLDil FSTil FSTPil*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FLDe FSTPe*/ + INVALID, &vector_flde_op, INVALID, &vector_fste_op +}; +static const risc86_instruction_t *opcode_timings_db_mod3[64] = +{ + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/* opFNOP opFCLEX opFINIT*/ + INVALID, &float_op, &float_op, &float_op, +/* opFNOP opFNOP*/ + &float_op, &float_op, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +}; + +static const risc86_instruction_t *opcode_timings_dc[8] = +{ +/* FADDd FMULd FCOMd FCOMPd*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FSUBd FSUBRd FDIVd FDIVRd*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, +}; +static const risc86_instruction_t *opcode_timings_dc_mod3[8] = +{ +/* opFADDr opFMULr*/ + &float_op, &float_op, INVALID, INVALID, +/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + &float_op, &float_op, &fdiv_op, &fdiv_op +}; + +static const risc86_instruction_t *opcode_timings_dd[8] = +{ +/* FLDd FSTd FSTPd*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FRSTOR FSAVE FSTSW*/ + &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op +}; +static const risc86_instruction_t *opcode_timings_dd_mod3[8] = +{ +/* FFFREE FST FSTP*/ + &float_op, INVALID, &float_op, &float_op, +/* FUCOM FUCOMP*/ + &float_op, &float_op, INVALID, INVALID +}; + +static const risc86_instruction_t *opcode_timings_de[8] = +{ +/* FIADDw FIMULw FICOMw FICOMPw*/ + &load_float_op, &load_float_op, &load_float_op, &load_float_op, +/* FISUBw FISUBRw FIDIVw FIDIVRw*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, +}; +static const risc86_instruction_t *opcode_timings_de_mod3[8] = +{ +/* FADDP FMULP FCOMPP*/ + &float_op, &float_op, INVALID, &float_op, +/* FSUBP FSUBRP FDIVP FDIVRP*/ + &float_op, &float_op, &fdiv_op, &fdiv_op, +}; + +static const risc86_instruction_t *opcode_timings_df[8] = +{ +/* FILDiw FISTiw FISTPiw*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FILDiq FBSTP FISTPiq*/ + INVALID, &load_float_op, &vector_float_l_op, &fstore_op, +}; +static const risc86_instruction_t *opcode_timings_df_mod3[8] = +{ + INVALID, INVALID, INVALID, INVALID, +/* FSTSW AX*/ + &float_op, INVALID, INVALID, INVALID +}; + + +static uint8_t last_prefix; +static int prefixes; + +static int decode_timestamp; +static int last_complete_timestamp; + +typedef struct k6_unit_t +{ + uint32_t uop_mask; + int first_available_cycle; +} k6_unit_t; + +static int nr_units; +static k6_unit_t *units; + +/*K6 has dedicated MMX unit*/ +static k6_unit_t k6_units[] = +{ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX)}, /*Integer X*/ + {.uop_mask = (1 << UOP_ALU)}, /*Integer Y*/ + {.uop_mask = (1 << UOP_MEU) | (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, /*Multimedia*/ + {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ + {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ + {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ + {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ +}; +#define NR_K6_UNITS (sizeof(k6_units) / sizeof(k6_unit_t)) + +/*K6-2 and later integrate MMX into ALU X & Y, sharing multiplier, shifter and + 3DNow ALU between two execution units*/ +static k6_unit_t k6_2_units[] = +{ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN)}, + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN)}, + {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ + {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ + {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ + {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ +}; +#define NR_K6_2_UNITS (sizeof(k6_2_units) / sizeof(k6_unit_t)) + +/*First available cycles of shared execution units. Each of these can be submitted + to by ALU X and Y*/ +static int mul_first_available_cycle; +static int shift_first_available_cycle; +static int m3dnow_first_available_cycle; + +static int uop_run(const risc86_uop_t *uop, int decode_time) +{ + int c; + k6_unit_t *best_unit = NULL; + int best_start_cycle = 99999; + + /*UOP_LIMM does not require execution*/ + if (uop->type == UOP_LIMM) + return decode_time; + + /*Handle shared units on K6-2 and later*/ + if (units == k6_2_units) + { + if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) + decode_time = mul_first_available_cycle; + else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) + decode_time = shift_first_available_cycle; + else if (uop->type == UOP_MEU_3DN && decode_time < mul_first_available_cycle) + decode_time = m3dnow_first_available_cycle; + } + + /*Find execution unit for this uOP*/ + for (c = 0; c < nr_units; c++) + { + if (units[c].uop_mask & (1 << uop->type)) + { + if (units[c].first_available_cycle < best_start_cycle) + { + best_unit = &units[c]; + best_start_cycle = units[c].first_available_cycle; + } + } + } + if (!best_unit) + fatal("uop_run: can not find execution unit\n"); + + if (best_start_cycle < decode_time) + best_start_cycle = decode_time; + best_unit->first_available_cycle = best_start_cycle + uop->throughput; + + if (units == k6_2_units) + { + if (uop->type == UOP_MEU_MUL) + mul_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_SHIFT) + shift_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_3DN) + m3dnow_first_available_cycle = best_start_cycle + uop->throughput; + } + + return best_start_cycle + uop->throughput; +} + +/*The K6 decoder can decode, per clock : + - 1 or 2 'short' instructions, each up to 2 uOPs and 7 bytes long + - 1 'long' instruction, up to 4 uOPs + - 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay) +*/ +static struct +{ + int nr_uops; + const risc86_uop_t *uops[4]; + /*Earliest time a uop can start. If the timestamp is -1, then the uop is + part of a dependency chain and the start time is the completion time of + the previous uop*/ + int earliest_start[4]; +} decode_buffer; + +#define NR_OPQUADS 6 +/*Timestamps of when the last six opquads completed. The K6 scheduler retires + opquads in order, so this is needed to determine when the next can be scheduled*/ +static int opquad_completion_timestamp[NR_OPQUADS]; +static int next_opquad = 0; + +#define NR_REGS 8 +/*Timestamp of when last operation on an integer register completed*/ +static int reg_available_timestamp[NR_REGS]; +/*Timestamp of when last operation on an FPU register completed*/ +static int fpu_st_timestamp[8]; +/*Completion time of the last uop to be processed. Used to calculate timing of + dependent uop chains*/ +static int last_uop_timestamp = 0; + +void decode_flush() +{ + int c; + int uop_timestamp = 0; + + /*Decoded opquad can not be submitted if there are no free spaces in the + opquad buffer*/ + if (decode_timestamp < opquad_completion_timestamp[next_opquad]) + decode_timestamp = opquad_completion_timestamp[next_opquad]; + + /*Ensure that uops can not be submitted before they have been decoded*/ + if (decode_timestamp > last_uop_timestamp) + last_uop_timestamp = decode_timestamp; + + /*Submit uops to execution units, and determine the latest completion time*/ + for (c = 0; c < decode_buffer.nr_uops; c++) + { + int start_timestamp; + + if (decode_buffer.earliest_start[c] == -1) + start_timestamp = last_uop_timestamp; + else + start_timestamp = decode_buffer.earliest_start[c]; + + last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); + if (last_uop_timestamp > uop_timestamp) + uop_timestamp = last_uop_timestamp; + } + + /*Calculate opquad completion time. Since opquads complete in order, it + must be after the last completion.*/ + if (uop_timestamp <= last_complete_timestamp) + last_complete_timestamp = last_complete_timestamp + 1; + else + last_complete_timestamp = uop_timestamp; + + /*Advance to next opquad in buffer*/ + opquad_completion_timestamp[next_opquad] = last_complete_timestamp; + next_opquad++; + if (next_opquad == NR_OPQUADS) + next_opquad = 0; + + decode_timestamp++; + decode_buffer.nr_uops = 0; +} + +/*The instruction is only of interest here if it's longer than 7 bytes, as that's the + limit on K6 short decoding*/ +static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) +{ + int len = prefixes + 1; /*Opcode*/ + if (deps & MODRM) + { + len++; /*ModR/M*/ + if (deps & HAS_IMM8) + len++; + if (deps & HAS_IMM1632) + len += (op_32 & 0x100) ? 4 : 2; + + if (op_32 & 0x200) + { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) + { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } + else + { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } + else + { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; + } + } + + return len; +} + +static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) +{ + uint32_t regmask_required; + uint32_t regmask_modified; + int c, d; + int earliest_start = 0; + decode_type_t decode_type = ins->decode_type; + int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); + + /*Generate input register mask, and determine the earliest time this + instruction can start. This is not accurate, as this is calculated per + x86 instruction when it should be handled per uop*/ + regmask_required = get_dstdep_mask(deps, fetchdat, bit8); + regmask_required |= get_addr_regmask(deps, fetchdat, op_32); + for (c = 0; c < 8; c++) + { + if (regmask_required & (1 << c)) + { + if (reg_available_timestamp[c] > decode_timestamp) + earliest_start = reg_available_timestamp[c]; + } + } + if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) + earliest_start = fpu_st_timestamp[0]; + if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) + earliest_start = fpu_st_timestamp[1]; + if ((deps & FPU_RW_STREG)) + { + int reg = fetchdat & 7; + + if (fpu_st_timestamp[reg] > decode_timestamp) + earliest_start = fpu_st_timestamp[reg]; + } + + /*Short decoders are limited to 7 bytes*/ + if (decode_type == DECODE_SHORT && instr_length > 7) + decode_type = DECODE_LONG; + /*Long decoder is limited to 11 bytes*/ + else if (instr_length > 11) + decode_type = DECODE_VECTOR; + + switch (decode_type) + { + case DECODE_SHORT: + if (decode_buffer.nr_uops) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + if (ins->nr_uops > 1) + { + decode_buffer.uops[decode_buffer.nr_uops+1] = &ins->uop[1]; + decode_buffer.earliest_start[decode_buffer.nr_uops+1] = -1; + } + decode_buffer.nr_uops += ins->nr_uops; + + decode_flush(); + } + else + { + decode_buffer.nr_uops = ins->nr_uops; + decode_buffer.uops[0] = &ins->uop[0]; + decode_buffer.earliest_start[0] = earliest_start; + if (ins->nr_uops > 1) + { + decode_buffer.uops[1] = &ins->uop[1]; + decode_buffer.earliest_start[1] = -1; + } + } + break; + + case DECODE_LONG: + if (decode_buffer.nr_uops) + decode_flush(); + + decode_buffer.nr_uops = ins->nr_uops; + for (c = 0; c < ins->nr_uops; c++) + { + decode_buffer.uops[c] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[c] = earliest_start; + else + decode_buffer.earliest_start[c] = -1; + } + decode_flush(); + break; + + case DECODE_VECTOR: + if (decode_buffer.nr_uops) + decode_flush(); + + decode_timestamp++; + d = 0; + + for (c = 0; c < ins->nr_uops; c++) + { + decode_buffer.uops[d] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[d] = earliest_start; + else + decode_buffer.earliest_start[d] = -1; + d++; + + if (d == 4) + { + d = 0; + decode_buffer.nr_uops = 4; + decode_flush(); + } + } + if (d) + { + decode_buffer.nr_uops = d; + decode_flush(); + } + break; + } + + /*Update write timestamps for any output registers*/ + regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); + for (c = 0; c < 8; c++) + { + if (regmask_modified & (1 << c)) + reg_available_timestamp[c] = last_complete_timestamp; + } + if (deps & FPU_POP) + { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c+1]; + fpu_st_timestamp[7] = 0; + } + if (deps & FPU_POP2) + { + for (c = 0; c < 6; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c+2]; + fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; + } + if (deps & FPU_PUSH) + { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c+1] = fpu_st_timestamp[c]; + fpu_st_timestamp[0] = 0; + } + if (deps & FPU_WRITE_ST0) + fpu_st_timestamp[0] = last_complete_timestamp; + if (deps & FPU_WRITE_ST1) + fpu_st_timestamp[1] = last_complete_timestamp; + if (deps & FPU_WRITE_STREG) + { + int reg = fetchdat & 7; + if (deps & FPU_POP) + reg--; + if (reg >= 0 && + !(reg == 0 && (deps & FPU_WRITE_ST0)) && + !(reg == 1 && (deps & FPU_WRITE_ST1))) + fpu_st_timestamp[reg] = last_complete_timestamp; + } +} + +void codegen_timing_k6_block_start() +{ + int c; + + for (c = 0; c < nr_units; c++) + units[c].first_available_cycle = 0; + + mul_first_available_cycle = 0; + shift_first_available_cycle = 0; + m3dnow_first_available_cycle = 0; + + decode_timestamp = 0; + last_complete_timestamp = 0; + + for (c = 0; c < NR_OPQUADS; c++) + opquad_completion_timestamp[c] = 0; + next_opquad = 0; + + for (c = 0; c < NR_REGS; c++) + reg_available_timestamp[c] = 0; + for (c = 0; c < 8; c++) + fpu_st_timestamp[c] = 0; +} + +void codegen_timing_k6_start() +{ + if (machines[machine].cpu[cpu_manufacturer].cpus[cpu].cpu_type == CPU_K6) + { + units = k6_units; + nr_units = NR_K6_UNITS; + } + else + { + units = k6_2_units; + nr_units = NR_K6_2_UNITS; + } + last_prefix = 0; + prefixes = 0; +} + +void codegen_timing_k6_prefix(uint8_t prefix, uint32_t fetchdat) +{ + if (prefix != 0x0f) + decode_timestamp++; + + last_prefix = prefix; + prefixes++; +} + +void codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + const risc86_instruction_t **ins_table; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int old_last_complete_timestamp = last_complete_timestamp; + int bit8 = !(opcode & 1); + + switch (last_prefix) + { + case 0x0f: + if (opcode == 0x0f) + { + /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ + uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ + uint8_t modrm = fetchdat & 0xff; + uint8_t sib = (fetchdat >> 8) & 0xff; + + if ((modrm & 0xc0) != 0xc0) + { + if (op_32 & 0x200) + { + if ((modrm & 7) == 4) + { + /* Has SIB*/ + opcode_pc++; + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((sib & 0x07) == 0x05) + opcode_pc += 4; + } + else + { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((modrm & 0xc7) == 0x05) + opcode_pc += 4; + } + } + else + { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 2; + else if ((modrm & 0xc7) == 0x06) + opcode_pc += 2; + } + } + + opcode = fastreadb(cs + opcode_pc); + + ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; + deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; + } + else + { + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + } + break; + + case 0xd8: + ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) + { + case 0x80: case 0x82: + ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: case 0x83: + ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: case 0xd0: case 0xd2: + ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc1: case 0xd1: case 0xd3: + ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; + + default: + ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } + + if (ins_table[opcode]) + decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); + else + decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); +} + +void codegen_timing_k6_block_end() +{ + if (decode_buffer.nr_uops) + { + int old_last_complete_timestamp = last_complete_timestamp; + decode_flush(); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); + } +} + +int codegen_timing_k6_jump_cycles() +{ + if (decode_buffer.nr_uops) + return 1; + return 0; +} + +codegen_timing_t codegen_timing_k6 = +{ + codegen_timing_k6_start, + codegen_timing_k6_prefix, + codegen_timing_k6_opcode, + codegen_timing_k6_block_start, + codegen_timing_k6_block_end, + codegen_timing_k6_jump_cycles +}; diff --git a/src/codegen/codegen_timing_p6.c b/src/codegen/codegen_timing_p6.c new file mode 100644 index 000000000..a69f68f04 --- /dev/null +++ b/src/codegen/codegen_timing_p6.c @@ -0,0 +1,2108 @@ +/*Basic P6 timing model by plant/nerd73. Based on the K6 timing model*/ +/*Some cycle timings come from https://www.agner.org/optimize/instruction_tables.pdf*/ +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/mem.h> +#include <86box/machine.h> + +#include "x86.h" +#include "x86_ops.h" +#include "x87.h" +#include "386_common.h" +#include "codegen.h" +#include "codegen_ops.h" +#include "codegen_timing_common.h" + +typedef enum uop_type_t +{ + UOP_ALU = 0, /*Executes in Port 0 or 1 ALU units*/ + UOP_ALUP0, /*Executes in Port 0 ALU unit*/ + UOP_LOAD, /*Executes in Load unit*/ + UOP_STORED, /*Executes in Data Store unit*/ + UOP_STOREA, /*Executes in Address Store unit*/ + UOP_FLOAD, /*Executes in Load unit*/ + UOP_FSTORED, /*Executes in Data Store unit*/ + UOP_FSTOREA, /*Executes in Address Store unit*/ + UOP_MLOAD, /*Executes in Load unit*/ + UOP_MSTORED, /*Executes in Data Store unit*/ + UOP_MSTOREA, /*Executes in Address Store unit*/ + UOP_FLOAT, /*Executes in Floating Point unit*/ + UOP_MMX, /*Executes in Port 0 or 1 ALU units as MMX*/ + UOP_MMX_SHIFT, /*Executes in Port 1 ALU unit. Uses MMX shifter*/ + UOP_MMX_MUL, /*Executes in Port 0 ALU unit. Uses MMX multiplier*/ + UOP_BRANCH, /*Executes in Branch unit*/ + UOP_FXCH /*Does not require an execution unit*/ +} uop_type_t; + +typedef enum decode_type_t +{ + DECODE_SIMPLE, + DECODE_COMPLEX, +} decode_type_t; + +#define MAX_UOPS 10 + +typedef struct p6_uop_t +{ + uop_type_t type; + int latency; +} p6_uop_t; + +typedef struct macro_op_t +{ + int nr_uops; + decode_type_t decode_type; + p6_uop_t uop[MAX_UOPS]; +} macro_op_t; + +static const macro_op_t alu_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t alup0_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t load_alu_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t load_alup0_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t alu_store_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} + }; +static const macro_op_t alup0_store_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; + +static const macro_op_t branch_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_BRANCH, .latency = 2} +}; + +static const macro_op_t fxch_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FXCH, .latency = 1} +}; + +static const macro_op_t load_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_LOAD, .latency = 1} +}; + +static const macro_op_t store_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} +}; + + +static const macro_op_t bswap_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, +}; +static const macro_op_t leave_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t lods_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t loop_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 2} +}; +static const macro_op_t mov_reg_seg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, +}; +static const macro_op_t movs_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t pop_reg_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t pop_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t push_imm_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, +}; +static const macro_op_t push_mem_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} +}; +static const macro_op_t push_seg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t stos_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t test_reg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t test_reg_b_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t test_mem_imm_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t test_mem_imm_b_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t xchg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; + + +static const macro_op_t mmx_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX, .latency = 1} +}; +static const macro_op_t mmx_mul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} +}; +static const macro_op_t mmx_shift_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1} +}; +static const macro_op_t load_mmx_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = {.type = UOP_MMX, .latency = 2} +}; +static const macro_op_t load_mmx_mul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 2} +}; +static const macro_op_t load_mmx_shift_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = {.type = UOP_MMX_SHIFT, .latency = 2} +}; +static const macro_op_t mload_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MLOAD, .latency = 1}, +}; + +static const macro_op_t mstore_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MSTORED, .latency = 1}, + .uop[1] = {.type = UOP_MSTOREA, .latency = 1} +}; +static const macro_op_t pmul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} +}; +static const macro_op_t pmul_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = {.type = UOP_MMX_MUL, .latency = 2} +}; +static const macro_op_t float_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} +}; +static const macro_op_t fadd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 2} +}; +static const macro_op_t fmul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALUP0, .latency = 3} +}; +static const macro_op_t float2_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const macro_op_t fchs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FLOAT, .latency = 2}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} +}; +static const macro_op_t load_float_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1} +}; +static const macro_op_t load_fadd_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 2} +}; +static const macro_op_t load_fmul_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 4} +}; +static const macro_op_t fstore_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FSTORED, .latency = 1}, + .uop[1] = {.type = UOP_FSTOREA, .latency = 1}, +}; +static const macro_op_t load_fiadd_op = +{ + .nr_uops = 7, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 1}, + .uop[3] = {.type = UOP_FLOAT, .latency = 1}, + .uop[4] = {.type = UOP_FLOAT, .latency = 1}, + .uop[5] = {.type = UOP_FLOAT, .latency = 1}, + .uop[6] = {.type = UOP_FLOAT, .latency = 1} +}; +static const macro_op_t fdiv_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 37} +}; +static const macro_op_t fdiv_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAT, .latency = 37} +}; +static const macro_op_t fsin_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 62} +}; +static const macro_op_t fsqrt_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 69} +}; + +static const macro_op_t fldcw_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 10} +}; +static const macro_op_t complex_float_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} +}; +static const macro_op_t complex_float_l_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 50} +}; +static const macro_op_t flde_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = {.type = UOP_FLOAD, .latency = 1}, + .uop[2] = {.type = UOP_FLOAT, .latency = 2} +}; +static const macro_op_t fste_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = {.type = UOP_FSTORED, .latency = 1}, + .uop[2] = {.type = UOP_FSTOREA, .latency = 1} +}; + +static const macro_op_t complex_alu1_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t alu2_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t alu3_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t alu6_op = +{ + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1}, + .uop[4] = {.type = UOP_ALU, .latency = 1}, + .uop[5] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t complex_alup0_1_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t alup0_3_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1}, + .uop[2] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t alup0_6_op = +{ + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1}, + .uop[2] = {.type = UOP_ALUP0, .latency = 1}, + .uop[3] = {.type = UOP_ALUP0, .latency = 1}, + .uop[4] = {.type = UOP_ALUP0, .latency = 1}, + .uop[5] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t arpl_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const macro_op_t bound_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t bsx_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10} +}; +static const macro_op_t call_far_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_BRANCH, .latency = 1} +}; +static const macro_op_t cli_sti_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 7} +}; +static const macro_op_t cmps_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t cmpsb_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t cmpxchg_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const macro_op_t cmpxchg_b_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1}, + .uop[2] = {.type = UOP_STORED, .latency = 1}, + .uop[3] = {.type = UOP_STOREA, .latency = 1} +}; +static const macro_op_t complex_push_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1} +}; + +static const macro_op_t cpuid_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 23} +}; +static const macro_op_t div16_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 21} +}; +static const macro_op_t div16_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 21} +}; +static const macro_op_t div32_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 37} +}; +static const macro_op_t div32_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 37} +}; +static const macro_op_t emms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 50} +}; +static const macro_op_t enter_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = {.type = UOP_STOREA, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 10} +}; +static const macro_op_t femms_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 6} +}; +static const macro_op_t in_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 18} +}; +static const macro_op_t ins_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 18}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t int_op = +{ + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 20}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_STORED, .latency = 1}, + .uop[4] = {.type = UOP_STOREA, .latency = 1}, + .uop[5] = {.type = UOP_STORED, .latency = 1}, + .uop[6] = {.type = UOP_STOREA, .latency = 1}, + .uop[7] = {.type = UOP_BRANCH, .latency = 1} +}; +static const macro_op_t iret_op = +{ + .nr_uops = 5, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3}, + .uop[1] = {.type = UOP_LOAD, .latency = 3}, + .uop[2] = {.type = UOP_LOAD, .latency = 3}, + .uop[3] = {.type = UOP_ALU, .latency = 20}, + .uop[4] = {.type = UOP_BRANCH, .latency = 1} +}; +static const macro_op_t invd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 500} +}; +static const macro_op_t jmp_far_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const macro_op_t lss_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 3} +}; +static const macro_op_t mov_mem_seg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, +}; +static const macro_op_t mov_seg_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3} +}; +static const macro_op_t mov_seg_reg_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3} +}; +static const macro_op_t mul_op = +{ + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t mul_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t mul64_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1}, + .uop[2] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t mul64_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1}, + .uop[2] = {.type = UOP_ALUP0, .latency = 1}, + .uop[3] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t out_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 18} +}; +static const macro_op_t outs_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 18} +}; +static const macro_op_t pusha_op = +{ + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 2}, + .uop[1] = {.type = UOP_STOREA, .latency = 2}, + .uop[2] = {.type = UOP_STORED, .latency = 2}, + .uop[3] = {.type = UOP_STOREA, .latency = 2}, + .uop[4] = {.type = UOP_STORED, .latency = 2}, + .uop[5] = {.type = UOP_STOREA, .latency = 2}, + .uop[6] = {.type = UOP_STORED, .latency = 2}, + .uop[7] = {.type = UOP_STOREA, .latency = 2} +}; +static const macro_op_t popa_op = +{ + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1}, + .uop[2] = {.type = UOP_LOAD, .latency = 1}, + .uop[3] = {.type = UOP_LOAD, .latency = 1}, + .uop[4] = {.type = UOP_LOAD, .latency = 1}, + .uop[5] = {.type = UOP_LOAD, .latency = 1}, + .uop[6] = {.type = UOP_LOAD, .latency = 1}, + .uop[7] = {.type = UOP_LOAD, .latency = 1} +}; +static const macro_op_t popf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 6}, + .uop[2] = {.type = UOP_ALUP0, .latency = 10} +}; +static const macro_op_t pushf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1} +}; +static const macro_op_t ret_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_BRANCH, .latency = 1} +}; +static const macro_op_t retf_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 3}, + .uop[2] = {.type = UOP_BRANCH, .latency = 1} +}; +static const macro_op_t scas_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t scasb_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t setcc_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1}, + .uop[2] = {.type = UOP_FSTORED, .latency = 1}, + .uop[3] = {.type = UOP_FSTOREA, .latency = 1} +}; +static const macro_op_t setcc_reg_op = +{ + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1}, + .uop[2] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t test_mem_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t test_mem_b_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_ALUP0, .latency = 1} +}; +static const macro_op_t xchg_mem_op = +{ + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = {.type = UOP_STOREA, .latency = 1}, + .uop[3] = {.type = UOP_ALU, .latency = 1} +}; +static const macro_op_t xlat_op = +{ + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = {.type = UOP_LOAD, .latency = 1} +}; +static const macro_op_t wbinvd_op = +{ + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10000} +}; +#define INVALID NULL + +static const macro_op_t *opcode_timings[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, +/* ADD ADD PUSH ES POP ES*/ + &alup0_op, &alu_op, &push_seg_op, &mov_seg_mem_op, +/* OR OR OR OR*/ + &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, +/* OR OR PUSH CS */ + &alup0_op, &alu_op, &push_seg_op, INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, +/* ADC ADC PUSH SS POP SS*/ + &complex_alup0_1_op, &complex_alu1_op, &push_seg_op, &mov_seg_mem_op, +/* SBB SBB SBB SBB*/ +/*10*/ &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, +/* SBB SBB PUSH DS POP DS*/ + &complex_alup0_1_op, &complex_alu1_op, &push_seg_op, &mov_seg_mem_op, + +/* AND AND AND AND*/ +/*20*/ &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, +/* AND AND DAA*/ + &alup0_op, &alu_op, INVALID, &complex_alup0_1_op, +/* SUB SUB SUB SUB*/ + &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, +/* SUB SUB DAS*/ + &alup0_op, &alu_op, INVALID, &complex_alup0_1_op, + +/* XOR XOR XOR XOR*/ +/*30*/ &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, +/* XOR XOR AAA*/ + &alup0_op, &alu_op, INVALID, &alup0_6_op, +/* CMP CMP CMP CMP*/ + &load_alup0_op, &load_alu_op, &load_alup0_op, &load_alu_op, +/* CMP CMP AAS*/ + &alup0_op, &alu_op, INVALID, &alup0_6_op, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ &alu_op, &alu_op, &alu_op, &alu_op, +/* INC ESP INC EBP INC ESI INC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ &store_op, &store_op, &store_op, &store_op, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + &store_op, &store_op, &store_op, &store_op, +/* POP EAX POP ECX POP EDX POP EBX*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, +/* POP ESP POP EBP POP ESI POP EDI*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ &pusha_op, &popa_op, &bound_op, &arpl_op, + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + &push_imm_op, &mul_op, &push_imm_op, &mul_op, +/* INSB INSW OUTSB OUTSW*/ + &ins_op, &ins_op, &outs_op, &outs_op, + +/* Jxx*/ +/*70*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + &test_mem_b_op, &test_mem_op, &xchg_mem_op, &xchg_mem_op, +/* MOV MOV MOV MOV*/ + &store_op, &store_op, &load_op, &load_op, +/* MOV from seg LEA MOV to seg POP*/ + &mov_mem_seg_op, &store_op, &mov_seg_mem_op, &pop_mem_op, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ &fxch_op, &xchg_op, &xchg_op, &xchg_op, +/* XCHG XCHG XCHG XCHG*/ + &xchg_op, &xchg_op, &xchg_op, &xchg_op, +/* CBW CWD CALL far WAIT*/ + &complex_alu1_op, &complex_alu1_op, &call_far_op, &fxch_op, +/* PUSHF POPF SAHF LAHF*/ + &pushf_op, &popf_op, &complex_alup0_1_op, &complex_alup0_1_op, + +/* MOV MOV MOV MOV*/ +/*a0*/ &load_op, &load_op, &store_op, &store_op, +/* MOVSB MOVSW CMPSB CMPSW*/ + &movs_op, &movs_op, &cmpsb_op, &cmps_op, +/* TEST TEST STOSB STOSW*/ + &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, +/* LODSB LODSW SCASB SCASW*/ + &lods_op, &lods_op, &scasb_op, &scas_op, + +/* MOV*/ +/*b0*/ &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_op, &alu_op, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, &ret_op, &ret_op, +/* LES LDS MOV MOV*/ + &lss_op, &lss_op, &store_op, &store_op, +/* ENTER LEAVE RETF RETF*/ + &enter_op, &leave_op, &retf_op, &retf_op, +/* INT3 INT INTO IRET*/ + &int_op, &int_op, &int_op, &iret_op, + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + &alup0_6_op, &alup0_3_op, &complex_alup0_1_op, &xlat_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ &loop_op, &loop_op, &loop_op, &loop_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &in_op, &in_op, &out_op, &out_op, +/* CALL JMP JMP JMP*/ + &store_op, &branch_op, &jmp_far_op, &branch_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &in_op, &in_op, &out_op, &out_op, + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, INVALID, INVALID, +/* HLT CMC*/ + &complex_alup0_1_op, &alu2_op, INVALID, INVALID, +/* CLC STC CLI STI*/ + &complex_alu1_op, &complex_alu1_op, &cli_sti_op, &cli_sti_op, +/* CLD STD INCDEC*/ + &complex_alu1_op, &complex_alu1_op, &alup0_store_op, INVALID +}; + +static const macro_op_t *opcode_timings_mod3[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ &alup0_op, &alu_op, &alup0_op, &alu_op, +/* ADD ADD PUSH ES POP ES*/ + &alup0_op, &alu_op, &push_seg_op, &mov_seg_mem_op, +/* OR OR OR OR*/ + &alup0_op, &alu_op, &alup0_op, &alu_op, +/* OR OR PUSH CS */ + &alup0_op, &alu_op, &push_seg_op, INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ &complex_alup0_1_op, &complex_alu1_op, &complex_alup0_1_op, &complex_alu1_op, +/* ADC ADC PUSH SS POP SS*/ + &complex_alup0_1_op, &complex_alu1_op, &push_seg_op, &mov_seg_mem_op, +/* SBB SBB SBB SBB*/ + &complex_alup0_1_op, &complex_alu1_op, &complex_alup0_1_op, &complex_alu1_op, +/* SBB SBB PUSH DS POP DS*/ + &complex_alup0_1_op, &complex_alu1_op, &push_seg_op, &mov_seg_mem_op, + +/* AND AND AND AND*/ +/*20*/ &alup0_op, &alu_op, &alup0_op, &alu_op, +/* AND AND DAA*/ + &alup0_op, &alu_op, INVALID, &complex_alup0_1_op, +/* SUB SUB SUB SUB*/ + &alup0_op, &alu_op, &alup0_op, &alu_op, +/* SUB SUB DAS*/ + &alup0_op, &alu_op, INVALID, &complex_alup0_1_op, + +/* XOR XOR XOR XOR*/ +/*30*/ &alup0_op, &alu_op, &alup0_op, &alu_op, +/* XOR XOR AAA*/ + &alup0_op, &alu_op, INVALID, &alup0_6_op, +/* CMP CMP CMP CMP*/ + &alup0_op, &alu_op, &alup0_op, &alu_op, +/* CMP CMP AAS*/ + &alup0_op, &alu_op, INVALID, &alup0_6_op, + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ &alu_op, &alu_op, &alu_op, &alu_op, +/* INC ESP INC EBP INC ESI INC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + &alu_op, &alu_op, &alu_op, &alu_op, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + &alu_op, &alu_op, &alu_op, &alu_op, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ &store_op, &store_op, &store_op, &store_op, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + &store_op, &store_op, &store_op, &store_op, +/* POP EAX POP ECX POP EDX POP EBX*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, +/* POP ESP POP EBP POP ESI POP EDI*/ + &pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ &pusha_op, &popa_op, &bound_op, &arpl_op, + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + &push_imm_op, &mul_op, &push_imm_op, &mul_op, +/* INSB INSW OUTSB OUTSW*/ + &ins_op, &ins_op, &outs_op, &outs_op, + +/* Jxx*/ +/*70*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + &complex_alu1_op, &complex_alu1_op, &alu3_op, &alu3_op, +/* MOV MOV MOV MOV*/ + &store_op, &store_op, &load_op, &load_op, +/* MOV from seg LEA MOV to seg POP*/ + &mov_reg_seg_op, &store_op, &mov_seg_reg_op, &pop_reg_op, + +/* NOP XCHG XCHG XCHG*/ +/*90*/ &fxch_op, &xchg_op, &xchg_op, &xchg_op, +/* XCHG XCHG XCHG XCHG*/ + &xchg_op, &xchg_op, &xchg_op, &xchg_op, +/* CBW CWD CALL far WAIT*/ + &complex_alu1_op, &complex_alu1_op, &call_far_op, &fxch_op, +/* PUSHF POPF SAHF LAHF*/ + &pushf_op, &popf_op, &complex_alup0_1_op, &complex_alup0_1_op, + +/* MOV MOV MOV MOV*/ +/*a0*/ &load_op, &load_op, &store_op, &store_op, +/* MOVSB MOVSW CMPSB CMPSW*/ + &movs_op, &movs_op, &cmpsb_op, &cmps_op, +/* TEST TEST STOSB STOSW*/ + &test_reg_b_op, &test_reg_op, &stos_op, &stos_op, +/* LODSB LODSW SCASB SCASW*/ + &lods_op, &lods_op, &scasb_op, &scas_op, + +/* MOV*/ +/*b0*/ &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_op, &alu_op, + &alu_op, &alu_op, &alu_op, &alu_op, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, &ret_op, &ret_op, +/* LES LDS MOV MOV*/ + &lss_op, &lss_op, &store_op, &store_op, +/* ENTER LEAVE RETF RETF*/ + &enter_op, &leave_op, &retf_op, &retf_op, +/* INT3 INT INTO IRET*/ + &int_op, &int_op, &int_op, &iret_op, + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + &alup0_6_op, &alup0_3_op, &complex_alup0_1_op, &xlat_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ &loop_op, &loop_op, &loop_op, &loop_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &in_op, &in_op, &out_op, &out_op, +/* CALL JMP JMP JMP*/ + &store_op, &branch_op, &jmp_far_op, &branch_op, +/* IN AL IN AX OUT_AL OUT_AX*/ + &in_op, &in_op, &out_op, &out_op, + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, INVALID, INVALID, +/* HLT CMC*/ + &complex_alup0_1_op, &alu2_op, INVALID, INVALID, +/* CLC STC CLI STI*/ + &complex_alu1_op, &complex_alu1_op, &cli_sti_op, &cli_sti_op, +/* CLD STD INCDEC*/ + &complex_alu1_op, &complex_alu1_op, &complex_alup0_1_op, INVALID +}; + +static const macro_op_t *opcode_timings_0f[256] = +{ +/*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, + INVALID, &alu6_op, &alu6_op, INVALID, + &invd_op, &wbinvd_op, INVALID, INVALID, + INVALID, &load_op, &femms_op, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, + &alu6_op, &alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ &alu6_op, &alu6_op, &alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op, + INVALID, INVALID, &mload_op, &mload_op, + +/*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + &load_mmx_op, &load_mmx_op, &load_mmx_op, &emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mstore_op, &mstore_op, + +/*80*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*90*/ &setcc_reg_op, &setcc_reg_op, &setcc_reg_op, &setcc_reg_op, + &setcc_reg_op, &setcc_reg_op, &setcc_reg_op, &setcc_reg_op, + &setcc_reg_op, &setcc_reg_op, &setcc_reg_op, &setcc_reg_op, + &setcc_reg_op, &setcc_reg_op, &setcc_reg_op, &setcc_reg_op, + +/*a0*/ &push_seg_op, &mov_seg_mem_op, &cpuid_op, &load_alu_op, + &alu_store_op, &alu_store_op, INVALID, INVALID, + &push_seg_op, &mov_seg_mem_op, INVALID, &load_alu_op, + &alu_store_op, &alu_store_op, INVALID, &mul_op, + +/*b0*/ &cmpxchg_b_op, &cmpxchg_op, &lss_op, &load_alu_op, + &lss_op, &lss_op, &load_alup0_op, &load_alu_op, + INVALID, INVALID, &load_alu_op, &load_alu_op, + &bsx_op, &bsx_op, &load_alup0_op, &load_alu_op, + +/*c0*/ &alup0_store_op, &alu_store_op, INVALID, INVALID, + INVALID, INVALID, INVALID, &cmpxchg_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + +/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &load_mmx_mul_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + +/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, + +/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op, + INVALID, &pmul_mem_op, INVALID, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, +}; +static const macro_op_t *opcode_timings_0f_mod3[256] = +{ +/*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, + INVALID, &alu6_op, &alu6_op, INVALID, + &invd_op, &wbinvd_op, INVALID, INVALID, + INVALID, INVALID, &femms_op, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, + &alu6_op, &alu6_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ &alu6_op, &alu6_op, &alu6_op, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + &mmx_op, &mmx_op, &mmx_op, &mmx_op, + INVALID, INVALID, &mmx_op, &mmx_op, + +/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + &mmx_op, &mmx_op, &mmx_op, &emms_op, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, &mmx_op, &mmx_op, + +/*80*/ &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + &branch_op, &branch_op, &branch_op, &branch_op, + +/*90*/ &setcc_mem_op, &setcc_mem_op, &setcc_mem_op, &setcc_mem_op, + &setcc_mem_op, &setcc_mem_op, &setcc_mem_op, &setcc_mem_op, + &setcc_mem_op, &setcc_mem_op, &setcc_mem_op, &setcc_mem_op, + &setcc_mem_op, &setcc_mem_op, &setcc_mem_op, &setcc_mem_op, + +/*a0*/ &push_seg_op, &mov_seg_mem_op, &cpuid_op, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, INVALID, + &push_seg_op, &mov_seg_mem_op, INVALID, &complex_alu1_op, + &complex_alu1_op, &complex_alu1_op, INVALID, &mul_op, + +/*b0*/ &cmpxchg_b_op, &cmpxchg_op, &lss_op, &complex_alu1_op, + &lss_op, &lss_op, &alup0_op, &alu_op, + INVALID, INVALID, &complex_alu1_op, &complex_alu1_op, + &bsx_op, &bsx_op, &alup0_op, &alu_op, + +/*c0*/ &complex_alup0_1_op, &complex_alu1_op, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + &bswap_op, &bswap_op, &bswap_op, &bswap_op, + +/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &mmx_mul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, + +/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, INVALID, &mmx_op, + &mmx_op, &mmx_op, INVALID, &mmx_op, + +/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op, + INVALID, &pmul_op, INVALID, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, + &mmx_op, &mmx_op, &mmx_op, INVALID, +}; + +static const macro_op_t *opcode_timings_shift[8] = +{ + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op +}; +static const macro_op_t *opcode_timings_shift_b[8] = +{ + &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, + &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op +}; +static const macro_op_t *opcode_timings_shift_mod3[8] = +{ + &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, + &alu_op, &alu_op, &alu_op, &alu_op +}; +static const macro_op_t *opcode_timings_shift_b_mod3[8] = +{ + &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, + &alup0_op, &alup0_op, &alup0_op, &alup0_op +}; + +static const macro_op_t *opcode_timings_80[8] = +{ + &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, + &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, +}; +static const macro_op_t *opcode_timings_80_mod3[8] = +{ + &alup0_op, &alup0_op, &alup0_store_op, &alup0_store_op, + &alup0_op, &alup0_op, &alup0_op, &alup0_op, +}; +static const macro_op_t *opcode_timings_8x[8] = +{ + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, +}; +static const macro_op_t *opcode_timings_8x_mod3[8] = +{ + &alu_op, &alu_op, &alu_store_op, &alu_store_op, + &alu_op, &alu_op, &alu_op, &alu_op, +}; + +static const macro_op_t *opcode_timings_f6[8] = +{ +/* TST NOT NEG*/ + &test_mem_imm_b_op, INVALID, &alup0_store_op, &alup0_store_op, +/* MUL IMUL DIV IDIV*/ + &mul_mem_op, &mul_mem_op, &div16_mem_op, &div16_mem_op, +}; +static const macro_op_t *opcode_timings_f6_mod3[8] = +{ +/* TST NOT NEG*/ + &test_reg_b_op, INVALID, &alup0_op, &alup0_op, +/* MUL IMUL DIV IDIV*/ + &mul_op, &mul_op, &div16_op, &div16_op, +}; +static const macro_op_t *opcode_timings_f7[8] = +{ +/* TST NOT NEG*/ + &test_mem_imm_op, INVALID, &alu_store_op, &alu_store_op, +/* MUL IMUL DIV IDIV*/ + &mul64_mem_op, &mul64_mem_op, &div32_mem_op, &div32_mem_op, +}; +static const macro_op_t *opcode_timings_f7_mod3[8] = +{ +/* TST NOT NEG*/ + &test_reg_op, INVALID, &alu_op, &alu_op, +/* MUL IMUL DIV IDIV*/ + &mul64_op, &mul64_op, &div32_op, &div32_op, +}; +static const macro_op_t *opcode_timings_ff[8] = +{ +/* INC DEC CALL CALL far*/ + &alu_store_op, &alu_store_op, &store_op, &call_far_op, +/* JMP JMP far PUSH*/ + &branch_op, &jmp_far_op, &push_mem_op, INVALID +}; +static const macro_op_t *opcode_timings_ff_mod3[8] = +{ +/* INC DEC CALL CALL far*/ + &complex_alu1_op, &complex_alu1_op, &store_op, &call_far_op, +/* JMP JMP far PUSH*/ + &branch_op, &jmp_far_op, &complex_push_mem_op, INVALID +}; + +static const macro_op_t *opcode_timings_d8[8] = +{ +/* FADDs FMULs FCOMs FCOMPs*/ + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, +/* FSUBs FSUBRs FDIVs FDIVRs*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, +}; +static const macro_op_t *opcode_timings_d8_mod3[8] = +{ +/* FADD FMUL FCOM FCOMP*/ + &fadd_op, &fmul_op, &float_op, &float_op, +/* FSUB FSUBR FDIV FDIVR*/ + &float_op, &float_op, &fdiv_op, &fdiv_op, +}; + +static const macro_op_t *opcode_timings_d9[8] = +{ +/* FLDs FSTs FSTPs*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FLDENV FLDCW FSTENV FSTCW*/ + &complex_float_l_op, &fldcw_op, &complex_float_l_op, &complex_float_op +}; +static const macro_op_t *opcode_timings_d9_mod3[64] = +{ + /*FLD*/ + &float_op, &float_op, &float_op, &float_op, + &float_op, &float_op, &float_op, &float_op, + /*FXCH*/ + &fxch_op, &fxch_op, &fxch_op, &fxch_op, + &fxch_op, &fxch_op, &fxch_op, &fxch_op, + /*FNOP*/ + &float_op, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + /*FSTP*/ + &float2_op, &float2_op, &float2_op, &float2_op, + &float2_op, &float2_op, &float2_op, &float2_op, +/* opFCHS opFABS*/ + &fchs_op, &float_op, INVALID, INVALID, +/* opFTST opFXAM*/ + &float_op, &float_op, INVALID, INVALID, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ + &float_op, &float_op, &float_op, &float_op, +/* opFLDEG2 opFLDLN2 opFLDZ*/ + &float_op, &float_op, &float_op, INVALID, +/* opF2XM1 opFYL2X opFPTAN opFPATAN*/ + &fsin_op, &fsin_op, &fsin_op, &fsin_op, +/* opFDECSTP opFINCSTP,*/ + INVALID, INVALID, &float_op, &float_op, +/* opFPREM opFSQRT opFSINCOS*/ + &fdiv_op, INVALID, &fsqrt_op, &fsin_op, +/* opFRNDINT opFSCALE opFSIN opFCOS*/ + &float_op, &fdiv_op, &fsin_op, &fsin_op +}; + +static const macro_op_t *opcode_timings_da[8] = +{ +/* FIADDl FIMULl FICOMl FICOMPl*/ + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, +/* FISUBl FISUBRl FIDIVl FIDIVRl*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, +}; +static const macro_op_t *opcode_timings_da_mod3[8] = +{ + INVALID, INVALID, INVALID, INVALID, +/* FCOMPP*/ + INVALID, &float_op, INVALID, INVALID +}; + +static const macro_op_t *opcode_timings_db[8] = +{ +/* FLDil FSTil FSTPil*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FLDe FSTPe*/ + INVALID, &flde_op, INVALID, &fste_op +}; +static const macro_op_t *opcode_timings_db_mod3[64] = +{ + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/* opFNOP opFCLEX opFINIT*/ + INVALID, &float_op, &float_op, &float_op, +/* opFNOP opFNOP*/ + &float_op, &float_op, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +}; + +static const macro_op_t *opcode_timings_dc[8] = +{ +/* FADDd FMULd FCOMd FCOMPd*/ + &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, +/* FSUBd FSUBRd FDIVd FDIVRd*/ + &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, +}; +static const macro_op_t *opcode_timings_dc_mod3[8] = +{ +/* opFADDr opFMULr*/ + &fadd_op, &fmul_op, INVALID, INVALID, +/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + &float_op, &float_op, &fdiv_op, &fdiv_op +}; + +static const macro_op_t *opcode_timings_dd[8] = +{ +/* FLDd FSTd FSTPd*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FRSTOR FSAVE FSTSW*/ + &complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op +}; +static const macro_op_t *opcode_timings_dd_mod3[8] = +{ +/* FFFREE FST FSTP*/ + &float_op, INVALID, &float_op, &float_op, +/* FUCOM FUCOMP*/ + &float_op, &float_op, INVALID, INVALID +}; + +static const macro_op_t *opcode_timings_de[8] = +{ +/* FIADDw FIMULw FICOMw FICOMPw*/ + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, +/* FISUBw FISUBRw FIDIVw FIDIVRw*/ + &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, +}; +static const macro_op_t *opcode_timings_de_mod3[8] = +{ +/* FADDP FMULP FCOMPP*/ + &fadd_op, &fmul_op, INVALID, &float_op, +/* FSUBP FSUBRP FDIVP FDIVRP*/ + &float_op, &float_op, &fdiv_op, &fdiv_op, +}; + +static const macro_op_t *opcode_timings_df[8] = +{ +/* FILDiw FISTiw FISTPiw*/ + &load_float_op, INVALID, &fstore_op, &fstore_op, +/* FILDiq FBSTP FISTPiq*/ + INVALID, &load_float_op, &complex_float_l_op, &fstore_op, +}; +static const macro_op_t *opcode_timings_df_mod3[8] = +{ + INVALID, INVALID, INVALID, INVALID, +/* FSTSW AX*/ + &float_op, INVALID, INVALID, INVALID +}; + + +static uint8_t last_prefix; +static int prefixes; + +static int decode_timestamp; +static int last_complete_timestamp; + +typedef struct p6_unit_t +{ + uint32_t uop_mask; + double first_available_cycle; +} p6_unit_t; + +static int nr_units; +static p6_unit_t *units; + +/*Pentium Pro has no MMX*/ +static p6_unit_t ppro_units[] = +{ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT)}, /*Port 0*/ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH)}, /*Port 1*/ + {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD)}, /*Port 2*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED)}, /*Port 3*/ + {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA)}, /*Port 4*/ +}; +#define NR_PPRO_UNITS (sizeof(ppro_units) / sizeof(p6_unit_t)) + +/*Pentium II/Celeron assigns the multiplier to port 0, the shifter to port 1, and shares the MMX ALU*/ +static p6_unit_t p2_units[] = +{ + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT) | /*Port 0*/ + (1 << UOP_MMX) | (1 << UOP_MMX_MUL)}, + {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Port 1*/ + (1 << UOP_MMX) | (1 << UOP_MMX_SHIFT)}, + {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Port 2*/ + {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED)}, /*Port 3*/ + {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA)}, /*Port 4*/ +}; +#define NR_P2_UNITS (sizeof(p2_units) / sizeof(p6_unit_t)) + +static int uop_run(const p6_uop_t *uop, int decode_time) +{ + int c; + p6_unit_t *best_unit = NULL; + int best_start_cycle = 99999; + + /*UOP_FXCH does not require execution*/ + if (uop->type == UOP_FXCH) + return decode_time; + + /*Find execution unit for this uOP*/ + for (c = 0; c < nr_units; c++) + { + if (units[c].uop_mask & (1 << uop->type)) + { + if (units[c].first_available_cycle < best_start_cycle) + { + best_unit = &units[c]; + best_start_cycle = units[c].first_available_cycle; + } + } + } + if (!best_unit) + fatal("uop_run: can not find execution unit\n"); + + if (best_start_cycle < decode_time) + best_start_cycle = decode_time; + best_unit->first_available_cycle = best_start_cycle + uop->latency; + + + + return best_start_cycle + uop->latency; +} + +/*The P6 decoders can decode, per clock : + - 1 to 3 'simple' instructions, each up to 1 uOP and 7 bytes long + - 1 'complex' instruction, up to 4 uOPs or 3 per cycle for instructions longer than 4 uOPs +*/ +static struct +{ + int nr_uops; + const p6_uop_t *uops[6]; + /*Earliest time a uop can start. If the timestamp is -1, then the uop is + part of a dependency chain and the start time is the completion time of + the previous uop*/ + int earliest_start[6]; +} decode_buffer; + +#define NR_OPSEQS 3 +/*Timestamps of when the last three op sequences completed. Technically this is incorrect, +as the actual size of the opseq buffer is 20 bytes and not 18, but I'm restricted to multiples of 6*/ +static int opseq_completion_timestamp[NR_OPSEQS]; +static int next_opseq = 0; + +#define NR_REGS 8 +/*Timestamp of when last operation on an integer register completed*/ +static int reg_available_timestamp[NR_REGS]; +/*Timestamp of when last operation on an FPU register completed*/ +static int fpu_st_timestamp[8]; +/*Completion time of the last uop to be processed. Used to calculate timing of + dependent uop chains*/ +static int last_uop_timestamp = 0; + +void decode_flush_p6() +{ + int c; + int start_timestamp, uop_timestamp = 0; + + /*Decoded opseq can not be submitted if there are no free spaces in the + opseq buffer*/ + if (decode_timestamp < opseq_completion_timestamp[next_opseq]) + decode_timestamp = opseq_completion_timestamp[next_opseq]; + + /*Ensure that uops can not be submitted before they have been decoded*/ + if (decode_timestamp > last_uop_timestamp) + last_uop_timestamp = decode_timestamp; + + /*Submit uops to execution units, and determine the latest completion time*/ + for (c = 0; c < (decode_buffer.nr_uops); c++) + { + if (decode_buffer.earliest_start[c] == -1) + start_timestamp = last_uop_timestamp; + else + start_timestamp = decode_buffer.earliest_start[c]; + + last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); + if (last_uop_timestamp > uop_timestamp) + uop_timestamp = last_uop_timestamp; + } + + /*Calculate opseq completion time. Since opseqs complete in order, it + must be after the last completion.*/ + if (uop_timestamp <= last_complete_timestamp) + last_complete_timestamp = last_complete_timestamp + 1; + else + last_complete_timestamp = uop_timestamp; + + /*Advance to next opseq in buffer*/ + opseq_completion_timestamp[next_opseq] = last_complete_timestamp; + next_opseq++; + if (next_opseq == NR_OPSEQS) + next_opseq = 0; + + decode_timestamp++; + decode_buffer.nr_uops = 0; +} + +/*The instruction is only of interest here if it's longer than 7 bytes, as that's the + limit on P6 simple decoding*/ +static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) +{ + int len = prefixes + 1; /*Opcode*/ + if (deps & MODRM) + { + len++; /*ModR/M*/ + if (deps & HAS_IMM8) + len++; + if (deps & HAS_IMM1632) + len += (op_32 & 0x100) ? 4 : 2; + + if (op_32 & 0x200) + { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) + { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } + else + { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } + else + { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; + } + } + + return len; +} + +static void decode_instruction(const macro_op_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) +{ + uint32_t regmask_required; + uint32_t regmask_modified; + int c; + int d = 0; /*Complex decoder uOPs*/ + int earliest_start = 0; + decode_type_t decode_type = ins->decode_type; + int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); + + /*Generate input register mask, and determine the earliest time this + instruction can start. This is not accurate, as this is calculated per + x86 instruction when it should be handled per uop*/ + regmask_required = get_dstdep_mask(deps, fetchdat, bit8); + regmask_required |= get_addr_regmask(deps, fetchdat, op_32); + for (c = 0; c < 8; c++) + { + if (regmask_required & (1 << c)) + { + if (reg_available_timestamp[c] > decode_timestamp) + earliest_start = reg_available_timestamp[c]; + } + } + if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) + earliest_start = fpu_st_timestamp[0]; + if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) + earliest_start = fpu_st_timestamp[1]; + if ((deps & FPU_RW_STREG)) + { + int reg = fetchdat & 7; + + if (fpu_st_timestamp[reg] > decode_timestamp) + earliest_start = fpu_st_timestamp[reg]; + } + + /*Simple decoders are limited to 7 bytes & 1 uOP*/ + if ((decode_type == DECODE_SIMPLE && instr_length > 7) || (decode_type == DECODE_SIMPLE && ins->nr_uops > 1)) + decode_type = DECODE_COMPLEX; + + switch (decode_type) + { + case DECODE_SIMPLE: + if (decode_buffer.nr_uops - d == 2) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 3; + decode_flush_p6(); + } + else if (decode_buffer.nr_uops - d == 1) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 2+d; + if (d) + decode_flush_p6(); + } + else if (decode_buffer.nr_uops) + { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 1+d; + } + else + { + decode_buffer.nr_uops = 1; + decode_buffer.uops[0] = &ins->uop[0]; + decode_buffer.earliest_start[0] = earliest_start; + } + break; + + case DECODE_COMPLEX: + if (decode_buffer.nr_uops) + decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. can't be decoded after a simple one*/ + + d = 0; + + for (c = 0; c < ins->nr_uops; c++) + { + decode_buffer.uops[d] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[d] = earliest_start; + else + decode_buffer.earliest_start[d] = -1; + d++; + + if ((d == 3) && (ins->nr_uops > 4)) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ + { + d = 0; + decode_buffer.nr_uops = 3; + decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ + } + } + if (d) + { + decode_buffer.nr_uops = d; + } + break; + } + + /*Update write timestamps for any output registers*/ + regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); + for (c = 0; c < 8; c++) + { + if (regmask_modified & (1 << c)) + reg_available_timestamp[c] = last_complete_timestamp; + } + if (deps & FPU_POP) + { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c+1]; + fpu_st_timestamp[7] = 0; + } + if (deps & FPU_POP2) + { + for (c = 0; c < 6; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c+2]; + fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; + } + if (deps & FPU_PUSH) + { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c+1] = fpu_st_timestamp[c]; + fpu_st_timestamp[0] = 0; + } + if (deps & FPU_WRITE_ST0) + fpu_st_timestamp[0] = last_complete_timestamp; + if (deps & FPU_WRITE_ST1) + fpu_st_timestamp[1] = last_complete_timestamp; + if (deps & FPU_WRITE_STREG) + { + int reg = fetchdat & 7; + if (deps & FPU_POP) + reg--; + if (reg >= 0 && + !(reg == 0 && (deps & FPU_WRITE_ST0)) && + !(reg == 1 && (deps & FPU_WRITE_ST1))) + fpu_st_timestamp[reg] = last_complete_timestamp; + } +} + +void codegen_timing_p6_block_start() +{ + int c; + + for (c = 0; c < nr_units; c++) + units[c].first_available_cycle = 0; + + decode_timestamp = 0; + last_complete_timestamp = 0; + + for (c = 0; c < NR_OPSEQS; c++) + opseq_completion_timestamp[c] = 0; + next_opseq = 0; + + for (c = 0; c < NR_REGS; c++) + reg_available_timestamp[c] = 0; + for (c = 0; c < 8; c++) + fpu_st_timestamp[c] = 0; +} + +void codegen_timing_p6_start() +{ + if (machines[machine].cpu[cpu_manufacturer].cpus[cpu].cpu_type == CPU_PENTIUMPRO) + { + units = ppro_units; + nr_units = NR_PPRO_UNITS; + } + else + { + units = p2_units; + nr_units = NR_P2_UNITS; + } + last_prefix = 0; + prefixes = 0; +} + +void codegen_timing_p6_prefix(uint8_t prefix, uint32_t fetchdat) +{ + if (prefix != 0x0f) + decode_timestamp++; + + last_prefix = prefix; + prefixes++; +} + +void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + const macro_op_t **ins_table; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int old_last_complete_timestamp = last_complete_timestamp; + int bit8 = !(opcode & 1); + + switch (last_prefix) + { + case 0x0f: + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; + + case 0xd8: + ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) + { + case 0x80: case 0x82: + ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: case 0x83: + ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: case 0xd0: case 0xd2: + ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc1: case 0xd1: case 0xd3: + ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; + + default: + ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } + + if (ins_table[opcode]) + decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); + else + decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); +} + +void codegen_timing_p6_block_end() +{ + if (decode_buffer.nr_uops) + { + int old_last_complete_timestamp = last_complete_timestamp; + decode_flush_p6(); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); + } +} + +int codegen_timing_p6_jump_cycles() +{ + if (decode_buffer.nr_uops) + return 1; + return 0; +} + +codegen_timing_t codegen_timing_p6 = +{ + codegen_timing_p6_start, + codegen_timing_p6_prefix, + codegen_timing_p6_opcode, + codegen_timing_p6_block_start, + codegen_timing_p6_block_end, + codegen_timing_p6_jump_cycles +}; diff --git a/src/codegen/codegen_timing_pentium.c b/src/codegen/codegen_timing_pentium.c new file mode 100644 index 000000000..c95821df9 --- /dev/null +++ b/src/codegen/codegen_timing_pentium.c @@ -0,0 +1,1323 @@ +/*Elements taken into account : + - U/V integer pairing + - FPU/FXCH pairing + - Prefix decode delay (including shadowing) + - FPU latencies + - AGI stalls + Elements not taken into account : + - Branch prediction (beyond most simplistic approximation) + - PMMX decode queue + - MMX latencies +*/ +#include +#include +#include +#include +#include <86box/86box.h> +#include <86box/mem.h> +#include "cpu.h" +#include "x86.h" +#include "x86_ops.h" +#include "x87.h" +#include "codegen.h" +#include "codegen_ops.h" +#include "codegen_timing_common.h" + + +/*Instruction has different execution time for 16 and 32 bit data. Does not pair */ +#define CYCLES_HAS_MULTI (1 << 28) + +#define CYCLES_MULTI(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) + +/*Instruction lasts given number of cycles. Does not pair*/ +#define CYCLES(c) (c | PAIR_NP) + + +static int pair_timings[4][4] = +{ +/* Reg RM RMW Branch*/ +/*Reg*/ {1, 2, 3, 2}, +/*RM*/ {2, 2, 3, 3}, +/*RMW*/ {3, 4, 5, 4}, +/*Branch*/ {-1, -1, -1, -1} +}; + +/*Instruction follows either register timing, read-modify, or read-modify-write. + May be pairable*/ +#define CYCLES_REG (0ull << 0) +#define CYCLES_RM (1ull << 0) +#define CYCLES_RMW (2ull << 0) +#define CYCLES_BRANCH (3ull << 0) + +/*Instruction has immediate data. Can only be used with PAIR_U/PAIR_V/PAIR_UV*/ +#define CYCLES_HASIMM (3ull << 2) +#define CYCLES_IMM8 (1ull << 2) +#define CYCLES_IMM1632 (2ull << 2) + +#define CYCLES_MASK ((1ull << 7) - 1) + + +/*Instruction does not pair*/ +#define PAIR_NP (0ull << 29) +/*Instruction pairs in U pipe only*/ +#define PAIR_U (1ull << 29) +/*Instruction pairs in V pipe only*/ +#define PAIR_V (2ull << 29) +/*Instruction pairs in both U and V pipes*/ +#define PAIR_UV (3ull << 29) +/*Instruction pairs in U pipe only and only with FXCH*/ +#define PAIR_FX (5ull << 29) +/*Instruction is FXCH and only pairs in V pipe with FX pairable instruction*/ +#define PAIR_FXCH (6ull << 29) + +#define PAIR_FPU (4ull << 29) + +#define PAIR_MASK (7ull << 29) + + +/*comp_time = cycles until instruction complete + i_overlap = cycles that overlap with integer + f_overlap = cycles that overlap with subsequent FPU*/ +#define FPU_CYCLES(comp_time, i_overlap, f_overlap) ((uint64_t)comp_time) | ((uint64_t)i_overlap << 41) | ((uint64_t)f_overlap << 49) | PAIR_FPU + +#define FPU_COMP_TIME(timing) (timing & 0xff) +#define FPU_I_OVERLAP(timing) ((timing >> 41) & 0xff) +#define FPU_F_OVERLAP(timing) ((timing >> 49) & 0xff) + +#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) + +#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) + +#define FPU_RESULT_LATENCY(timing) ((timing >> 41) & 0xff) + + +#define INVALID 0 + +static int u_pipe_full; +static uint32_t u_pipe_opcode; +static uint64_t *u_pipe_timings; +static uint32_t u_pipe_op_32; +static uint32_t u_pipe_regmask; +static uint32_t u_pipe_fetchdat; +static int u_pipe_decode_delay_offset; +static uint64_t *u_pipe_deps; + +static uint32_t regmask_modified; + +static uint32_t addr_regmask; + +static int fpu_latency; +static int fpu_st_latency[8]; + +static uint64_t opcode_timings[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, +/* ADD ADD PUSH ES POP ES*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), +/* OR OR OR OR*/ + PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, +/* OR OR PUSH CS */ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, +/* ADC ADC PUSH SS POP SS*/ + PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), +/* SBB SBB SBB SBB*/ + PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, +/* SBB SBB PUSH DS POP DS*/ + PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), + +/* AND AND AND AND*/ +/*20*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, +/* AND AND DAA*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), +/* SUB SUB SUB SUB*/ + PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, +/* SUB SUB DAS*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), + +/* XOR XOR XOR XOR*/ +/*30*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, +/* XOR XOR AAA*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), +/* CMP CMP CMP CMP*/ + PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, +/* CMP CMP AAS*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* INC ESP INC EBP INC ESI INC EDI*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* POP EAX POP ECX POP EDX POP EBX*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* POP ESP POP EBP POP ESI POP EDI*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(8), PAIR_NP | CYCLES(7), + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), +/* INSB INSW OUTSB OUTSW*/ + PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(13), + +/* Jxx*/ +/*70*/ PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, + PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, + PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, + PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, + +/*80*/ INVALID, INVALID, INVALID, INVALID, +/* TEST TEST XCHG XCHG*/ + PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), +/* MOV MOV MOV MOV*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV, +/* MOV from seg LEA MOV to seg POP*/ + PAIR_NP | CYCLES(1), PAIR_UV | CYCLES_REG, CYCLES(3), PAIR_NP | CYCLES(3), + +/* NOP XCHG XCHG XCHG*/ +/*90*/ PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), +/* XCHG XCHG XCHG XCHG*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), +/* CBW CWD CALL far WAIT*/ + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(1), +/* PUSHF POPF SAHF LAHF*/ + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + +/* MOV MOV MOV MOV*/ +/*a0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* MOVSB MOVSW CMPSB CMPSW*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), +/* TEST TEST STOSB STOSW*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), +/* LODSB LODSW SCASB SCASW*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), + +/* MOV*/ +/*b0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), +/* LES LDS MOV MOV*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* ENTER LEAVE RETF RETF*/ + PAIR_NP | CYCLES(15), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), +/* INT3 INT INTO IRET*/ + PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), +/* IN AL IN AX OUT_AL OUT_AX*/ + PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12), +/* CALL JMP JMP JMP*/ + PAIR_V | CYCLES_REG, PAIR_V | CYCLES_REG, PAIR_NP | CYCLES(0), PAIR_V | CYCLES_REG, +/* IN AL IN AX OUT_AL OUT_AX*/ + PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12), + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), +/* HLT CMC*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), INVALID, INVALID, +/* CLC STC CLI STI*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), +/* CLD STD INCDEC*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_RMW, INVALID +}; + +static uint64_t opcode_timings_mod3[256] = +{ +/* ADD ADD ADD ADD*/ +/*00*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* ADD ADD PUSH ES POP ES*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), +/* OR OR OR OR*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* OR OR PUSH CS */ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), INVALID, + +/* ADC ADC ADC ADC*/ +/*10*/ PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, +/* ADC ADC PUSH SS POP SS*/ + PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), +/* SBB SBB SBB SBB*/ + PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, +/* SBB SBB PUSH DS POP DS*/ + PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), + +/* AND AND AND AND*/ +/*20*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* AND AND DAA*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), +/* SUB SUB SUB SUB*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* SUB SUB DAS*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), + +/* XOR XOR XOR XOR*/ +/*30*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* XOR XOR AAA*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), +/* CMP CMP CMP CMP*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* CMP CMP AAS*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), + +/* INC EAX INC ECX INC EDX INC EBX*/ +/*40*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* INC ESP INC EBP INC ESI INC EDI*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* DEC EAX DEC ECX DEC EDX DEC EBX*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* DEC ESP DEC EBP DEC ESI DEC EDI*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + +/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ +/*50*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* POP EAX POP ECX POP EDX POP EBX*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* POP ESP POP EBP POP ESI POP EDI*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + +/* PUSHA POPA BOUND ARPL*/ +/*60*/ PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(8), PAIR_NP | CYCLES(7), + INVALID, INVALID, INVALID, INVALID, +/* PUSH imm IMUL PUSH imm IMUL*/ + PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), +/* INSB INSW OUTSB OUTSW*/ + PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(13), + +/* Jxx*/ +/*70*/ PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, + PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, + PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, + PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, + +/*80*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* TEST TEST XCHG XCHG*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), +/* MOV MOV MOV MOV*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* MOV from seg LEA MOV to seg POP*/ + PAIR_NP | CYCLES(1), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + +/* NOP XCHG XCHG XCHG*/ +/*90*/ PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), +/* XCHG XCHG XCHG XCHG*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), +/* CBW CWD CALL far WAIT*/ + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(1), +/* PUSHF POPF SAHF LAHF*/ + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + +/* MOV MOV MOV MOV*/ +/*a0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* MOVSB MOVSW CMPSB CMPSW*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), +/* TEST TEST STOSB STOSW*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), +/* LODSB LODSW SCASB SCASW*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), + +/* MOV*/ +/*b0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + +/* RET imm RET*/ +/*c0*/ INVALID, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), +/* LES LDS MOV MOV*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, +/* ENTER LEAVE RETF RETF*/ + PAIR_NP | CYCLES(15), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), +/* INT3 INT INTO IRET*/ + PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), + + +/*d0*/ INVALID, INVALID, INVALID, INVALID, +/* AAM AAD SETALC XLAT*/ + PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/* LOOPNE LOOPE LOOP JCXZ*/ +/*e0*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), +/* IN AL IN AX OUT_AL OUT_AX*/ + PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12), +/* CALL JMP JMP JMP*/ + PAIR_V | CYCLES_REG, PAIR_V | CYCLES_REG, PAIR_NP | CYCLES(0), PAIR_V | CYCLES_REG, +/* IN AL IN AX OUT_AL OUT_AX*/ + PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12), + +/* REPNE REPE*/ +/*f0*/ INVALID, INVALID, PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), +/* HLT CMC*/ + PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), INVALID, INVALID, +/* CLC STC CLI STI*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), +/* CLD STD INCDEC*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_REG, INVALID +}; + +static uint64_t opcode_timings_0f[256] = +{ +/*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), + INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, + PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), + PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ PAIR_NP | CYCLES(9), CYCLES(1), PAIR_NP | CYCLES(9), INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, + PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, + PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, + INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, + +/*70*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, + PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_NP | CYCLES(100), + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, + +/*80*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + +/*90*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + +/*a0*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(8), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), INVALID, INVALID, + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(13), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(10), + +/*b0*/ PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13), + PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + INVALID, INVALID, PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13), + PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + +/*c0*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), + PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), + +/*d0*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, + INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID, + PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, + PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, + +/*e0*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, + INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID, + PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, + PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, + +/*f0*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, + INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID, + PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, + PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, +}; +static uint64_t opcode_timings_0f_mod3[256] = +{ +/*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), + INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, + PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*10*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*20*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), + PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*30*/ PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(9), INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*50*/ INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/*60*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + +/*70*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(100), + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + +/*80*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), + +/*90*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + +/*a0*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(8), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), INVALID, INVALID, + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(13), + PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(10), + +/*b0*/ PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13), + PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + INVALID, INVALID, PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13), + PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), + +/*c0*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), + PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), + +/*d0*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, + +/*e0*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, + INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, + +/*f0*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, +}; + +static uint64_t opcode_timings_shift[8] = +{ + PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, + PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, +}; +static uint64_t opcode_timings_shift_mod3[8] = +{ + PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, + PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, +}; + +static uint64_t opcode_timings_f6[8] = +{ +/* TST NOT NEG*/ + PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), +/* MUL IMUL DIV IDIV*/ + PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) +}; +static uint64_t opcode_timings_f6_mod3[8] = +{ +/* TST NOT NEG*/ + PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), +/* MUL IMUL DIV IDIV*/ + PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) +}; +static uint64_t opcode_timings_f7[8] = +{ +/* TST NOT NEG*/ + PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), +/* MUL IMUL DIV IDIV*/ + PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) +}; +static uint64_t opcode_timings_f7_mod3[8] = +{ +/* TST NOT NEG*/ + PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), +/* MUL IMUL DIV IDIV*/ + PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) +}; +static uint64_t opcode_timings_ff[8] = +{ +/* INC DEC CALL CALL far*/ + PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), +/* JMP JMP far PUSH*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID +}; +static uint64_t opcode_timings_ff_mod3[8] = +{ +/* INC DEC CALL CALL far*/ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), +/* JMP JMP far PUSH*/ + PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID +}; + +static uint64_t opcode_timings_d8[8] = +{ +/* FADDs FMULs FCOMs FCOMPs*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), +/* FSUBs FSUBRs FDIVs FDIVRs*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) +}; +static uint64_t opcode_timings_d8_mod3[8] = +{ +/* FADD FMUL FCOM FCOMP*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), +/* FSUB FSUBR FDIV FDIVR*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) +}; + +static uint64_t opcode_timings_d9[8] = +{ +/* FLDs FSTs FSTPs*/ + PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), +/* FLDENV FLDCW FSTENV FSTCW*/ + PAIR_NP | FPU_CYCLES(32,0,0), PAIR_NP | FPU_CYCLES(8,0,0), PAIR_NP | FPU_CYCLES(48,0,0), PAIR_NP | FPU_CYCLES(2,0,0) +}; +static uint64_t opcode_timings_d9_mod3[64] = +{ + /*FLD*/ + PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), + PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), + /*FXCH*/ + PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), + PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), + /*FNOP*/ + PAIR_NP | FPU_CYCLES(3,0,0), INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + /*FSTP*/ + PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), + PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), +/* opFCHS opFABS*/ + PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), INVALID, INVALID, +/* opFTST opFXAM*/ + PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(21,4,0), INVALID, INVALID, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ + PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(5,2,2), +/* opFLDEG2 opFLDLN2 opFLDZ*/ + PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(2,0,0), INVALID, +/* opF2XM1 opFYL2X opFPTAN opFPATAN*/ + PAIR_NP | FPU_CYCLES(53,2,2), PAIR_NP | FPU_CYCLES(103,2,2), PAIR_NP | FPU_CYCLES(120,36,0), PAIR_NP | FPU_CYCLES(112,2,2), +/* opFDECSTP opFINCSTP,*/ + INVALID, INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), +/* opFPREM opFSQRT opFSINCOS*/ + PAIR_NP | FPU_CYCLES(64,2,2), INVALID, PAIR_NP | FPU_CYCLES(70,69,2), PAIR_NP | FPU_CYCLES(89,2,2), +/* opFRNDINT opFSCALE opFSIN opFCOS*/ + PAIR_NP | FPU_CYCLES(9,0,0), PAIR_NP | FPU_CYCLES(20,5,0), PAIR_NP | FPU_CYCLES(65,2,2), PAIR_NP | FPU_CYCLES(65,2,2) +}; + +static uint64_t opcode_timings_da[8] = +{ +/* FIADDl FIMULl FICOMl FICOMPl*/ + PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), +/* FISUBl FISUBRl FIDIVl FIDIVRl*/ + PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) +}; +static uint64_t opcode_timings_da_mod3[8] = +{ + INVALID, INVALID, INVALID, INVALID, +/* FCOMPP*/ + INVALID, PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID +}; + + +static uint64_t opcode_timings_db[8] = +{ +/* FLDil FSTil FSTPil*/ + PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), +/* FLDe FSTPe*/ + INVALID, PAIR_NP | FPU_CYCLES(3,0,0), INVALID, PAIR_NP | FPU_CYCLES(3,0,0) +}; +static uint64_t opcode_timings_db_mod3[64] = +{ + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/* opFNOP opFCLEX opFINIT*/ + INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(7,0,0), PAIR_NP | FPU_CYCLES(17,0,0), +/* opFNOP opFNOP*/ + PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +}; + +static uint64_t opcode_timings_dc[8] = +{ +/* FADDd FMULd FCOMd FCOMPd*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), +/* FSUBd FSUBRd FDIVd FDIVRd*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) +}; +static uint64_t opcode_timings_dc_mod3[8] = +{ +/* opFADDr opFMULr*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, INVALID, +/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) +}; + +static uint64_t opcode_timings_dd[8] = +{ +/* FLDd FSTd FSTPd*/ + PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), +/* FRSTOR FSAVE FSTSW*/ + PAIR_NP | FPU_CYCLES(70,0,0), INVALID, PAIR_NP | FPU_CYCLES(127,0,0), PAIR_NP | FPU_CYCLES(6,0,0) +}; +static uint64_t opcode_timings_dd_mod3[8] = +{ +/* FFFREE FST FSTP*/ + PAIR_NP | FPU_CYCLES(2,0,0), INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), +/* FUCOM FUCOMP*/ + PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID +}; + +static uint64_t opcode_timings_de[8] = +{ +/* FIADDw FIMULw FICOMw FICOMPw*/ + PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), +/* FISUBw FISUBRw FIDIVw FIDIVRw*/ + PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) +}; +static uint64_t opcode_timings_de_mod3[8] = +{ +/* FADDP FMULP FCOMPP*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, PAIR_FX | FPU_CYCLES(1,0,0), +/* FSUBP FSUBRP FDIVP FDIVRP*/ + PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) +}; + +static uint64_t opcode_timings_df[8] = +{ +/* FILDiw FISTiw FISTPiw*/ + PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), +/* FILDiq FBSTP FISTPiq*/ + INVALID, PAIR_NP | FPU_CYCLES(3,2,2), PAIR_NP | FPU_CYCLES(148,0,0), PAIR_NP | FPU_CYCLES(6,0,0) +}; +static uint64_t opcode_timings_df_mod3[8] = +{ + INVALID, INVALID, INVALID, INVALID, +/* FSTSW AX*/ + PAIR_NP | FPU_CYCLES(6,0,0), INVALID, INVALID, INVALID +}; + +static uint64_t opcode_timings_81[8] = +{ + PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, + PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RM | CYCLES_IMM1632 +}; +static uint64_t opcode_timings_81_mod3[8] = +{ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG +}; +static uint64_t opcode_timings_8x[8] = +{ + PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, + PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RM | CYCLES_IMM8 +}; +static uint64_t opcode_timings_8x_mod3[8] = +{ + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, + PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG +}; + +static int decode_delay, decode_delay_offset; +static uint8_t last_prefix; +static int prefixes; + +static inline int COUNT(uint64_t timings, uint64_t deps, int op_32) +{ + if ((timings & PAIR_FPU) && !(deps & FPU_FXCH)) + return FPU_I_LATENCY(timings); + if (timings & CYCLES_HAS_MULTI) + { + if (op_32 & 0x100) + return ((uintptr_t)timings >> 8) & 0xff; + return (uintptr_t)timings & 0xff; + } + if (!(timings & PAIR_MASK)) + return timings & 0xffff; + if ((timings & PAIR_MASK) == PAIR_FX) + return timings & 0xffff; + if ((timings & PAIR_MASK) == PAIR_FXCH) + return timings & 0xffff; + if ((timings & PAIR_UV) && !(timings & PAIR_FPU)) + timings &= 3; + switch (timings & CYCLES_MASK) + { + case CYCLES_REG: + return 1; + case CYCLES_RM: + return 2; + case CYCLES_RMW: + return 3; + case CYCLES_BRANCH: + return cpu_has_feature(CPU_FEATURE_MMX) ? 1 : 2; + } + + fatal("Illegal COUNT %016llx\n", timings); + + return timings; +} + +static int codegen_fpu_latencies(uint64_t deps, int reg) +{ + int latency = fpu_latency; + + if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) + latency = fpu_st_latency[0]; + if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) + latency = fpu_st_latency[1]; + if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) + latency = fpu_st_latency[reg]; + + return latency; +} + +#define SUB_AND_CLAMP(latency, count) \ + latency -= count; \ + if (latency < 0) \ + latency = 0 + +static void codegen_fpu_latency_clock(int count) +{ + SUB_AND_CLAMP(fpu_latency, count); + SUB_AND_CLAMP(fpu_st_latency[0], count); + SUB_AND_CLAMP(fpu_st_latency[1], count); + SUB_AND_CLAMP(fpu_st_latency[2], count); + SUB_AND_CLAMP(fpu_st_latency[3], count); + SUB_AND_CLAMP(fpu_st_latency[4], count); + SUB_AND_CLAMP(fpu_st_latency[5], count); + SUB_AND_CLAMP(fpu_st_latency[6], count); + SUB_AND_CLAMP(fpu_st_latency[7], count); +} + +static inline int codegen_timing_has_displacement(uint32_t fetchdat, int op_32) +{ + if (op_32 & 0x200) + { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) + { + /*Has SIB*/ + if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0x700) == 0x500) + return 1; + } + else + { + if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x05) + return 1; + } + } + else + { + if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x06) + return 1; + } + return 0; +} + +/*The instruction is only of interest here if it's longer than 7 bytes, as that's the + limit on Pentium MMX parallel decoding*/ +static inline int codegen_timing_instr_length(uint64_t timing, uint32_t fetchdat, int op_32) +{ + int len = prefixes; + if ((timing & CYCLES_MASK) == CYCLES_RM || (timing & CYCLES_MASK) == CYCLES_RMW) + { + len += 2; /*Opcode + ModR/M*/ + if ((timing & CYCLES_HASIMM) == CYCLES_IMM8) + len++; + if ((timing & CYCLES_HASIMM) == CYCLES_IMM1632) + len += (op_32 & 0x100) ? 4 : 2; + + if (op_32 & 0x200) + { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) + { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } + else + { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } + else + { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; + } + } + + return len; +} + +void codegen_timing_pentium_block_start() +{ + u_pipe_full = decode_delay = decode_delay_offset = 0; +} + +void codegen_timing_pentium_start() +{ + last_prefix = 0; + prefixes = 0; +} + +void codegen_timing_pentium_prefix(uint8_t prefix, uint32_t fetchdat) +{ + prefixes++; + if ((prefix & 0xf8) == 0xd8) + { + last_prefix = prefix; + return; + } + if (cpu_has_feature(CPU_FEATURE_MMX) && prefix == 0x0f) + { + /*On Pentium MMX 0fh prefix is 'free'*/ + last_prefix = prefix; + return; + } + if (cpu_has_feature(CPU_FEATURE_MMX) && (prefix == 0x66 || prefix == 0x67)) + { + /*On Pentium MMX 66h and 67h prefixes take 2 clocks*/ + decode_delay_offset += 2; + last_prefix = prefix; + return; + } + if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) + { + /*On Pentium 0fh prefix is 'free' when used on conditional jumps*/ + last_prefix = prefix; + return; + } + /*On Pentium all prefixes take 1 cycle to decode. Decode may be shadowed + by execution of previous instructions*/ + decode_delay_offset++; + last_prefix = prefix; +} + +static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) +{ + uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); + + /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not + cause AGIs with each other, but do with instructions that use it explicitly*/ + if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) + addr_regmask |= (1 << REG_ESP); + + return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; +} + +static void codegen_instruction(uint64_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) +{ + int instr_cycles, latency = 0; + + if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) + instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); + else + { +/* if (timings[opcode] & FPU_WRITE_ST0) + fatal("FPU_WRITE_ST0\n"); + if (timings[opcode] & FPU_WRITE_ST1) + fatal("FPU_WRITE_ST1\n"); + if (timings[opcode] & FPU_WRITE_STREG) + fatal("FPU_WRITE_STREG\n");*/ + instr_cycles = 0; + } + + if ((decode_delay + decode_delay_offset) > 0) + codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); + else + codegen_fpu_latency_clock(instr_cycles); + instr_cycles += COUNT(timings[opcode], deps[opcode], op_32); + instr_cycles += exec_delay; + if ((decode_delay + decode_delay_offset) > 0) + codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; + else + codegen_block_cycles += instr_cycles; + + decode_delay = (-instr_cycles) + 1; + + if (deps[opcode] & FPU_POP) + { + int c; + + for (c = 0; c < 7; c++) + fpu_st_latency[c] = fpu_st_latency[c+1]; + fpu_st_latency[7] = 0; + } + if (deps[opcode] & FPU_POP2) + { + int c; + + for (c = 0; c < 6; c++) + fpu_st_latency[c] = fpu_st_latency[c+2]; + fpu_st_latency[6] = fpu_st_latency[7] = 0; + } + if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) + { + /* if (fpu_latency) + fatal("Bad latency FPU\n");*/ + fpu_latency = FPU_F_LATENCY(timings[opcode]); + } + + if (deps[opcode] & FPU_PUSH) + { + int c; + + for (c = 0; c < 7; c++) + fpu_st_latency[c+1] = fpu_st_latency[c]; + fpu_st_latency[0] = 0; + } + if (deps[opcode] & FPU_WRITE_ST0) + { +/* if (fpu_st_latency[0]) + fatal("Bad latency ST0\n");*/ + fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_ST1) + { +/* if (fpu_st_latency[1]) + fatal("Bad latency ST1\n");*/ + fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_STREG) + { + int reg = fetchdat & 7; + if (deps[opcode] & FPU_POP) + reg--; + if (reg >= 0 && + !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && + !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) + { +/* if (fpu_st_latency[reg]) + fatal("Bad latency STREG %i %08x %i %016llx %02x\n",fpu_st_latency[reg], fetchdat, reg, timings[opcode], opcode);*/ + fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); + } + } +} + +void codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + uint64_t *timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); + int agi_stall = 0; + + switch (last_prefix) + { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; + + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) + { + case 0x80: case 0x82: case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: case 0xc1: case 0xd0: case 0xd1: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xd2: case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; + + default: + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } + + if (u_pipe_full) + { + uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); + + if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && + (timings[opcode] & PAIR_MASK) != PAIR_FXCH) + goto nopair; + + if ((timings[opcode] & PAIR_MASK) == PAIR_FXCH && + (u_pipe_timings[u_pipe_opcode] & PAIR_MASK) != PAIR_FX) + goto nopair; + + if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && + (timings[opcode] & PAIR_MASK) == PAIR_FXCH) + { + int temp; + + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + + temp = fpu_st_latency[fetchdat & 7]; + fpu_st_latency[fetchdat & 7] = fpu_st_latency[0]; + fpu_st_latency[0] = temp; + + u_pipe_full = 0; + decode_delay_offset = 0; + regmask_modified = u_pipe_regmask; + addr_regmask = 0; + return; + } + + if ((timings[opcode] & PAIR_V) && !(u_pipe_regmask & regmask) && (decode_delay+decode_delay_offset+u_pipe_decode_delay_offset) <= 0) + { + int has_displacement; + + if (timings[opcode] & CYCLES_HASIMM) + has_displacement = codegen_timing_has_displacement(fetchdat, op_32); + else + has_displacement = 0; + + if (!has_displacement && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) + { + int t1 = u_pipe_timings[u_pipe_opcode] & CYCLES_MASK; + int t2 = timings[opcode] & CYCLES_MASK; + int t_pair; + uint64_t temp_timing; + uint64_t temp_deps = 0; + + if (!(u_pipe_timings[u_pipe_opcode] & PAIR_FPU)) + t1 &= 3; + if (!(timings[opcode] & PAIR_FPU)) + t2 &= 3; + + if (t1 < 0 || t2 < 0 || t1 > CYCLES_BRANCH || t2 > CYCLES_BRANCH) + fatal("Pair out of range\n"); + + t_pair = pair_timings[t1][t2]; + if (t_pair < 1) + fatal("Illegal pair timings : t1=%i t2=%i u_opcode=%02x v_opcode=%02x\n", t1, t2, u_pipe_opcode, opcode); + + /*Instruction can pair with previous*/ + temp_timing = t_pair; + if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(&temp_timing, &temp_deps, 0, 0, 0, 0, agi_stall); + u_pipe_full = 0; + decode_delay_offset = 0; + + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; + addr_regmask = 0; + return; + } + } +nopair: + /*Instruction can not pair with previous*/ + /*Run previous now*/ + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + u_pipe_full = 0; + regmask_modified = u_pipe_regmask; + addr_regmask = 0; + } + + if ((timings[opcode] & PAIR_U) && (decode_delay + decode_delay_offset) <= 0) + { + int has_displacement; + + if (timings[opcode] & CYCLES_HASIMM) + has_displacement = codegen_timing_has_displacement(fetchdat, op_32); + else + has_displacement = 0; + + if ((!has_displacement || cpu_has_feature(CPU_FEATURE_MMX)) && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) + { + /*Instruction might pair with next*/ + u_pipe_full = 1; + u_pipe_opcode = opcode; + u_pipe_timings = timings; + u_pipe_op_32 = op_32; + u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); + u_pipe_fetchdat = fetchdat; + u_pipe_decode_delay_offset = decode_delay_offset; + u_pipe_deps = deps; + decode_delay_offset = 0; + return; + } + } + /*Instruction can not pair and must run now*/ + if (check_agi(deps, opcode, fetchdat, op_32)) + agi_stall = 1; + codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); + decode_delay_offset = 0; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + addr_regmask = 0; +} + +void codegen_timing_pentium_block_end() +{ + if (u_pipe_full) + { + /*Run previous now*/ + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + codegen_block_cycles++; + codegen_block_cycles += COUNT(u_pipe_timings[u_pipe_opcode], u_pipe_deps[u_pipe_opcode], u_pipe_op_32) + decode_delay + decode_delay_offset; + u_pipe_full = 0; + } +} + +codegen_timing_t codegen_timing_pentium = +{ + codegen_timing_pentium_start, + codegen_timing_pentium_prefix, + codegen_timing_pentium_opcode, + codegen_timing_pentium_block_start, + codegen_timing_pentium_block_end, + NULL +}; diff --git a/src/codegen/codegen_timing_winchip.c b/src/codegen/codegen_timing_winchip.c new file mode 100644 index 000000000..dd2123f49 --- /dev/null +++ b/src/codegen/codegen_timing_winchip.c @@ -0,0 +1,421 @@ +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include "x86.h" +#include "x86_ops.h" +#include "x87.h" +#include <86box/mem.h> +#include "codegen.h" +#include "codegen_ops.h" +#include "codegen_timing_common.h" + +#define CYCLES(c) (int *)c +#define CYCLES2(c16, c32) (int *)((-1 & ~0xffff) | c16 | (c32 << 8)) + +static int *opcode_timings[256] = +{ +/*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, +/*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), +/*20*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), +/*30*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(2), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(2), + +/*40*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, +/*50*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*60*/ CYCLES(11), CYCLES(9), CYCLES(7), CYCLES(9), CYCLES(4), CYCLES(4), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES2(17,25), CYCLES(1), CYCLES2(17,20), CYCLES(17), CYCLES(17), CYCLES(17), CYCLES(17), +/*70*/ &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, + +/*80*/ &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm, &timing_rm, CYCLES(5), CYCLES(5), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(1), CYCLES(5), CYCLES(6), +/*90*/ CYCLES(1), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(0), CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(2), CYCLES(3), +/*a0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(7), CYCLES(7), CYCLES(8), CYCLES(8), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), +/*b0*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, + +/*c0*/ CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(1), CYCLES(1), CYCLES(14), CYCLES(5), CYCLES(0), CYCLES(0), &timing_int, &timing_int, CYCLES(3), CYCLES(0), +/*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), +/*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL +}; + +static int *opcode_timings_mod3[256] = +{ +/*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, +/*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), +/*20*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), +/*30*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(2), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(2), + +/*40*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, +/*50*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*60*/ CYCLES(11), CYCLES(9), CYCLES(7), CYCLES(9), CYCLES(4), CYCLES(4), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES2(14,25), CYCLES(1), CYCLES2(17,20), CYCLES(17), CYCLES(17), CYCLES(17), CYCLES(17), +/*70*/ &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, + +/*80*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(5), CYCLES(5), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(1), CYCLES(2), CYCLES(1), +/*90*/ CYCLES(1), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(0), CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(2), CYCLES(3), +/*a0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(7), CYCLES(7), CYCLES(8), CYCLES(8), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), +/*b0*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, + +/*c0*/ CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(1), CYCLES(1), CYCLES(14), CYCLES(5), CYCLES(0), CYCLES(0), &timing_int, &timing_int, CYCLES(3), CYCLES(0), +/*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), +/*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL +}; + +static int *opcode_timings_0f[256] = +{ +/*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ CYCLES(9), CYCLES(1), CYCLES(9), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, +/*70*/ NULL, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, &timing_rm, CYCLES(100), NULL, NULL, NULL, NULL, NULL, NULL, &timing_rm, &timing_rm, + +/*80*/ &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, +/*90*/ CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), +/*a0*/ CYCLES(3), CYCLES(3), CYCLES(14), CYCLES(8), CYCLES(3), CYCLES(4), NULL, NULL, CYCLES(3), CYCLES(3), NULL, CYCLES(13), CYCLES(3), CYCLES(3), NULL, CYCLES2(18,30), +/*b0*/ CYCLES(10), CYCLES(10), CYCLES(6), CYCLES(13), CYCLES(6), CYCLES(6), CYCLES(3), CYCLES(3), NULL, NULL, CYCLES(6), CYCLES(13), CYCLES(7), CYCLES(7), CYCLES(3), CYCLES(3), + +/*c0*/ CYCLES(4), CYCLES(4), NULL, NULL, NULL, NULL, NULL, CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*d0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, +/*e0*/ NULL, &timing_rm, &timing_rm, NULL, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, +/*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, +}; +static int *opcode_timings_0f_mod3[256] = +{ +/*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, +/*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*30*/ CYCLES(9), CYCLES(1), CYCLES(9), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + +/*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/*60*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, +/*70*/ NULL, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(100), NULL, NULL, NULL, NULL, NULL, NULL, &timing_rr, &timing_rr, + +/*80*/ &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, &timing_bnt, +/*90*/ CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), +/*a0*/ CYCLES(3), CYCLES(3), CYCLES(14), CYCLES(8), CYCLES(3), CYCLES(4), NULL, NULL, CYCLES(3), CYCLES(3), NULL, CYCLES(13), CYCLES(3), CYCLES(3), NULL, CYCLES2(18,30), +/*b0*/ CYCLES(10), CYCLES(10), CYCLES(6), CYCLES(13), CYCLES(6), CYCLES(6), CYCLES(3), CYCLES(3), NULL, NULL, CYCLES(6), CYCLES(13), CYCLES(7), CYCLES(7), CYCLES(3), CYCLES(3), + +/*c0*/ CYCLES(4), CYCLES(4), NULL, NULL, NULL, NULL, NULL, CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*d0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, +/*e0*/ NULL, &timing_rr, &timing_rr, NULL, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, +/*f0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, +}; + +static int *opcode_timings_shift[8] = +{ + CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) +}; +static int *opcode_timings_shift_mod3[8] = +{ + CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) +}; + +static int *opcode_timings_f6[8] = +{ + &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) +}; +static int *opcode_timings_f6_mod3[8] = +{ + &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) +}; +static int *opcode_timings_f7[8] = +{ + &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) +}; +static int *opcode_timings_f7_mod3[8] = +{ + &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) +}; +static int *opcode_timings_ff[8] = +{ + &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL +}; +static int *opcode_timings_ff_mod3[8] = +{ + &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL +}; + +static int *opcode_timings_d8[8] = +{ +/* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ + CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) +}; +static int *opcode_timings_d8_mod3[8] = +{ +/* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ + CYCLES(4), CYCLES(6), CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) +}; + +static int *opcode_timings_d9[8] = +{ +/* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ + CYCLES(2), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) +}; +static int *opcode_timings_d9_mod3[64] = +{ + /*FLD*/ + CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), + /*FXCH*/ + CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), + /*FNOP*/ + CYCLES(7), NULL, NULL, NULL, NULL, NULL, NULL, NULL, + /*FSTP*/ + CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/* opFCHS opFABS opFTST opFXAM*/ + CYCLES(2), CYCLES(2), NULL, NULL, CYCLES(5), CYCLES(7), NULL, NULL, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI opFLDEG2 opFLDLN2 opFLDZ*/ + CYCLES(5), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(5), NULL, +/* opF2XM1 opFYL2X opFPTAN opFPATAN opFDECSTP opFINCSTP,*/ + CYCLES(300), CYCLES(58), CYCLES(676), CYCLES(355), NULL, NULL, CYCLES(3), CYCLES(3), +/* opFPREM opFSQRT opFSINCOS opFRNDINT opFSCALE opFSIN opFCOS*/ + CYCLES(70), NULL, CYCLES(72), CYCLES(292), CYCLES(21), CYCLES(30), CYCLES(474), CYCLES(474) +}; + +static int *opcode_timings_da[8] = +{ +/* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ + CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) +}; +static int *opcode_timings_da_mod3[8] = +{ + NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL +}; + + +static int *opcode_timings_db[8] = +{ +/* FLDil FSTil FSTPil FLDe FSTPe*/ + CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), NULL, CYCLES(8) +}; +static int *opcode_timings_db_mod3[64] = +{ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +/* opFNOP opFCLEX opFINIT opFNOP opFNOP*/ + NULL, CYCLES(7), CYCLES(18), CYCLES(27), CYCLES(7), CYCLES(7), NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +static int *opcode_timings_dc[8] = +{ +/* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ + CYCLES(6), CYCLES(8), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(74), CYCLES(74) +}; +static int *opcode_timings_dc_mod3[8] = +{ +/* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + CYCLES(4), CYCLES(6), NULL, NULL, CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) +}; + +static int *opcode_timings_dd[8] = +{ +/* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ + CYCLES(2), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(5) +}; +static int *opcode_timings_dd_mod3[8] = +{ +/* FFFREE FST FSTP FUCOM FUCOMP*/ + CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL +}; + +static int *opcode_timings_de[8] = +{ +/* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ + CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) +}; +static int *opcode_timings_de_mod3[8] = +{ +/* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ + CYCLES(4), CYCLES(6), NULL, CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) +}; + +static int *opcode_timings_df[8] = +{ +/* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ + CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), CYCLES(172), CYCLES(8) +}; +static int *opcode_timings_df_mod3[8] = +{ +/* FFREE FST FSTP FUCOM FUCOMP*/ + CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL +}; + +static int *opcode_timings_8x[8] = +{ + &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm +}; +static int *opcode_timings_8x_mod3[8] = +{ + &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm +}; +static int *opcode_timings_81[8] = +{ + &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm +}; +static int *opcode_timings_81_mod3[8] = +{ + &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm +}; + +static int timing_count; +static uint8_t last_prefix; +static uint32_t regmask_modified; + +static inline int COUNT(int *c, int op_32) +{ + if ((uintptr_t)c <= 10000) + return (int)(uintptr_t)c; + if (((uintptr_t)c & ~0xffff) == (-1 & ~0xffff)) + { + if (op_32 & 0x100) + return ((uintptr_t)c >> 8) & 0xff; + return (uintptr_t)c & 0xff; + } + return *c; +} + +void codegen_timing_winchip_block_start() +{ + regmask_modified = 0; +} + +void codegen_timing_winchip_start() +{ + timing_count = 0; + last_prefix = 0; +} + +void codegen_timing_winchip_prefix(uint8_t prefix, uint32_t fetchdat) +{ + timing_count += COUNT(opcode_timings[prefix], 0); + last_prefix = prefix; +} + +void codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + int **timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); + + switch (last_prefix) + { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; + + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) + { + case 0x80: case 0x82: case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; + + default: + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } + + timing_count += COUNT(timings[opcode], op_32); + if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) + timing_count++; /*AGI stall*/ + codegen_block_cycles += timing_count; + + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); +} + +void codegen_timing_winchip_block_end() +{ +} + +codegen_timing_t codegen_timing_winchip = +{ + codegen_timing_winchip_start, + codegen_timing_winchip_prefix, + codegen_timing_winchip_opcode, + codegen_timing_winchip_block_start, + codegen_timing_winchip_block_end, + NULL +}; diff --git a/src/codegen/codegen_timing_winchip2.c b/src/codegen/codegen_timing_winchip2.c new file mode 100644 index 000000000..06cc06697 --- /dev/null +++ b/src/codegen/codegen_timing_winchip2.c @@ -0,0 +1,743 @@ +/*Since IDT/Centaur didn't document cycle timings in the WinChip datasheets, and + I don't currently own a WinChip 2 to test against, most of the timing here is + a guess. This code makes the current (probably wrong) assumptions : + - FPU uses same timings as a Pentium, except for FXCH (which doesn't pair) + - 3DNow! instructions perfectly pair + - MMX follows mostly Pentium rules - one pipeline has shift/pack, one has + multiply, and other instructions can execute in either pipeline + - Instructions with prefixes can pair if both instructions are fully decoded + when the first instruction starts execution.*/ +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/mem.h> + +#include "x86.h" +#include "x86_ops.h" +#include "x87.h" +#include "codegen.h" +#include "codegen_ops.h" +#include "codegen_timing_common.h" + +/*Instruction has different execution time for 16 and 32 bit data. Does not pair */ +#define CYCLES_HAS_MULTI (1 << 31) + +#define CYCLES_FPU (1 << 30) + +#define CYCLES_IS_MMX_MUL (1 << 29) +#define CYCLES_IS_MMX_SHIFT (1 << 28) +#define CYCLES_IS_MMX_ANY (1 << 27) +#define CYCLES_IS_3DNOW (1 << 26) + +#define CYCLES_MMX_MUL(c) (CYCLES_IS_MMX_MUL | c) +#define CYCLES_MMX_SHIFT(c) (CYCLES_IS_MMX_SHIFT | c) +#define CYCLES_MMX_ANY(c) (CYCLES_IS_MMX_ANY | c) +#define CYCLES_3DNOW(c) (CYCLES_IS_3DNOW | c) + +#define CYCLES_IS_MMX (CYCLES_IS_MMX_MUL | CYCLES_IS_MMX_SHIFT | CYCLES_IS_MMX_ANY | CYCLES_IS_3DNOW) + +#define GET_CYCLES(c) (c & ~(CYCLES_HAS_MULTI | CYCLES_FPU | CYCLES_IS_MMX)) + +#define CYCLES(c) c +#define CYCLES2(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) + +/*comp_time = cycles until instruction complete + i_overlap = cycles that overlap with integer + f_overlap = cycles that overlap with subsequent FPU*/ +#define FPU_CYCLES(comp_time, i_overlap, f_overlap) (comp_time) | (i_overlap << 8) | (f_overlap << 16) | CYCLES_FPU + +#define FPU_COMP_TIME(timing) (timing & 0xff) +#define FPU_I_OVERLAP(timing) ((timing >> 8) & 0xff) +#define FPU_F_OVERLAP(timing) ((timing >> 16) & 0xff) + +#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) + +#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) + +#define FPU_RESULT_LATENCY(timing) ((timing >> 8) & 0xff) + +#define INVALID 0 + +static uint32_t opcode_timings[256] = +{ +/*00*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, +/*10*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), +/*20*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), +/*30*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), + +/*40*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*50*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*60*/ CYCLES(11), CYCLES(9), CYCLES(7), CYCLES(9), CYCLES(4), CYCLES(4), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES2(17,25), CYCLES(1), CYCLES2(17,20), CYCLES(17), CYCLES(17), CYCLES(17), CYCLES(17), +/*70*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), + +/*80*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(5), CYCLES(5), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(1), CYCLES(5), CYCLES(6), +/*90*/ CYCLES(1), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(0), CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(2), CYCLES(3), +/*a0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(7), CYCLES(7), CYCLES(8), CYCLES(8), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), +/*b0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), + +/*c0*/ CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(1), CYCLES(1), CYCLES(14), CYCLES(5), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(3), CYCLES(0), +/*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), +/*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), INVALID, INVALID, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), INVALID +}; + +static uint32_t opcode_timings_mod3[256] = +{ +/*00*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, +/*10*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), +/*20*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), +/*30*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), + +/*40*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*50*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*60*/ CYCLES(11), CYCLES(9), CYCLES(7), CYCLES(9), CYCLES(4), CYCLES(4), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES2(14,25), CYCLES(1), CYCLES2(17,20), CYCLES(17), CYCLES(17), CYCLES(17), CYCLES(17), +/*70*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), + +/*80*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(1), CYCLES(2), CYCLES(1), +/*90*/ CYCLES(1), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(0), CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(2), CYCLES(3), +/*a0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(7), CYCLES(7), CYCLES(8), CYCLES(8), CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), +/*b0*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), + +/*c0*/ CYCLES(4), CYCLES(4), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(1), CYCLES(1), CYCLES(14), CYCLES(5), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(3), CYCLES(0), +/*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), +/*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), INVALID, INVALID, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), INVALID, +}; + +static uint32_t opcode_timings_0f[256] = +{ +/*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), +/*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*30*/ CYCLES(9), CYCLES(1), CYCLES(9), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*50*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*60*/ CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), +/*70*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES(100), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), + +/*80*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*90*/ CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), +/*a0*/ CYCLES(3), CYCLES(3), CYCLES(14), CYCLES(8), CYCLES(3), CYCLES(4), INVALID, INVALID, CYCLES(3), CYCLES(3), INVALID, CYCLES(13), CYCLES(3), CYCLES(3), INVALID, CYCLES2(18,30), +/*b0*/ CYCLES(10), CYCLES(10), CYCLES(6), CYCLES(13), CYCLES(6), CYCLES(6), CYCLES(3), CYCLES(3), INVALID, INVALID, CYCLES(6), CYCLES(13), CYCLES(7), CYCLES(7), CYCLES(3), CYCLES(3), + +/*c0*/ CYCLES(4), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*d0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), +/*e0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), +/*f0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, +}; +static uint32_t opcode_timings_0f_mod3[256] = +{ +/*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), +/*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*30*/ CYCLES(9), CYCLES(1), CYCLES(9), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + +/*40*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*50*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, +/*60*/ CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), +/*70*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES(100), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), + +/*80*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*90*/ CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3), +/*a0*/ CYCLES(3), CYCLES(3), CYCLES(14), CYCLES(8), CYCLES(3), CYCLES(4), INVALID, INVALID, CYCLES(3), CYCLES(3), INVALID, CYCLES(13), CYCLES(3), CYCLES(3), INVALID, CYCLES2(18,30), +/*b0*/ CYCLES(10), CYCLES(10), CYCLES(6), CYCLES(13), CYCLES(6), CYCLES(6), CYCLES(3), CYCLES(3), INVALID, INVALID, CYCLES(6), CYCLES(13), CYCLES(7), CYCLES(7), CYCLES(3), CYCLES(3), + +/*c0*/ CYCLES(4), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), +/*d0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), +/*e0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), +/*f0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, +}; + +static uint32_t opcode_timings_shift[8] = +{ + CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) +}; +static uint32_t opcode_timings_shift_mod3[8] = +{ + CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) +}; + +static uint32_t opcode_timings_f6[8] = +{ + CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) +}; +static uint32_t opcode_timings_f6_mod3[8] = +{ + CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) +}; +static uint32_t opcode_timings_f7[8] = +{ + CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) +}; +static uint32_t opcode_timings_f7_mod3[8] = +{ + CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) +}; +static uint32_t opcode_timings_ff[8] = +{ + CYCLES(2), CYCLES(2), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID +}; +static uint32_t opcode_timings_ff_mod3[8] = +{ + CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID +}; + +static uint32_t opcode_timings_d8[8] = +{ +/* FADDs FMULs FCOMs FCOMPs*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), +/* FSUBs FSUBRs FDIVs FDIVRs*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) +}; +static uint32_t opcode_timings_d8_mod3[8] = +{ +/* FADD FMUL FCOM FCOMP*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), +/* FSUB FSUBR FDIV FDIVR*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) +}; + +static uint32_t opcode_timings_d9[8] = +{ +/* FLDs FSTs FSTPs*/ + FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), +/* FLDENV FLDCW FSTENV FSTCW*/ + FPU_CYCLES(32,0,0), FPU_CYCLES(8,0,0), FPU_CYCLES(48,0,0), FPU_CYCLES(2,0,0) +}; +static uint32_t opcode_timings_d9_mod3[64] = +{ + /*FLD*/ + FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), + FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), + /*FXCH*/ + FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), + FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), + /*FNOP*/ + FPU_CYCLES(3,0,0), INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + /*FSTP*/ + FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), + FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), +/* opFCHS opFABS*/ + FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), INVALID, INVALID, +/* opFTST opFXAM*/ + FPU_CYCLES(1,0,0), FPU_CYCLES(21,4,0), INVALID, INVALID, +/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ + FPU_CYCLES(2,0,0), FPU_CYCLES(5,2,2), FPU_CYCLES(5,2,2), FPU_CYCLES(5,2,2), +/* opFLDEG2 opFLDLN2 opFLDZ*/ + FPU_CYCLES(5,2,2), FPU_CYCLES(5,2,2), FPU_CYCLES(2,0,0), INVALID, +/* opF2XM1 opFYL2X opFPTAN opFPATAN*/ + FPU_CYCLES(53,2,2), FPU_CYCLES(103,2,2),FPU_CYCLES(120,36,0),FPU_CYCLES(112,2,2), +/* opFDECSTP opFINCSTP,*/ + INVALID, INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), +/* opFPREM opFSQRT opFSINCOS*/ + FPU_CYCLES(64,2,2), INVALID, FPU_CYCLES(70,69,2),FPU_CYCLES(89,2,2), +/* opFRNDINT opFSCALE opFSIN opFCOS*/ + FPU_CYCLES(9,0,0), FPU_CYCLES(20,5,0), FPU_CYCLES(65,2,2), FPU_CYCLES(65,2,2) +}; + +static uint32_t opcode_timings_da[8] = +{ +/* FIADDl FIMULl FICOMl FICOMPl*/ + FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), +/* FISUBl FISUBRl FIDIVl FIDIVRl*/ + FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) +}; +static uint32_t opcode_timings_da_mod3[8] = +{ + INVALID, INVALID, INVALID, INVALID, +/* FCOMPP*/ + INVALID, FPU_CYCLES(1,0,0), INVALID, INVALID +}; + + +static uint32_t opcode_timings_db[8] = +{ +/* FLDil FSTil FSTPil*/ + FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), +/* FLDe FSTPe*/ + INVALID, FPU_CYCLES(3,0,0), INVALID, FPU_CYCLES(3,0,0) +}; +static uint32_t opcode_timings_db_mod3[64] = +{ + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + +/* opFNOP opFCLEX opFINIT*/ + INVALID, FPU_CYCLES(1,0,0), FPU_CYCLES(7,0,0), FPU_CYCLES(17,0,0), +/* opFNOP opFNOP*/ + FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, + + INVALID, INVALID, INVALID, INVALID, + INVALID, INVALID, INVALID, INVALID, +}; + +static uint32_t opcode_timings_dc[8] = +{ +/* FADDd FMULd FCOMd FCOMPd*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), +/* FSUBd FSUBRd FDIVd FDIVRd*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) +}; +static uint32_t opcode_timings_dc_mod3[8] = +{ +/* opFADDr opFMULr*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),INVALID, INVALID, +/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) +}; + +static uint32_t opcode_timings_dd[8] = +{ +/* FLDd FSTd FSTPd*/ + FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), +/* FRSTOR FSAVE FSTSW*/ + FPU_CYCLES(70,0,0), INVALID, FPU_CYCLES(127,0,0), FPU_CYCLES(6,0,0) +}; +static uint32_t opcode_timings_dd_mod3[8] = +{ +/* FFFREE FST FSTP*/ + FPU_CYCLES(2,0,0), INVALID, FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), +/* FUCOM FUCOMP*/ + FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),INVALID, INVALID +}; + +static uint32_t opcode_timings_de[8] = +{ +/* FIADDw FIMULw FICOMw FICOMPw*/ + FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), +/* FISUBw FISUBRw FIDIVw FIDIVRw*/ + FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) +}; +static uint32_t opcode_timings_de_mod3[8] = +{ +/* FADDP FMULP FCOMPP*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(1,0,0), +/* FSUBP FSUBRP FDIVP FDIVRP*/ + FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) +}; + +static uint32_t opcode_timings_df[8] = +{ +/* FILDiw FISTiw FISTPiw*/ + FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), +/* FILDiq FBSTP FISTPiq*/ + INVALID, FPU_CYCLES(3,2,2), FPU_CYCLES(148,0,0), FPU_CYCLES(6,0,0) +}; +static uint32_t opcode_timings_df_mod3[8] = +{ + INVALID, INVALID, INVALID, INVALID, +/* FSTSW AX*/ + FPU_CYCLES(6,0,0), INVALID, INVALID, INVALID +}; + +static uint32_t opcode_timings_8x[8] = +{ + CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) +}; +static uint32_t opcode_timings_8x_mod3[8] = +{ + CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) +}; +static uint32_t opcode_timings_81[8] = +{ + CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) +}; +static uint32_t opcode_timings_81_mod3[8] = +{ + CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) +}; + +static int timing_count; +static uint8_t last_prefix; +static uint32_t regmask_modified; +static int decode_delay, decode_delay_offset; +static int fpu_latency; +static int fpu_st_latency[8]; + +static int u_pipe_full; +static uint32_t u_pipe_opcode; +static uint32_t *u_pipe_timings; +static uint32_t u_pipe_op_32; +static uint32_t u_pipe_regmask; +static uint32_t u_pipe_fetchdat; +static int u_pipe_decode_delay_offset; +static uint64_t *u_pipe_deps; + +int can_pair(uint32_t timing_a, uint32_t timing_b, uint8_t regmask_b) +{ + /*Only MMX/3DNow instructions can pair*/ + if (!(timing_b & CYCLES_IS_MMX)) + return 0; + /*Only one MMX multiply per cycle*/ + if ((timing_a & CYCLES_IS_MMX_MUL) && (timing_b & CYCLES_IS_MMX_MUL)) + return 0; + /*Only one MMX shift/pack per cycle*/ + if ((timing_a & CYCLES_IS_MMX_SHIFT) && (timing_b & CYCLES_IS_MMX_SHIFT)) + return 0; + /*Second instruction can not access registers written by first*/ + if (u_pipe_regmask & regmask_b) + return 0; + /*Must have had enough time to decode prefixes*/ + if ((decode_delay+decode_delay_offset+u_pipe_decode_delay_offset) > 0) + return 0; + + return 1; +} + +static inline int COUNT(uint32_t c, int op_32) +{ + if (c & CYCLES_FPU) + return FPU_I_LATENCY(c); + if (c & CYCLES_HAS_MULTI) + { + if (op_32 & 0x100) + return (c >> 8) & 0xff; + return c & 0xff; + } + return GET_CYCLES(c); +} + +static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) +{ + uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); + + /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not + cause AGIs with each other, but do with instructions that use it explicitly*/ + if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) + addr_regmask |= (1 << REG_ESP); + + return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; +} + +static int codegen_fpu_latencies(uint64_t deps, int reg) +{ + int latency = fpu_latency; + + if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) + latency = fpu_st_latency[0]; + if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) + latency = fpu_st_latency[1]; + if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) + latency = fpu_st_latency[reg]; + + return latency; +} + +#define SUB_AND_CLAMP(latency, count) \ + latency -= count; \ + if (latency < 0) \ + latency = 0 + +static void codegen_fpu_latency_clock(int count) +{ + SUB_AND_CLAMP(fpu_latency, count); + SUB_AND_CLAMP(fpu_st_latency[0], count); + SUB_AND_CLAMP(fpu_st_latency[1], count); + SUB_AND_CLAMP(fpu_st_latency[2], count); + SUB_AND_CLAMP(fpu_st_latency[3], count); + SUB_AND_CLAMP(fpu_st_latency[4], count); + SUB_AND_CLAMP(fpu_st_latency[5], count); + SUB_AND_CLAMP(fpu_st_latency[6], count); + SUB_AND_CLAMP(fpu_st_latency[7], count); +} + +static void codegen_instruction(uint32_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) +{ + int instr_cycles, latency = 0; + + if ((timings[opcode] & CYCLES_FPU) && !(deps[opcode] & FPU_FXCH)) + instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); + else + instr_cycles = 0; + + if ((decode_delay + decode_delay_offset) > 0) + codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); + else + codegen_fpu_latency_clock(instr_cycles); + instr_cycles += COUNT(timings[opcode], op_32); + instr_cycles += exec_delay; + if ((decode_delay + decode_delay_offset) > 0) + codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; + else + codegen_block_cycles += instr_cycles; + decode_delay = (-instr_cycles) + 1; + + if (deps[opcode] & FPU_POP) + { + int c; + + for (c = 0; c < 7; c++) + fpu_st_latency[c] = fpu_st_latency[c+1]; + fpu_st_latency[7] = 0; + } + if (deps[opcode] & FPU_POP2) + { + int c; + + for (c = 0; c < 6; c++) + fpu_st_latency[c] = fpu_st_latency[c+2]; + fpu_st_latency[6] = fpu_st_latency[7] = 0; + } + if (timings[opcode] & CYCLES_FPU) + { + /* if (fpu_latency) + fatal("Bad latency FPU\n");*/ + fpu_latency = FPU_F_LATENCY(timings[opcode]); + } + + if (deps[opcode] & FPU_PUSH) + { + int c; + + for (c = 0; c < 7; c++) + fpu_st_latency[c+1] = fpu_st_latency[c]; + fpu_st_latency[0] = 0; + } + if (deps[opcode] & FPU_WRITE_ST0) + { +/* if (fpu_st_latency[0]) + fatal("Bad latency ST0\n");*/ + fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_ST1) + { +/* if (fpu_st_latency[1]) + fatal("Bad latency ST1\n");*/ + fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_STREG) + { + int reg = fetchdat & 7; + if (deps[opcode] & FPU_POP) + reg--; + if (reg >= 0 && + !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && + !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) + { +/* if (fpu_st_latency[reg]) + fatal("Bad latency STREG %i %08x %i %016llx %02x\n",fpu_st_latency[reg], fetchdat, reg, timings[opcode], opcode);*/ + fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); + } + } +} + +static void codegen_timing_winchip2_block_start() +{ + regmask_modified = 0; + decode_delay = decode_delay_offset = 0; + u_pipe_full = 0; +} + +static void codegen_timing_winchip2_start() +{ + timing_count = 0; + last_prefix = 0; +} + +static void codegen_timing_winchip2_prefix(uint8_t prefix, uint32_t fetchdat) +{ + if (prefix == 0x0f) + { + /*0fh prefix is 'free'*/ + last_prefix = prefix; + return; + } + /*On WinChip all prefixes take 1 cycle to decode. Decode may be shadowed + by execution of previous instructions*/ + decode_delay_offset++; + last_prefix = prefix; +} + +static void codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + uint32_t *timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); + int agi_stall = 0; + + switch (last_prefix) + { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; + + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) + { + case 0x80: case 0x82: case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; + + default: + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } + + if (u_pipe_full) + { + uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); + + if (can_pair(u_pipe_timings[u_pipe_opcode], timings[opcode], regmask)) + { + int cycles_a = u_pipe_timings[u_pipe_opcode] & 0xff; + int cycles_b = timings[opcode] & 0xff; + uint32_t timing = (cycles_a > cycles_b) ? u_pipe_timings[u_pipe_opcode] : timings[opcode]; + uint64_t temp_deps = 0; + + if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + + codegen_instruction(&timing, &temp_deps, 0, 0, 0, 0, agi_stall); + u_pipe_full = 0; + decode_delay_offset = 0; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; + return; + } + else + { + /*No pairing, run first instruction now*/ + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + u_pipe_full = 0; + regmask_modified = u_pipe_regmask; + } + } + if (timings[opcode] & CYCLES_IS_MMX) + { + /*Might pair with next instruction*/ + u_pipe_full = 1; + u_pipe_opcode = opcode; + u_pipe_timings = timings; + u_pipe_op_32 = op_32; + u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); + u_pipe_fetchdat = fetchdat; + u_pipe_decode_delay_offset = decode_delay_offset; + u_pipe_deps = deps; + decode_delay_offset = 0; + return; + } + + if (check_agi(deps, opcode, fetchdat, op_32)) + agi_stall = 1; + codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); + decode_delay_offset = 0; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); +} + +static void codegen_timing_winchip2_block_end() +{ + if (u_pipe_full) + { + int agi_stall = 0; + + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + u_pipe_full = 0; + } +} + +codegen_timing_t codegen_timing_winchip2 = +{ + codegen_timing_winchip2_start, + codegen_timing_winchip2_prefix, + codegen_timing_winchip2_opcode, + codegen_timing_winchip2_block_start, + codegen_timing_winchip2_block_end, + NULL +}; diff --git a/src/codegen/codegen_x86-64.c b/src/codegen/codegen_x86-64.c new file mode 100644 index 000000000..f674df1e4 --- /dev/null +++ b/src/codegen/codegen_x86-64.c @@ -0,0 +1,1178 @@ +#ifdef __amd64__ + +#include +#include +#include +#include +#define HAVE_STDARG_H +#include <86box/86box.h> +#include "cpu.h" +#include "x86.h" +#include "x86_flags.h" +#include "x86_ops.h" +#include "x87.h" +#include <86box/mem.h> + +#include "386_common.h" + +#include "codegen.h" +#include "codegen_accumulate.h" +#include "codegen_ops.h" +#include "codegen_ops_x86-64.h" + +#if defined(__linux__) || defined(__APPLE__) +#include +#include +#endif +#if WIN64 +#include +#endif + +int codegen_flat_ds, codegen_flat_ss; +int codegen_flags_changed = 0; +int codegen_fpu_entered = 0; +int codegen_fpu_loaded_iq[8]; +int codegen_reg_loaded[8]; +x86seg *op_ea_seg; +int op_ssegs; +uint32_t op_old_pc; + +uint32_t recomp_page = -1; + +int host_reg_mapping[NR_HOST_REGS]; +int host_reg_xmm_mapping[NR_HOST_XMM_REGS]; +codeblock_t *codeblock; +codeblock_t **codeblock_hash; +int codegen_mmx_entered = 0; + +int block_current = 0; +static int block_num; +int block_pos; + +int cpu_recomp_flushes, cpu_recomp_flushes_latched; +int cpu_recomp_evicted, cpu_recomp_evicted_latched; +int cpu_recomp_reuse, cpu_recomp_reuse_latched; +int cpu_recomp_removed, cpu_recomp_removed_latched; + +uint32_t codegen_endpc; + +int codegen_block_cycles; +static int codegen_block_ins; +static int codegen_block_full_ins; + +static uint32_t last_op32; +static x86seg *last_ea_seg; +static int last_ssegs; + +void codegen_init() +{ + int c; + +#if defined(__linux__) || defined(__APPLE__) + void *start; + size_t len; + long pagesize = sysconf(_SC_PAGESIZE); + long pagemask = ~(pagesize - 1); +#endif + +#if WIN64 + codeblock = VirtualAlloc(NULL, BLOCK_SIZE * sizeof(codeblock_t), MEM_COMMIT, PAGE_EXECUTE_READWRITE); +#else + codeblock = malloc(BLOCK_SIZE * sizeof(codeblock_t)); +#endif + codeblock_hash = malloc(HASH_SIZE * sizeof(codeblock_t *)); + + memset(codeblock, 0, BLOCK_SIZE * sizeof(codeblock_t)); + memset(codeblock_hash, 0, HASH_SIZE * sizeof(codeblock_t *)); + + for (c = 0; c < BLOCK_SIZE; c++) + codeblock[c].valid = 0; + +#if defined(__linux__) || defined(__APPLE__) + start = (void *)((long)codeblock & pagemask); + len = ((BLOCK_SIZE * sizeof(codeblock_t)) + pagesize) & pagemask; + if (mprotect(start, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) + { + perror("mprotect"); + exit(-1); + } +#endif +} + +void codegen_reset() +{ + int c; + + memset(codeblock, 0, BLOCK_SIZE * sizeof(codeblock_t)); + memset(codeblock_hash, 0, HASH_SIZE * sizeof(codeblock_t *)); + mem_reset_page_blocks(); + + for (c = 0; c < BLOCK_SIZE; c++) + codeblock[c].valid = 0; +} + +void dump_block() +{ +} + +static void add_to_block_list(codeblock_t *block) +{ + codeblock_t *block_prev = pages[block->phys >> 12].block[(block->phys >> 10) & 3]; + + if (!block->page_mask) + fatal("add_to_block_list - mask = 0\n"); + + if (block_prev) + { + block->next = block_prev; + block_prev->prev = block; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block; + } + else + { + block->next = NULL; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block; + } + + if (block->next) + { + if (block->next->valid == 0) + fatal("block->next->valid=0 %p %p %x %x\n", (void *)block->next, (void *)codeblock, block_current, block_pos); + } + + if (block->page_mask2) + { + block_prev = pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3]; + + if (block_prev) + { + block->next_2 = block_prev; + block_prev->prev_2 = block; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block; + } + else + { + block->next_2 = NULL; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block; + } + } +} + +static void remove_from_block_list(codeblock_t *block, uint32_t pc) +{ + if (!block->page_mask) + return; + + if (block->prev) + { + block->prev->next = block->next; + if (block->next) + block->next->prev = block->prev; + } + else + { + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block->next; + if (block->next) + block->next->prev = NULL; + else + mem_flush_write_page(block->phys, 0); + } + if (!block->page_mask2) + { + if (block->prev_2 || block->next_2) + fatal("Invalid block_2\n"); + return; + } + + if (block->prev_2) + { + block->prev_2->next_2 = block->next_2; + if (block->next_2) + block->next_2->prev_2 = block->prev_2; + } + else + { + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block->next_2; + if (block->next_2) + block->next_2->prev_2 = NULL; + else + mem_flush_write_page(block->phys_2, 0); + } +} + +static void delete_block(codeblock_t *block) +{ + uint32_t old_pc = block->pc; + + if (block == codeblock_hash[HASH(block->phys)]) + codeblock_hash[HASH(block->phys)] = NULL; + + if (block->valid == 0) + fatal("Deleting deleted block\n"); + block->valid = 0; + + codeblock_tree_delete(block); + remove_from_block_list(block, old_pc); +} + +void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr) +{ + struct codeblock_t *block = page->block[(phys_addr >> 10) & 3]; + + while (block) + { + if (mask & block->page_mask) + { + delete_block(block); + cpu_recomp_evicted++; + } + if (block == block->next) + fatal("Broken 1\n"); + block = block->next; + } + + block = page->block_2[(phys_addr >> 10) & 3]; + + while (block) + { + if (mask & block->page_mask2) + { + delete_block(block); + cpu_recomp_evicted++; + } + if (block == block->next_2) + fatal("Broken 2\n"); + block = block->next_2; + } +} + +void codegen_block_init(uint32_t phys_addr) +{ + codeblock_t *block; + page_t *page = &pages[phys_addr >> 12]; + + if (!page->block[(phys_addr >> 10) & 3]) + mem_flush_write_page(phys_addr, cs+cpu_state.pc); + + block_current = (block_current + 1) & BLOCK_MASK; + block = &codeblock[block_current]; + + if (block->valid != 0) + { + delete_block(block); + cpu_recomp_reuse++; + } + block_num = HASH(phys_addr); + codeblock_hash[block_num] = &codeblock[block_current]; + + block->valid = 1; + block->ins = 0; + block->pc = cs + cpu_state.pc; + block->_cs = cs; + block->pnt = block_current; + block->phys = phys_addr; + block->dirty_mask = &page->dirty_mask[(phys_addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK]; + block->dirty_mask2 = NULL; + block->next = block->prev = NULL; + block->next_2 = block->prev_2 = NULL; + block->page_mask = 0; + block->flags = 0; + block->status = cpu_cur_status; + + block->was_recompiled = 0; + + recomp_page = block->phys & ~0xfff; + + codeblock_tree_add(block); +} + +void codegen_block_start_recompile(codeblock_t *block) +{ + page_t *page = &pages[block->phys >> 12]; + + if (!page->block[(block->phys >> 10) & 3]) + mem_flush_write_page(block->phys, cs+cpu_state.pc); + + block_num = HASH(block->phys); + block_current = block->pnt; + + if (block->pc != cs + cpu_state.pc || block->was_recompiled) + fatal("Recompile to used block!\n"); + + block->status = cpu_cur_status; + + block_pos = BLOCK_GPF_OFFSET; +#if WIN64 + addbyte(0x48); /*XOR RCX, RCX*/ + addbyte(0x31); + addbyte(0xc9); + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); +#else + addbyte(0x48); /*XOR RDI, RDI*/ + addbyte(0x31); + addbyte(0xff); + addbyte(0x31); /*XOR ESI, ESI*/ + addbyte(0xf6); +#endif + call(block, (uintptr_t)x86gpf); + while (block_pos < BLOCK_EXIT_OFFSET) + addbyte(0x90); /*NOP*/ + block_pos = BLOCK_EXIT_OFFSET; /*Exit code*/ + addbyte(0x48); /*ADDL $40,%rsp*/ + addbyte(0x83); + addbyte(0xC4); + addbyte(0x28); + addbyte(0x41); /*POP R15*/ + addbyte(0x5f); + addbyte(0x41); /*POP R14*/ + addbyte(0x5e); + addbyte(0x41); /*POP R13*/ + addbyte(0x5d); + addbyte(0x41); /*POP R12*/ + addbyte(0x5c); + addbyte(0x5f); /*POP RDI*/ + addbyte(0x5e); /*POP RSI*/ + addbyte(0x5d); /*POP RBP*/ + addbyte(0x5b); /*POP RDX*/ + addbyte(0xC3); /*RET*/ + cpu_block_end = 0; + block_pos = 0; /*Entry code*/ + addbyte(0x53); /*PUSH RBX*/ + addbyte(0x55); /*PUSH RBP*/ + addbyte(0x56); /*PUSH RSI*/ + addbyte(0x57); /*PUSH RDI*/ + addbyte(0x41); /*PUSH R12*/ + addbyte(0x54); + addbyte(0x41); /*PUSH R13*/ + addbyte(0x55); + addbyte(0x41); /*PUSH R14*/ + addbyte(0x56); + addbyte(0x41); /*PUSH R15*/ + addbyte(0x57); + addbyte(0x48); /*SUBL $40,%rsp*/ + addbyte(0x83); + addbyte(0xEC); + addbyte(0x28); + addbyte(0x48); /*MOVL RBP, &cpu_state*/ + addbyte(0xBD); + addquad(((uintptr_t)&cpu_state) + 128); + + last_op32 = -1; + last_ea_seg = NULL; + last_ssegs = -1; + + codegen_block_cycles = 0; + codegen_timing_block_start(); + + codegen_block_ins = 0; + codegen_block_full_ins = 0; + + recomp_page = block->phys & ~0xfff; + + codegen_flags_changed = 0; + codegen_fpu_entered = 0; + codegen_mmx_entered = 0; + + codegen_fpu_loaded_iq[0] = codegen_fpu_loaded_iq[1] = codegen_fpu_loaded_iq[2] = codegen_fpu_loaded_iq[3] = + codegen_fpu_loaded_iq[4] = codegen_fpu_loaded_iq[5] = codegen_fpu_loaded_iq[6] = codegen_fpu_loaded_iq[7] = 0; + + cpu_state.seg_ds.checked = cpu_state.seg_es.checked = cpu_state.seg_fs.checked = cpu_state.seg_gs.checked = (cr0 & 1) ? 0 : 1; + + codegen_reg_loaded[0] = codegen_reg_loaded[1] = codegen_reg_loaded[2] = codegen_reg_loaded[3] = + codegen_reg_loaded[4] = codegen_reg_loaded[5] = codegen_reg_loaded[6] = codegen_reg_loaded[7] = 0; + + block->was_recompiled = 1; + + codegen_flat_ds = !(cpu_cur_status & CPU_STATUS_NOTFLATDS); + codegen_flat_ss = !(cpu_cur_status & CPU_STATUS_NOTFLATSS); +} + +void codegen_block_remove() +{ + codeblock_t *block = &codeblock[block_current]; + + delete_block(block); + cpu_recomp_removed++; + + recomp_page = -1; +} + +void codegen_block_generate_end_mask() +{ + codeblock_t *block = &codeblock[block_current]; + uint32_t start_pc; + uint32_t end_pc; + + block->endpc = codegen_endpc; + + block->page_mask = 0; + start_pc = (block->pc & 0x3ff) & ~15; + if ((block->pc ^ block->endpc) & ~0x3ff) + end_pc = 0x3ff & ~15; + else + end_pc = (block->endpc & 0x3ff) & ~15; + if (end_pc < start_pc) + end_pc = 0x3ff; + start_pc >>= PAGE_MASK_SHIFT; + end_pc >>= PAGE_MASK_SHIFT; + + for (; start_pc <= end_pc; start_pc++) + block->page_mask |= ((uint64_t)1 << start_pc); + + pages[block->phys >> 12].code_present_mask[(block->phys >> 10) & 3] |= block->page_mask; + + block->phys_2 = -1; + block->page_mask2 = 0; + block->next_2 = block->prev_2 = NULL; + if ((block->pc ^ block->endpc) & ~0x3ff) + { + block->phys_2 = get_phys_noabrt(block->endpc); + if (block->phys_2 != -1) + { + page_t *page_2 = &pages[block->phys_2 >> 12]; + + start_pc = 0; + end_pc = (block->endpc & 0x3ff) >> PAGE_MASK_SHIFT; + for (; start_pc <= end_pc; start_pc++) + block->page_mask2 |= ((uint64_t)1 << start_pc); + page_2->code_present_mask[(block->phys_2 >> 10) & 3] |= block->page_mask2; + + if (!pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3]) + mem_flush_write_page(block->phys_2, block->endpc); + + if (!block->page_mask2) + fatal("!page_mask2\n"); + if (block->next_2) + { + if (block->next_2->valid == 0) + fatal("block->next_2->valid=0 %p\n", (void *)block->next_2); + } + + block->dirty_mask2 = &page_2->dirty_mask[(block->phys_2 >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK]; + } + } + + recomp_page = -1; +} + +void codegen_block_end() +{ + codeblock_t *block = &codeblock[block_current]; + + codegen_block_generate_end_mask(); + add_to_block_list(block); +} + +void codegen_block_end_recompile(codeblock_t *block) +{ + codegen_timing_block_end(); + codegen_accumulate(ACCREG_cycles, -codegen_block_cycles); + + codegen_accumulate_flush(); + + addbyte(0x48); /*ADDL $40,%rsp*/ + addbyte(0x83); + addbyte(0xC4); + addbyte(0x28); + addbyte(0x41); /*POP R15*/ + addbyte(0x5f); + addbyte(0x41); /*POP R14*/ + addbyte(0x5e); + addbyte(0x41); /*POP R13*/ + addbyte(0x5d); + addbyte(0x41); /*POP R12*/ + addbyte(0x5c); + addbyte(0x5f); /*POP RDI*/ + addbyte(0x5e); /*POP RSI*/ + addbyte(0x5d); /*POP RBP*/ + addbyte(0x5b); /*POP RDX*/ + addbyte(0xC3); /*RET*/ + + if (block_pos > BLOCK_GPF_OFFSET) + fatal("Over limit!\n"); + + remove_from_block_list(block, block->pc); + block->next = block->prev = NULL; + block->next_2 = block->prev_2 = NULL; + codegen_block_generate_end_mask(); + add_to_block_list(block); +} + +void codegen_flush() +{ + return; +} + +static int opcode_modrm[256] = +{ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*00*/ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*10*/ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*20*/ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*30*/ + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50*/ + 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, /*60*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70*/ + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*80*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*90*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*a0*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*b0*/ + + 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, /*c0*/ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, /*d0*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*e0*/ + 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, /*f0*/ +}; +int opcode_0f_modrm[256] = +{ + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, /*00*/ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, /*10*/ + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30*/ + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*40*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, /*60*/ + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, /*70*/ + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*80*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*90*/ + 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, /*a0*/ + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, /*b0*/ + + 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /*c0*/ + 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, /*d0*/ + 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, /*e0*/ + 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0 /*f0*/ +}; + +void codegen_debug() +{ +} + +static x86seg *codegen_generate_ea_16_long(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc) +{ + if (!cpu_mod && cpu_rm == 6) + { + addbyte(0xC7); /*MOVL $0,(ssegs)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(eaaddr)); + addlong((fetchdat >> 8) & 0xffff); + (*op_pc) += 2; + } + else + { + int base_reg = 0, index_reg = 0; + + switch (cpu_rm) + { + case 0: case 1: case 7: + base_reg = LOAD_REG_W(REG_BX); + break; + case 2: case 3: case 6: + base_reg = LOAD_REG_W(REG_BP); + break; + case 4: + base_reg = LOAD_REG_W(REG_SI); + break; + case 5: + base_reg = LOAD_REG_W(REG_DI); + break; + } + if (!(cpu_rm & 4)) + { + if (cpu_rm & 1) + index_reg = LOAD_REG_W(REG_DI); + else + index_reg = LOAD_REG_W(REG_SI); + } + base_reg &= 7; + index_reg &= 7; + + switch (cpu_mod) + { + case 0: + if (cpu_rm & 4) + { + addbyte(0x41); /*MOVZX EAX, base_reg*/ + addbyte(0x0f); + addbyte(0xb7); + addbyte(0xc0 | base_reg); + } + else + { + addbyte(0x67); /*LEA EAX, base_reg+index_reg*/ + addbyte(0x43); + addbyte(0x8d); + if (base_reg == 5) + { + addbyte(0x44); + addbyte(base_reg | (index_reg << 3)); + addbyte(0); + } + else + { + addbyte(0x04); + addbyte(base_reg | (index_reg << 3)); + } + } + break; + case 1: + if (cpu_rm & 4) + { + addbyte(0x67); /*LEA EAX, base_reg+imm8*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x40 | base_reg); + addbyte((fetchdat >> 8) & 0xff); + } + else + { + addbyte(0x67); /*LEA EAX, base_reg+index_reg+imm8*/ + addbyte(0x43); + addbyte(0x8d); + addbyte(0x44); + addbyte(base_reg | (index_reg << 3)); + addbyte((fetchdat >> 8) & 0xff); + } + (*op_pc)++; + break; + case 2: + if (cpu_rm & 4) + { + addbyte(0x67); /*LEA EAX, base_reg+imm8*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x80 | base_reg); + addlong((fetchdat >> 8) & 0xffff); + } + else + { + addbyte(0x67); /*LEA EAX, base_reg+index_reg+imm16*/ + addbyte(0x43); + addbyte(0x8d); + addbyte(0x84); + addbyte(base_reg | (index_reg << 3)); + addlong((fetchdat >> 8) & 0xffff); + } + (*op_pc) += 2; + break; + + } + if (cpu_mod || !(cpu_rm & 4)) + { + addbyte(0x25); /*ANDL $0xffff, %eax*/ + addlong(0xffff); + } + addbyte(0x89); /*MOV eaaddr, EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(eaaddr)); + + if (mod1seg[cpu_rm] == &ss && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + } + return op_ea_seg; +} +//#if 0 +static x86seg *codegen_generate_ea_32_long(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc, int stack_offset) +{ + uint32_t new_eaaddr; + + if (cpu_rm == 4) + { + uint8_t sib = fetchdat >> 8; + int base_reg = -1, index_reg = -1; + + (*op_pc)++; + + if (cpu_mod || (sib & 7) != 5) + base_reg = LOAD_REG_L(sib & 7) & 7; + + if (((sib >> 3) & 7) != 4) + index_reg = LOAD_REG_L((sib >> 3) & 7) & 7; + + if (index_reg == -1) + { + switch (cpu_mod) + { + case 0: + if ((sib & 7) == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xb8); /*MOV EAX, imm32*/ + addlong(new_eaaddr); + (*op_pc) += 4; + } + else + { + addbyte(0x44); /*MOV EAX, base_reg*/ + addbyte(0x89); + addbyte(0xc0 | (base_reg << 3)); + } + break; + case 1: + addbyte(0x67); /*LEA EAX, imm8+base_reg*/ + addbyte(0x41); + addbyte(0x8d); + if (base_reg == 4) + { + addbyte(0x44); + addbyte(0x24); + } + else + { + addbyte(0x40 | base_reg); + } + addbyte((fetchdat >> 16) & 0xff); + (*op_pc)++; + break; + case 2: + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0x67); /*LEA EAX, imm32+base_reg*/ + addbyte(0x41); + addbyte(0x8d); + if (base_reg == 4) + { + addbyte(0x84); + addbyte(0x24); + } + else + { + addbyte(0x80 | base_reg); + } + addlong(new_eaaddr); + (*op_pc) += 4; + break; + } + } + else + { + switch (cpu_mod) + { + case 0: + if ((sib & 7) == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + if (sib >> 6) + { + addbyte(0x67); /*LEA EAX, imm32+index_reg*scale*/ + addbyte(0x42); + addbyte(0x8d); + addbyte(0x04); + addbyte(0x05 | (sib & 0xc0) | (index_reg << 3)); + addlong(new_eaaddr); + } + else + { + addbyte(0x67); /*LEA EAX, imm32+index_reg*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x80 | index_reg); + addlong(new_eaaddr); + } + (*op_pc) += 4; + } + else + { + addbyte(0x67); /*LEA EAX, base_reg+index_reg*scale*/ + addbyte(0x43); + addbyte(0x8d); + if (base_reg == 5) + { + addbyte(0x44); + addbyte(base_reg | (index_reg << 3) | (sib & 0xc0)); + addbyte(0); + } + else + { + addbyte(0x04); + addbyte(base_reg | (index_reg << 3) | (sib & 0xc0)); + } + } + break; + case 1: + addbyte(0x67); /*LEA EAX, imm8+base_reg+index_reg*scale*/ + addbyte(0x43); + addbyte(0x8d); + addbyte(0x44); + addbyte(base_reg | (index_reg << 3) | (sib & 0xc0)); + addbyte((fetchdat >> 16) & 0xff); + (*op_pc)++; + break; + case 2: + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0x67); /*LEA EAX, imm32+base_reg+index_reg*scale*/ + addbyte(0x43); + addbyte(0x8d); + addbyte(0x84); + addbyte(base_reg | (index_reg << 3) | (sib & 0xc0)); + addlong(new_eaaddr); + (*op_pc) += 4; + break; + } + } + if (stack_offset && (sib & 7) == 4 && (cpu_mod || (sib & 7) != 5)) /*ESP*/ + { + addbyte(0x05); + addlong(stack_offset); + } + if (((sib & 7) == 4 || (cpu_mod && (sib & 7) == 5)) && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + + addbyte(0x89); /*MOV eaaddr, EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(eaaddr)); + } + else + { + int base_reg; + + if (!cpu_mod && cpu_rm == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xC7); /*MOVL $new_eaaddr,(eaaddr)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(eaaddr)); + addlong(new_eaaddr); + (*op_pc) += 4; + return op_ea_seg; + } + base_reg = LOAD_REG_L(cpu_rm) & 7; + if (cpu_mod) + { + if (cpu_rm == 5 && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + if (cpu_mod == 1) + { + addbyte(0x67); /*LEA EAX, base_reg+imm8*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x40 | base_reg); + addbyte((fetchdat >> 8) & 0xff); + (*op_pc)++; + } + else + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0x67); /*LEA EAX, base_reg+imm32*/ + addbyte(0x41); + addbyte(0x8d); + addbyte(0x80 | base_reg); + addlong(new_eaaddr); + (*op_pc) += 4; + } + addbyte(0x89); /*MOV eaaddr, EAX*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(eaaddr)); + } + else + { + addbyte(0x44); /*MOV eaaddr, base_reg*/ + addbyte(0x89); + addbyte(0x45 | (base_reg << 3)); + addbyte((uint8_t)cpu_state_offset(eaaddr)); + } + } + return op_ea_seg; +} +//#endif +void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t new_pc, uint32_t old_pc) +{ + codeblock_t *block = &codeblock[block_current]; + uint32_t op_32 = use32; + uint32_t op_pc = new_pc; + const OpFn *op_table = (OpFn *) x86_dynarec_opcodes; + RecompOpFn *recomp_op_table = recomp_opcodes; + int opcode_shift = 0; + int opcode_mask = 0x3ff; + int over = 0; + int pc_off = 0; + int test_modrm = 1; + int c; + + op_ea_seg = &cpu_state.seg_ds; + op_ssegs = 0; + op_old_pc = old_pc; + + for (c = 0; c < NR_HOST_REGS; c++) + host_reg_mapping[c] = -1; + for (c = 0; c < NR_HOST_XMM_REGS; c++) + host_reg_xmm_mapping[c] = -1; + + codegen_timing_start(); + + while (!over) + { + switch (opcode) + { + case 0x0f: + op_table = x86_dynarec_opcodes_0f; + recomp_op_table = recomp_opcodes_0f; + over = 1; + break; + + case 0x26: /*ES:*/ + op_ea_seg = &cpu_state.seg_es; + op_ssegs = 1; + break; + case 0x2e: /*CS:*/ + op_ea_seg = &cpu_state.seg_cs; + op_ssegs = 1; + break; + case 0x36: /*SS:*/ + op_ea_seg = &cpu_state.seg_ss; + op_ssegs = 1; + break; + case 0x3e: /*DS:*/ + op_ea_seg = &cpu_state.seg_ds; + op_ssegs = 1; + break; + case 0x64: /*FS:*/ + op_ea_seg = &cpu_state.seg_fs; + op_ssegs = 1; + break; + case 0x65: /*GS:*/ + op_ea_seg = &cpu_state.seg_gs; + op_ssegs = 1; + break; + + case 0x66: /*Data size select*/ + op_32 = ((use32 & 0x100) ^ 0x100) | (op_32 & 0x200); + break; + case 0x67: /*Address size select*/ + op_32 = ((use32 & 0x200) ^ 0x200) | (op_32 & 0x100); + break; + + case 0xd8: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_d8_a32 : x86_dynarec_opcodes_d8_a16; + recomp_op_table = recomp_opcodes_d8; + opcode_shift = 3; + opcode_mask = 0x1f; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xd9: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_d9_a32 : x86_dynarec_opcodes_d9_a16; + recomp_op_table = recomp_opcodes_d9; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xda: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_da_a32 : x86_dynarec_opcodes_da_a16; + recomp_op_table = recomp_opcodes_da; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xdb: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_db_a32 : x86_dynarec_opcodes_db_a16; + recomp_op_table = recomp_opcodes_db; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xdc: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_dc_a32 : x86_dynarec_opcodes_dc_a16; + recomp_op_table = recomp_opcodes_dc; + opcode_shift = 3; + opcode_mask = 0x1f; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xdd: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_dd_a32 : x86_dynarec_opcodes_dd_a16; + recomp_op_table = recomp_opcodes_dd; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xde: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_de_a32 : x86_dynarec_opcodes_de_a16; + recomp_op_table = recomp_opcodes_de; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xdf: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_df_a32 : x86_dynarec_opcodes_df_a16; + recomp_op_table = recomp_opcodes_df; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + + case 0xf0: /*LOCK*/ + break; + + case 0xf2: /*REPNE*/ + op_table = x86_dynarec_opcodes_REPNE; + recomp_op_table = recomp_opcodes_REPNE; + break; + case 0xf3: /*REPE*/ + op_table = x86_dynarec_opcodes_REPE; + recomp_op_table = recomp_opcodes_REPE; + break; + + default: + goto generate_call; + } + fetchdat = fastreadl(cs + op_pc); + codegen_timing_prefix(opcode, fetchdat); + if (cpu_state.abrt) + return; + opcode = fetchdat & 0xff; + if (!pc_off) + fetchdat >>= 8; + op_pc++; + } + +generate_call: + codegen_timing_opcode(opcode, fetchdat, op_32, op_pc); + + codegen_accumulate(ACCREG_ins, 1); + codegen_accumulate(ACCREG_cycles, -codegen_block_cycles); + codegen_block_cycles = 0; + + if ((op_table == x86_dynarec_opcodes && + ((opcode & 0xf0) == 0x70 || (opcode & 0xfc) == 0xe0 || opcode == 0xc2 || + (opcode & 0xfe) == 0xca || (opcode & 0xfc) == 0xcc || (opcode & 0xfc) == 0xe8 || + (opcode == 0xff && ((fetchdat & 0x38) >= 0x10 && (fetchdat & 0x38) < 0x30)))) || + (op_table == x86_dynarec_opcodes_0f && ((opcode & 0xf0) == 0x80))) + { + /*On some CPUs (eg K6), a jump/branch instruction may be able to pair with + subsequent instructions, so no cycles may have been deducted for it yet. + To prevent having zero cycle blocks (eg with a jump instruction pointing + to itself), apply the cycles that would be taken if this jump is taken, + then reverse it for subsequent instructions if the jump is not taken*/ + int jump_cycles = 0; + + if (codegen_timing_jump_cycles != NULL) + jump_cycles = codegen_timing_jump_cycles(); + + if (jump_cycles) + codegen_accumulate(ACCREG_cycles, -jump_cycles); + codegen_accumulate_flush(); + if (jump_cycles) + codegen_accumulate(ACCREG_cycles, jump_cycles); + } + + if ((op_table == x86_dynarec_opcodes_REPNE || op_table == x86_dynarec_opcodes_REPE) && !op_table[opcode | op_32]) + { + op_table = x86_dynarec_opcodes; + recomp_op_table = recomp_opcodes; + } + + if (recomp_op_table && recomp_op_table[(opcode | op_32) & 0x1ff]) + { + uint32_t new_pc = recomp_op_table[(opcode | op_32) & 0x1ff](opcode, fetchdat, op_32, op_pc, block); + if (new_pc) + { + if (new_pc != -1) + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.pc, new_pc); + + codegen_block_ins++; + block->ins++; + codegen_block_full_ins++; + codegen_endpc = (cs + cpu_state.pc) + 8; + + return; + } + } + + op = op_table[((opcode >> opcode_shift) | op_32) & opcode_mask]; + if (op_ssegs != last_ssegs) + { + last_ssegs = op_ssegs; + addbyte(0xC6); /*MOVB $0,(ssegs)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ssegs)); + addbyte(op_ssegs); + } + if ((!test_modrm || + (op_table == x86_dynarec_opcodes && opcode_modrm[opcode]) || + (op_table == x86_dynarec_opcodes_0f && opcode_0f_modrm[opcode]))/* && !(op_32 & 0x200)*/) + { + int stack_offset = 0; + + if (op_table == x86_dynarec_opcodes && opcode == 0x8f) /*POP*/ + stack_offset = (op_32 & 0x100) ? 4 : 2; + + cpu_mod = (fetchdat >> 6) & 3; + cpu_reg = (fetchdat >> 3) & 7; + cpu_rm = fetchdat & 7; + + addbyte(0xC7); /*MOVL $rm | mod | reg,(rm_mod_reg_data)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(rm_data.rm_mod_reg_data)); + addlong(cpu_rm | (cpu_mod << 8) | (cpu_reg << 16)); + + op_pc += pc_off; + if (cpu_mod != 3 && !(op_32 & 0x200)) + op_ea_seg = codegen_generate_ea_16_long(op_ea_seg, fetchdat, op_ssegs, &op_pc); + if (cpu_mod != 3 && (op_32 & 0x200)) + op_ea_seg = codegen_generate_ea_32_long(op_ea_seg, fetchdat, op_ssegs, &op_pc, stack_offset); + op_pc -= pc_off; + } + if (op_ea_seg != last_ea_seg) + { + addbyte(0xC7); /*MOVL $&_ds,(ea_seg)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ea_seg)); + addlong((uint32_t)(uintptr_t)op_ea_seg); + } + + codegen_accumulate_flush(); + + addbyte(0xC7); /*MOVL [pc],new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(op_pc + pc_off); + addbyte(0xC7); /*MOVL $old_pc,(oldpc)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(oldpc)); + addlong(old_pc); + if (op_32 != last_op32) + { + last_op32 = op_32; + addbyte(0xC7); /*MOVL $use32,(op32)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(op32)); + addlong(op_32); + } + + load_param_1_32(block, fetchdat); + call(block, (uintptr_t)op); + + codegen_block_ins++; + + block->ins++; + + addbyte(0x85); /*OR %eax, %eax*/ + addbyte(0xc0); + addbyte(0x0F); addbyte(0x85); /*JNZ 0*/ + addlong((uint32_t)(uintptr_t)&block->data[BLOCK_EXIT_OFFSET] - (uint32_t)(uintptr_t)(&block->data[block_pos + 4])); + + codegen_endpc = (cs + cpu_state.pc) + 8; +} + +#endif diff --git a/src/codegen/codegen_x86-64.h b/src/codegen/codegen_x86-64.h new file mode 100644 index 000000000..648a30342 --- /dev/null +++ b/src/codegen/codegen_x86-64.h @@ -0,0 +1,23 @@ +#define BLOCK_SIZE 0x4000 +#define BLOCK_MASK 0x3fff +#define BLOCK_START 0 + +#define HASH_SIZE 0x20000 +#define HASH_MASK 0x1ffff + +#define HASH(l) ((l) & 0x1ffff) + +#define BLOCK_EXIT_OFFSET 0x7e0 +#define BLOCK_GPF_OFFSET (BLOCK_EXIT_OFFSET - 20) + +#define BLOCK_MAX 1620 + +enum +{ + OP_RET = 0xc3 +}; + +#define NR_HOST_REGS 4 +extern int host_reg_mapping[NR_HOST_REGS]; +#define NR_HOST_XMM_REGS 8 +extern int host_reg_xmm_mapping[NR_HOST_XMM_REGS]; diff --git a/src/codegen/codegen_x86.c b/src/codegen/codegen_x86.c new file mode 100644 index 000000000..44d66b42f --- /dev/null +++ b/src/codegen/codegen_x86.c @@ -0,0 +1,2156 @@ +/* + * VARCem Virtual ARchaeological Computer EMulator. + * An emulator of (mostly) x86-based PC systems and devices, + * using the ISA,EISA,VLB,MCA and PCI system buses, roughly + * spanning the era between 1981 and 1995. + * + * This file is part of the VARCem Project. + * + * Dynamic Recompiler for Intel 32-bit systems. + * + * + * + * Authors: Fred N. van Kempen, + * Sarah Walker, + * Miran Grca, + * + * Copyright 2018 Fred N. van Kempen. + * Copyright 2008-2018 Sarah Walker. + * Copyright 2016-2018 Miran Grca. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the: + * + * Free Software Foundation, Inc. + * 59 Temple Place - Suite 330 + * Boston, MA 02111-1307 + * USA. + */ +#if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 + +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/mem.h> +#include "x86.h" +#include "x86_flags.h" +#include "x86_ops.h" +#include "x87.h" + +#include "386_common.h" + +#include "codegen.h" +#include "codegen_accumulate.h" +#include "codegen_ops.h" +#include "codegen_ops_x86.h" + +#ifdef __linux__ +#include +#include +#endif +#if defined _WIN32 +#include +#endif + +int codegen_flat_ds, codegen_flat_ss; +int mmx_ebx_ecx_loaded; +int codegen_flags_changed = 0; +int codegen_fpu_entered = 0; +int codegen_mmx_entered = 0; +int codegen_fpu_loaded_iq[8]; +x86seg *op_ea_seg; +int op_ssegs; +uint32_t op_old_pc; + +uint32_t recomp_page = -1; + +int host_reg_mapping[NR_HOST_REGS]; +int host_reg_xmm_mapping[NR_HOST_XMM_REGS]; +codeblock_t *codeblock; +codeblock_t **codeblock_hash; + + +int block_current = 0; +static int block_num; +int block_pos; + +int cpu_recomp_flushes, cpu_recomp_flushes_latched; +int cpu_recomp_evicted, cpu_recomp_evicted_latched; +int cpu_recomp_reuse, cpu_recomp_reuse_latched; +int cpu_recomp_removed, cpu_recomp_removed_latched; + +uint32_t codegen_endpc; + +int codegen_block_cycles; +static int codegen_block_ins; +static int codegen_block_full_ins; + +static uint32_t last_op32; +static x86seg *last_ea_seg; +static int last_ssegs; + +static uint32_t mem_abrt_rout; +uint32_t mem_load_addr_ea_b; +uint32_t mem_load_addr_ea_w; +uint32_t mem_load_addr_ea_l; +uint32_t mem_load_addr_ea_q; +uint32_t mem_store_addr_ea_b; +uint32_t mem_store_addr_ea_w; +uint32_t mem_store_addr_ea_l; +uint32_t mem_store_addr_ea_q; +uint32_t mem_load_addr_ea_b_no_abrt; +uint32_t mem_store_addr_ea_b_no_abrt; +uint32_t mem_load_addr_ea_w_no_abrt; +uint32_t mem_store_addr_ea_w_no_abrt; +uint32_t mem_load_addr_ea_l_no_abrt; +uint32_t mem_store_addr_ea_l_no_abrt; +uint32_t mem_check_write; +uint32_t mem_check_write_w; +uint32_t mem_check_write_l; + +static uint32_t gen_MEM_LOAD_ADDR_EA_B() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); + addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x89); /*MOV EDI, EDX*/ + addbyte(0xd7); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+1); + addbyte(0x0f); /*MOVZX EAX, B[EDX+EDI]*/ + addbyte(0xb6); + addbyte(0x04); + addbyte(0x3a); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmembl*/ + addlong((uint32_t)readmemb386l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*MOVZX EAX, AL*/ + addbyte(0xb6); + addbyte(0xc0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static uint32_t gen_MEM_LOAD_ADDR_EA_W() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); + addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x89); /*MOV EDI, EDX*/ + addbyte(0xd7); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 1*/ + addbyte(0xc7); + addlong(1); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+4+1); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+1); + addbyte(0x0f); /*MOVZX EAX, [EDX+EDI]W*/ + addbyte(0xb7); + addbyte(0x04); + addbyte(0x3a); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmemwl*/ + addlong((uint32_t)readmemwl - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*MOVZX EAX, AX*/ + addbyte(0xb7); + addbyte(0xc0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static uint32_t gen_MEM_LOAD_ADDR_EA_L() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); + addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x89); /*MOV EDI, EDX*/ + addbyte(0xd7); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 3*/ + addbyte(0xc7); + addlong(3); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+3+1); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+1); + addbyte(0x8b); /*MOV EAX, [EDX+EDI]*/ + addbyte(0x04); + addbyte(0x3a); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmemll*/ + addlong((uint32_t)readmemll - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static uint32_t gen_MEM_LOAD_ADDR_EA_Q() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); + addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x89); /*MOV EDI, EDX*/ + addbyte(0xd7); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 7*/ + addbyte(0xc7); + addlong(7); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+3+4+1); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+4+1); + addbyte(0x8b); /*MOV EAX, [EDX+EDI]*/ + addbyte(0x04); + addbyte(0x3a); + addbyte(0x8b); /*MOV EDX, [EDX+EDI+4]*/ + addbyte(0x54); + addbyte(0x3a); + addbyte(4); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmemql*/ + addlong((uint32_t)readmemql - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static uint32_t gen_MEM_STORE_ADDR_EA_B() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*dat = ECX, seg = ESI, addr = EAX*/ + addbyte(0x89); /*MOV EBX, ESI*/ + addbyte(0xf3); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xc0 | (REG_ESI << 3) | REG_EDI); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0x8b); /*MOV ESI, readlookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+1); + addbyte(0x88); /*MOV [EDI+ESI],CL*/ + addbyte(0x04 | (REG_ECX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0xe8); /*CALL writememb386l*/ + addlong((uint32_t)writememb386l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 12*/ + addbyte(0xc4); + addbyte(12); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static uint32_t gen_MEM_STORE_ADDR_EA_W() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*dat = ECX, seg = ESI, addr = EAX*/ + addbyte(0x89); /*MOV EBX, ESI*/ + addbyte(0xf3); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 1*/ + addbyte(0xc7); + addlong(1); + addbyte(0x8b); /*MOV ESI, readlookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+4+1); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+1); + addbyte(0x66); /*MOV [EDI+ESI],CX*/ + addbyte(0x89); + addbyte(0x04 | (REG_CX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0xe8); /*CALL writememwl*/ + addlong((uint32_t)writememwl - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 12*/ + addbyte(0xc4); + addbyte(12); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static uint32_t gen_MEM_STORE_ADDR_EA_L() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*dat = ECX, seg = ESI, addr = EAX*/ + addbyte(0x89); /*MOV EBX, ESI*/ + addbyte(0xf3); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 3*/ + addbyte(0xc7); + addlong(3); + addbyte(0x8b); /*MOV ESI, readlookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+3+1); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+1); + addbyte(0x89); /*MOV [EDI+ESI],ECX*/ + addbyte(0x04 | (REG_ECX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0xe8); /*CALL writememll*/ + addlong((uint32_t)writememll - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 12*/ + addbyte(0xc4); + addbyte(12); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static uint32_t gen_MEM_STORE_ADDR_EA_Q() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*dat = EBX/ECX, seg = ESI, addr = EAX*/ + addbyte(0x89); /*MOV EDX, ESI*/ + addbyte(0xf2); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 7*/ + addbyte(0xc7); + addlong(7); + addbyte(0x8b); /*MOV ESI, readlookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+3+4+1); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+4+1); + addbyte(0x89); /*MOV [EDI+ESI],EBX*/ + addbyte(0x04 | (REG_EBX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0x89); /*MOV 4[EDI+ESI],EBX*/ + addbyte(0x44 | (REG_ECX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(4); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x52); /*PUSH EDX*/ + addbyte(0xe8); /*CALL writememql*/ + addlong((uint32_t)writememql - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 16*/ + addbyte(0xc4); + addbyte(16); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +#ifndef RELEASE_BUILD +static char gen_MEM_LOAD_ADDR_EA_B_NO_ABRT_err[] = "gen_MEM_LOAD_ADDR_EA_B_NO_ABRT aborted\n"; +#endif +static uint32_t gen_MEM_LOAD_ADDR_EA_B_NO_ABRT() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); + addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x89); /*MOV EDI, EDX*/ + addbyte(0xd7); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+1); + addbyte(0x0f); /*MOVZX ECX, B[EDX+EDI]*/ + addbyte(0xb6); + addbyte(0x0c); + addbyte(0x3a); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmembl*/ + addlong((uint32_t)readmemb386l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); +#ifndef RELEASE_BUILD + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); +#endif + addbyte(0x0f); /*MOVZX ECX, AL*/ + addbyte(0xb6); + addbyte(0xc8); +#ifndef RELEASE_BUILD + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); +#endif + addbyte(0xc3); /*RET*/ +#ifndef RELEASE_BUILD + addbyte(0xc7); /*MOV [ESP], gen_MEM_LOAD_ADDR_EA_B_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong((uint32_t)gen_MEM_LOAD_ADDR_EA_B_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ +#endif + return addr; +} + +#ifndef RELEASE_BUILD +static char gen_MEM_LOAD_ADDR_EA_W_NO_ABRT_err[] = "gen_MEM_LOAD_ADDR_EA_W_NO_ABRT aborted\n"; +#endif +static uint32_t gen_MEM_LOAD_ADDR_EA_W_NO_ABRT() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); + addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x89); /*MOV EDI, EDX*/ + addbyte(0xd7); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 1*/ + addbyte(0xc7); + addlong(1); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+4+1); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+1); + addbyte(0x0f); /*MOVZX ECX, [EDX+EDI]W*/ + addbyte(0xb7); + addbyte(0x0c); + addbyte(0x3a); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmemwl*/ + addlong((uint32_t)readmemwl - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); +#ifndef RELEASE_BUILD + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); +#endif + addbyte(0x0f); /*MOVZX ECX, AX*/ + addbyte(0xb7); + addbyte(0xc8); +#ifndef RELEASE_BUILD + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); +#endif + addbyte(0xc3); /*RET*/ +#ifndef RELEASE_BUILD + addbyte(0xc7); /*MOV [ESP], gen_MEM_LOAD_ADDR_EA_W_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong((uint32_t)gen_MEM_LOAD_ADDR_EA_W_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ +#endif + return addr; +} + +#ifndef RELEASE_BUILD +static char gen_MEM_LOAD_ADDR_EA_L_NO_ABRT_err[] = "gen_MEM_LOAD_ADDR_EA_L_NO_ABRT aborted\n"; +#endif +static uint32_t gen_MEM_LOAD_ADDR_EA_L_NO_ABRT() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); + addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x89); /*MOV EDI, EDX*/ + addbyte(0xd7); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 3*/ + addbyte(0xc7); + addlong(3); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + addbyte(0x75); /*JE slowpath*/ + addbyte(3+2+3+1); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+1); + addbyte(0x8b); /*MOV ECX, [EDX+EDI]*/ + addbyte(0x0c); + addbyte(0x3a); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmemll*/ + addlong((uint32_t)readmemll - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x89); /*MOV ECX, EAX*/ + addbyte(0xc1); +#ifndef RELEASE_BUILD + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); +#endif + addbyte(0xc3); /*RET*/ +#ifndef RELEASE_BUILD + addbyte(0x83); /*SUBL 4,%esp*/ + addbyte(0xEC); + addbyte(4); + addbyte(0xc7); /*MOV [ESP], gen_MEM_LOAD_ADDR_EA_L_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong((uint32_t)gen_MEM_LOAD_ADDR_EA_L_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ +#endif + return addr; +} + +#ifndef RELEASE_BUILD +static char gen_MEM_STORE_ADDR_EA_B_NO_ABRT_err[] = "gen_MEM_STORE_ADDR_EA_B_NO_ABRT aborted\n"; +#endif +static uint32_t gen_MEM_STORE_ADDR_EA_B_NO_ABRT() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*dat = ECX, seg = ESI, addr = EAX*/ + addbyte(0x89); /*MOV EBX, ESI*/ + addbyte(0xf3); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xc0 | (REG_ESI << 3) | REG_EDI); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0x8b); /*MOV ESI, readlookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+1); + addbyte(0x88); /*MOV [EDI+ESI],CL*/ + addbyte(0x04 | (REG_ECX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0xe8); /*CALL writememb386l*/ + addlong((uint32_t)writememb386l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 12*/ + addbyte(0xc4); + addbyte(12); +#ifndef RELEASE_BUILD + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); +#endif + addbyte(0xc3); /*RET*/ +#ifndef RELEASE_BUILD + addbyte(0xc7); /*MOV [ESP], gen_MEM_STORE_ADDR_EA_B_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong((uint32_t)gen_MEM_STORE_ADDR_EA_B_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ +#endif + return addr; +} + +#ifndef RELEASE_BUILD +static char gen_MEM_STORE_ADDR_EA_W_NO_ABRT_err[] = "gen_MEM_STORE_ADDR_EA_W_NO_ABRT aborted\n"; +#endif +static uint32_t gen_MEM_STORE_ADDR_EA_W_NO_ABRT() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*dat = ECX, seg = ESI, addr = EAX*/ + addbyte(0x89); /*MOV EBX, ESI*/ + addbyte(0xf3); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 1*/ + addbyte(0xc7); + addlong(1); + addbyte(0x8b); /*MOV ESI, readlookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+4+1); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+1); + addbyte(0x66); /*MOV [EDI+ESI],CX*/ + addbyte(0x89); + addbyte(0x04 | (REG_CX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0xe8); /*CALL writememwl*/ + addlong((uint32_t)writememwl - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 12*/ + addbyte(0xc4); + addbyte(12); +#ifndef RELEASE_BUILD + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); +#endif + addbyte(0xc3); /*RET*/ +#ifndef RELEASE_BUILD + addbyte(0xc7); /*MOV [ESP], gen_MEM_STORE_ADDR_EA_W_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong((uint32_t)gen_MEM_STORE_ADDR_EA_W_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ +#endif + return addr; +} + +#ifndef RELEASE_BUILD +static char gen_MEM_STORE_ADDR_EA_L_NO_ABRT_err[] = "gen_MEM_STORE_ADDR_EA_L_NO_ABRT aborted\n"; +#endif +static uint32_t gen_MEM_STORE_ADDR_EA_L_NO_ABRT() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*dat = ECX, seg = ESI, addr = EAX*/ + addbyte(0x89); /*MOV EBX, ESI*/ + addbyte(0xf3); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 3*/ + addbyte(0xc7); + addlong(3); + addbyte(0x8b); /*MOV ESI, readlookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x75); /*JNE slowpath*/ + addbyte(3+2+3+1); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+1); + addbyte(0x89); /*MOV [EDI+ESI],ECX*/ + addbyte(0x04 | (REG_ECX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0xe8); /*CALL writememll*/ + addlong((uint32_t)writememll - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 12*/ + addbyte(0xc4); + addbyte(12); +#ifndef RELEASE_BUILD + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); +#endif + addbyte(0xc3); /*RET*/ +#ifndef RELEASE_BUILD + addbyte(0xc7); /*MOV [ESP], gen_MEM_STORE_ADDR_EA_L_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong((uint32_t)gen_MEM_STORE_ADDR_EA_L_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ +#endif + return addr; +} + +static uint32_t gen_MEM_CHECK_WRITE() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*seg = ESI, addr = EAX*/ + + addbyte(0x8d); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x3c); + addbyte(0x30); + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3d); + addlong((uint32_t)&cr0); + addbyte(0); + addbyte(0x78); /*JS +*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(11); + addbyte(0x83); /*CMP writelookup2[EDI*4],-1*/ + addbyte(0x3c); + addbyte(0xbd); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE +*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + /*slowpath:*/ + addbyte(0x8d); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x3c); + addbyte(0x30); + addbyte(0x6a); /*PUSH 1*/ + addbyte(1); + addbyte(0x57); /*PUSH EDI*/ + addbyte(0xe8); /*CALL mmutranslatereal32*/ + addlong((uint32_t)mmutranslatereal32 - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static uint32_t gen_MEM_CHECK_WRITE_W() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*seg = ESI, addr = EAX*/ + + addbyte(0x8d); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x3c); + addbyte(0x30); + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3d); + addlong((uint32_t)&cr0); + addbyte(0); + addbyte(0x78); /*JS +*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x8d); /*LEA ESI, 1[EDI]*/ + addbyte(0x77); + addbyte(0x01); + addbyte(0x74); /*JE slowpath*/ + addbyte(11); + addbyte(0x89); /*MOV EAX, EDI*/ + addbyte(0xf8); + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xee); + addbyte(12); + addbyte(0x83); /*CMP writelookup2[EDI*4],-1*/ + addbyte(0x3c); + addbyte(0xbd); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE +*/ + addbyte(11); + addbyte(0x83); /*CMP writelookup2[ESI*4],-1*/ + addbyte(0x3c); + addbyte(0xb5); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE +*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + /*slowpath:*/ + addbyte(0x89); /*MOV EDI, EAX*/ + addbyte(0xc7); + /*slowpath_lp:*/ + addbyte(0x6a); /*PUSH 1*/ + addbyte(1); + addbyte(0x57); /*PUSH EDI*/ + addbyte(0xe8); /*CALL mmutranslatereal32*/ + addlong((uint32_t)mmutranslatereal32 - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x5f); /*POP EDI*/ + addbyte(0x83); /*ADD ESP, 4*/ + addbyte(0xc4); + addbyte(4); + addbyte(0x83); /*ADD EDI, 1*/ + addbyte(0xc7); + addbyte(1); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + /*If bits 0-11 of the address are now 0 then this crosses a page, so loop back*/ + addbyte(0xf7); /*TEST $fff, EDI*/ + addbyte(0xc7); + addlong(0xfff); + addbyte(0x74); /*JE slowpath_lp*/ + addbyte(-33); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static uint32_t gen_MEM_CHECK_WRITE_L() +{ + uint32_t addr = (uint32_t)&codeblock[block_current].data[block_pos]; + + /*seg = ESI, addr = EAX*/ + + addbyte(0x8d); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x3c); + addbyte(0x30); + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3d); + addlong((uint32_t)&cr0); + addbyte(0); + addbyte(0x78); /*JS +*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x8d); /*LEA ESI, 3[EDI]*/ + addbyte(0x77); + addbyte(0x03); + addbyte(0x74); /*JE slowpath*/ + addbyte(11); + addbyte(0x89); /*MOV EAX, EDI*/ + addbyte(0xf8); + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xee); + addbyte(12); + addbyte(0x83); /*CMP writelookup2[EDI*4],-1*/ + addbyte(0x3c); + addbyte(0xbd); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE +*/ + addbyte(11); + addbyte(0x83); /*CMP writelookup2[ESI*4],-1*/ + addbyte(0x3c); + addbyte(0xb5); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE +*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + /*slowpath:*/ + addbyte(0x89); /*MOV EDI, EAX*/ + addbyte(0xc7); + /*slowpath_lp:*/ + addbyte(0x6a); /*PUSH 1*/ + addbyte(1); + addbyte(0x57); /*PUSH EDI*/ + addbyte(0xe8); /*CALL mmutranslatereal32*/ + addlong((uint32_t)mmutranslatereal32 - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x5f); /*POP EDI*/ + addbyte(0x83); /*ADD ESP, 4*/ + addbyte(0xc4); + addbyte(4); + addbyte(0x83); /*ADD EDI, 3*/ + addbyte(0xc7); + addbyte(3); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte((uint8_t)cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + /*If bits 2-11 of the address are now 0 then this crosses a page, so loop back*/ + addbyte(0xf7); /*TEST EDI, FFC*/ + addbyte(0xc7); + addlong(0xffc); + addbyte(0x74); /*JE slowpath_lp*/ + addbyte(-33); + addbyte(0xc3); /*RET*/ + + return addr; +} + +void codegen_init() +{ +#ifdef __linux__ + void *start; + size_t len; + long pagesize = sysconf(_SC_PAGESIZE); + long pagemask = ~(pagesize - 1); +#endif + +#ifdef _WIN32 + codeblock = VirtualAlloc(NULL, (BLOCK_SIZE+1) * sizeof(codeblock_t), MEM_COMMIT, PAGE_EXECUTE_READWRITE); +#else + codeblock = malloc((BLOCK_SIZE+1) * sizeof(codeblock_t)); +#endif + codeblock_hash = malloc(HASH_SIZE * sizeof(codeblock_t *)); + + memset(codeblock, 0, (BLOCK_SIZE+1) * sizeof(codeblock_t)); + memset(codeblock_hash, 0, HASH_SIZE * sizeof(codeblock_t *)); + +#ifdef __linux__ + start = (void *)((long)codeblock & pagemask); + len = (((BLOCK_SIZE+1) * sizeof(codeblock_t)) + pagesize) & pagemask; + if (mprotect(start, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) + { + perror("mprotect"); + exit(-1); + } +#endif + + block_current = BLOCK_SIZE; + block_pos = 0; + mem_abrt_rout = (uint32_t)&codeblock[block_current].data[block_pos]; + addbyte(0x83); /*ADDL $16+4,%esp*/ + addbyte(0xC4); + addbyte(0x10+4); + addbyte(0x5f); /*POP EDI*/ + addbyte(0x5e); /*POP ESI*/ + addbyte(0x5d); /*POP EBP*/ + addbyte(0x5b); /*POP EDX*/ + addbyte(0xC3); /*RET*/ + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_l = (uint32_t)gen_MEM_LOAD_ADDR_EA_L(); + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_w = (uint32_t)gen_MEM_LOAD_ADDR_EA_W(); + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_b = (uint32_t)gen_MEM_LOAD_ADDR_EA_B(); + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_q = (uint32_t)gen_MEM_LOAD_ADDR_EA_Q(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_l = (uint32_t)gen_MEM_STORE_ADDR_EA_L(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_w = (uint32_t)gen_MEM_STORE_ADDR_EA_W(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_b = (uint32_t)gen_MEM_STORE_ADDR_EA_B(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_q = (uint32_t)gen_MEM_STORE_ADDR_EA_Q(); + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_b_no_abrt = (uint32_t)gen_MEM_LOAD_ADDR_EA_B_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_b_no_abrt = (uint32_t)gen_MEM_STORE_ADDR_EA_B_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_w_no_abrt = (uint32_t)gen_MEM_LOAD_ADDR_EA_W_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_w_no_abrt = (uint32_t)gen_MEM_STORE_ADDR_EA_W_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_l_no_abrt = (uint32_t)gen_MEM_LOAD_ADDR_EA_L_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_l_no_abrt = (uint32_t)gen_MEM_STORE_ADDR_EA_L_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_check_write = (uint32_t)gen_MEM_CHECK_WRITE(); + block_pos = (block_pos + 15) & ~15; + mem_check_write_w = (uint32_t)gen_MEM_CHECK_WRITE_W(); + block_pos = (block_pos + 15) & ~15; + mem_check_write_l = (uint32_t)gen_MEM_CHECK_WRITE_L(); + +#ifndef _MSC_VER + asm( + "fstcw %0\n" + : "=m" (cpu_state.old_npxc) + ); +#else + __asm + { + fstcw cpu_state.old_npxc + } +#endif +} + +void codegen_reset() +{ + memset(codeblock, 0, BLOCK_SIZE * sizeof(codeblock_t)); + memset(codeblock_hash, 0, HASH_SIZE * sizeof(codeblock_t *)); + mem_reset_page_blocks(); +} + +void dump_block() +{ +} + +static void add_to_block_list(codeblock_t *block) +{ + codeblock_t *block_prev = pages[block->phys >> 12].block[(block->phys >> 10) & 3]; + + if (!block->page_mask) + fatal("add_to_block_list - mask = 0\n"); + + if (block_prev) + { + block->next = block_prev; + block_prev->prev = block; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block; + } + else + { + block->next = NULL; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block; + } + + if (block->next) + { + if (!block->next->valid) + fatal("block->next->valid=0 %p %p %x %x\n", (void *)block->next, (void *)codeblock, block_current, block_pos); + } + + if (block->page_mask2) + { + block_prev = pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3]; + + if (block_prev) + { + block->next_2 = block_prev; + block_prev->prev_2 = block; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block; + } + else + { + block->next_2 = NULL; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block; + } + } +} + +static void remove_from_block_list(codeblock_t *block, uint32_t pc) +{ + if (!block->page_mask) + return; + + if (block->prev) + { + block->prev->next = block->next; + if (block->next) + block->next->prev = block->prev; + } + else + { + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block->next; + if (block->next) + block->next->prev = NULL; + else + mem_flush_write_page(block->phys, 0); + } + if (!block->page_mask2) + { + if (block->prev_2 || block->next_2) + fatal("Invalid block_2\n"); + return; + } + + if (block->prev_2) + { + block->prev_2->next_2 = block->next_2; + if (block->next_2) + block->next_2->prev_2 = block->prev_2; + } + else + { + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block->next_2; + if (block->next_2) + block->next_2->prev_2 = NULL; + else + mem_flush_write_page(block->phys_2, 0); + } +} + +static void delete_block(codeblock_t *block) +{ + uint32_t old_pc = block->pc; + + if (block == codeblock_hash[HASH(block->phys)]) + codeblock_hash[HASH(block->phys)] = NULL; + + if (!block->valid) + fatal("Deleting deleted block\n"); + block->valid = 0; + + codeblock_tree_delete(block); + remove_from_block_list(block, old_pc); +} + +void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr) +{ + struct codeblock_t *block = page->block[(phys_addr >> 10) & 3]; + + while (block) + { + if (mask & block->page_mask) + { + delete_block(block); + cpu_recomp_evicted++; + } + if (block == block->next) + fatal("Broken 1\n"); + block = block->next; + } + + block = page->block_2[(phys_addr >> 10) & 3]; + + while (block) + { + if (mask & block->page_mask2) + { + delete_block(block); + cpu_recomp_evicted++; + } + if (block == block->next_2) + fatal("Broken 2\n"); + block = block->next_2; + } +} + +void codegen_block_init(uint32_t phys_addr) +{ + codeblock_t *block; + page_t *page = &pages[phys_addr >> 12]; + + if (!page->block[(phys_addr >> 10) & 3]) + mem_flush_write_page(phys_addr, cs+cpu_state.pc); + + block_current = (block_current + 1) & BLOCK_MASK; + block = &codeblock[block_current]; + + if (block->valid != 0) + { + delete_block(block); + cpu_recomp_reuse++; + } + block_num = HASH(phys_addr); + codeblock_hash[block_num] = &codeblock[block_current]; + + block->valid = 1; + block->ins = 0; + block->pc = cs + cpu_state.pc; + block->_cs = cs; + block->pnt = block_current; + block->phys = phys_addr; + block->dirty_mask = &page->dirty_mask[(phys_addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK]; + block->dirty_mask2 = NULL; + block->next = block->prev = NULL; + block->next_2 = block->prev_2 = NULL; + block->page_mask = 0; + block->flags = CODEBLOCK_STATIC_TOP; + block->status = cpu_cur_status; + + block->was_recompiled = 0; + + recomp_page = block->phys & ~0xfff; + + codeblock_tree_add(block); +} + +void codegen_block_start_recompile(codeblock_t *block) +{ + page_t *page = &pages[block->phys >> 12]; + + if (!page->block[(block->phys >> 10) & 3]) + mem_flush_write_page(block->phys, cs+cpu_state.pc); + + block_num = HASH(block->phys); + block_current = block->pnt; + + if (block->pc != cs + cpu_state.pc || block->was_recompiled) + fatal("Recompile to used block!\n"); + + block->status = cpu_cur_status; + + block_pos = BLOCK_GPF_OFFSET; + addbyte(0xc7); /*MOV [ESP],0*/ + addbyte(0x04); + addbyte(0x24); + addlong(0); + addbyte(0xc7); /*MOV [ESP+4],0*/ + addbyte(0x44); + addbyte(0x24); + addbyte(0x04); + addlong(0); + addbyte(0xe8); /*CALL x86gpf*/ + addlong((uint32_t)x86gpf - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + block_pos = BLOCK_EXIT_OFFSET; /*Exit code*/ + addbyte(0x83); /*ADDL $16,%esp*/ + addbyte(0xC4); + addbyte(0x10); + addbyte(0x5f); /*POP EDI*/ + addbyte(0x5e); /*POP ESI*/ + addbyte(0x5d); /*POP EBP*/ + addbyte(0x5b); /*POP EDX*/ + addbyte(0xC3); /*RET*/ + cpu_block_end = 0; + block_pos = 0; /*Entry code*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0x55); /*PUSH EBP*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0x57); /*PUSH EDI*/ + addbyte(0x83); /*SUBL $16,%esp*/ + addbyte(0xEC); + addbyte(0x10); + addbyte(0xBD); /*MOVL EBP, &cpu_state*/ + addlong(((uintptr_t)&cpu_state) + 128); + + last_op32 = -1; + last_ea_seg = NULL; + last_ssegs = -1; + + codegen_block_cycles = 0; + codegen_timing_block_start(); + + codegen_block_ins = 0; + codegen_block_full_ins = 0; + + recomp_page = block->phys & ~0xfff; + + codegen_flags_changed = 0; + codegen_fpu_entered = 0; + codegen_mmx_entered = 0; + + codegen_fpu_loaded_iq[0] = codegen_fpu_loaded_iq[1] = codegen_fpu_loaded_iq[2] = codegen_fpu_loaded_iq[3] = + codegen_fpu_loaded_iq[4] = codegen_fpu_loaded_iq[5] = codegen_fpu_loaded_iq[6] = codegen_fpu_loaded_iq[7] = 0; + + cpu_state.seg_ds.checked = cpu_state.seg_es.checked = cpu_state.seg_fs.checked = cpu_state.seg_gs.checked = (cr0 & 1) ? 0 : 1; + + block->TOP = cpu_state.TOP & 7; + block->was_recompiled = 1; + + codegen_flat_ds = !(cpu_cur_status & CPU_STATUS_NOTFLATDS); + codegen_flat_ss = !(cpu_cur_status & CPU_STATUS_NOTFLATSS); + + codegen_accumulate_reset(); +} + +void codegen_block_remove() +{ + codeblock_t *block = &codeblock[block_current]; + + delete_block(block); + cpu_recomp_removed++; + + recomp_page = -1; +} + +void codegen_block_generate_end_mask() +{ + codeblock_t *block = &codeblock[block_current]; + uint32_t start_pc; + uint32_t end_pc; + + block->endpc = codegen_endpc; + + block->page_mask = 0; + start_pc = (block->pc & 0x3ff) & ~15; + if ((block->pc ^ block->endpc) & ~0x3ff) + end_pc = 0x3ff & ~15; + else + end_pc = (block->endpc & 0x3ff) & ~15; + if (end_pc < start_pc) + end_pc = 0x3ff; + start_pc >>= PAGE_MASK_SHIFT; + end_pc >>= PAGE_MASK_SHIFT; + + for (; start_pc <= end_pc; start_pc++) + { + block->page_mask |= ((uint64_t)1 << start_pc); + } + + pages[block->phys >> 12].code_present_mask[(block->phys >> 10) & 3] |= block->page_mask; + + block->phys_2 = -1; + block->page_mask2 = 0; + block->next_2 = block->prev_2 = NULL; + if ((block->pc ^ block->endpc) & ~0x3ff) + { + block->phys_2 = get_phys_noabrt(block->endpc); + if (block->phys_2 != -1) + { + page_t *page_2 = &pages[block->phys_2 >> 12]; + + start_pc = 0; + end_pc = (block->endpc & 0x3ff) >> PAGE_MASK_SHIFT; + for (; start_pc <= end_pc; start_pc++) + block->page_mask2 |= ((uint64_t)1 << start_pc); + page_2->code_present_mask[(block->phys_2 >> 10) & 3] |= block->page_mask2; + + if (!pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3]) + mem_flush_write_page(block->phys_2, block->endpc); + + if (!block->page_mask2) + fatal("!page_mask2\n"); + if (block->next_2) + { + if (!block->next_2->valid) + fatal("block->next_2->valid=0 %p\n", (void *)block->next_2); + } + + block->dirty_mask2 = &page_2->dirty_mask[(block->phys_2 >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK]; + } + } + + recomp_page = -1; +} + +void codegen_block_end() +{ + codeblock_t *block = &codeblock[block_current]; + + codegen_block_generate_end_mask(); + add_to_block_list(block); +} + +void codegen_block_end_recompile(codeblock_t *block) +{ + codegen_timing_block_end(); + codegen_accumulate(ACCREG_cycles, -codegen_block_cycles); + + codegen_accumulate_flush(); + + addbyte(0x83); /*ADDL $16,%esp*/ + addbyte(0xC4); + addbyte(0x10); + addbyte(0x5f); /*POP EDI*/ + addbyte(0x5e); /*POP ESI*/ + addbyte(0x5d); /*POP EBP*/ + addbyte(0x5b); /*POP EDX*/ + addbyte(0xC3); /*RET*/ + + if (block_pos > BLOCK_GPF_OFFSET) + fatal("Over limit!\n"); + + remove_from_block_list(block, block->pc); + block->next = block->prev = NULL; + block->next_2 = block->prev_2 = NULL; + codegen_block_generate_end_mask(); + add_to_block_list(block); + + if (!(block->flags & CODEBLOCK_HAS_FPU)) + block->flags &= ~CODEBLOCK_STATIC_TOP; +} + +void codegen_flush() +{ + return; +} + +static int opcode_modrm[256] = +{ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*00*/ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*10*/ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*20*/ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, /*30*/ + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*40*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50*/ + 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, /*60*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*70*/ + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*80*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*90*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*a0*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*b0*/ + + 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, /*c0*/ + 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, /*d0*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*e0*/ + 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, /*f0*/ +}; +int opcode_0f_modrm[256] = +{ + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, /*00*/ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, /*10*/ + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*20*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*30*/ + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*40*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*50*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, /*60*/ + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, /*70*/ + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*80*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*90*/ + 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, /*a0*/ + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, /*b0*/ + + 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /*c0*/ + 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, /*d0*/ + 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, /*e0*/ + 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0 /*f0*/ +}; + +void codegen_debug() +{ +} + +static x86seg *codegen_generate_ea_16_long(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc) +{ + if (!cpu_mod && cpu_rm == 6) + { + addbyte(0xC7); /*MOVL $0,(ssegs)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(eaaddr)); + addlong((fetchdat >> 8) & 0xffff); + (*op_pc) += 2; + } + else + { + switch (cpu_mod) + { + case 0: + addbyte(0xa1); /*MOVL *mod1add[0][cpu_rm], %eax*/ + addlong((uint32_t)mod1add[0][cpu_rm]); + addbyte(0x03); /*ADDL *mod1add[1][cpu_rm], %eax*/ + addbyte(0x05); + addlong((uint32_t)mod1add[1][cpu_rm]); + break; + case 1: + addbyte(0xb8); /*MOVL ,%eax*/ + addlong((uint32_t)(int8_t)(rmdat >> 8)); + addbyte(0x03); /*ADDL *mod1add[0][cpu_rm], %eax*/ + addbyte(0x05); + addlong((uint32_t)mod1add[0][cpu_rm]); + addbyte(0x03); /*ADDL *mod1add[1][cpu_rm], %eax*/ + addbyte(0x05); + addlong((uint32_t)mod1add[1][cpu_rm]); + (*op_pc)++; + break; + case 2: + addbyte(0xb8); /*MOVL ,%eax*/ + addlong((fetchdat >> 8) & 0xffff); + addbyte(0x03); /*ADDL *mod1add[0][cpu_rm], %eax*/ + addbyte(0x05); + addlong((uint32_t)mod1add[0][cpu_rm]); + addbyte(0x03); /*ADDL *mod1add[1][cpu_rm], %eax*/ + addbyte(0x05); + addlong((uint32_t)mod1add[1][cpu_rm]); + (*op_pc) += 2; + break; + } + addbyte(0x25); /*ANDL $0xffff, %eax*/ + addlong(0xffff); + addbyte(0xa3); + addlong((uint32_t)&cpu_state.eaaddr); + + if (mod1seg[cpu_rm] == &ss && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + } + return op_ea_seg; +} + +static x86seg *codegen_generate_ea_32_long(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc, int stack_offset) +{ + uint32_t new_eaaddr; + + if (cpu_rm == 4) + { + uint8_t sib = fetchdat >> 8; + (*op_pc)++; + + switch (cpu_mod) + { + case 0: + if ((sib & 7) == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xb8); /*MOVL ,%eax*/ + addlong(new_eaaddr); + (*op_pc) += 4; + } + else + { + addbyte(0x8b); /*MOVL regs[sib&7].l, %eax*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[sib & 7].l)); + } + break; + case 1: + new_eaaddr = (uint32_t)(int8_t)((fetchdat >> 16) & 0xff); + addbyte(0xb8); /*MOVL new_eaaddr, %eax*/ + addlong(new_eaaddr); + addbyte(0x03); /*ADDL regs[sib&7].l, %eax*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[sib & 7].l)); + (*op_pc)++; + break; + case 2: + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xb8); /*MOVL new_eaaddr, %eax*/ + addlong(new_eaaddr); + addbyte(0x03); /*ADDL regs[sib&7].l, %eax*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[sib & 7].l)); + (*op_pc) += 4; + break; + } + if (stack_offset && (sib & 7) == 4 && (cpu_mod || (sib & 7) != 5)) /*ESP*/ + { + addbyte(0x05); + addlong(stack_offset); + } + if (((sib & 7) == 4 || (cpu_mod && (sib & 7) == 5)) && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + if (((sib >> 3) & 7) != 4) + { + switch (sib >> 6) + { + case 0: + addbyte(0x03); /*ADDL regs[sib&7].l, %eax*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[(sib >> 3) & 7].l)); + break; + case 1: + addbyte(0x8B); addbyte(0x5D); addbyte((uint8_t)cpu_state_offset(regs[(sib >> 3) & 7].l)); /*MOVL armregs[RD],%ebx*/ + addbyte(0x01); addbyte(0xD8); /*ADDL %ebx,%eax*/ + addbyte(0x01); addbyte(0xD8); /*ADDL %ebx,%eax*/ + break; + case 2: + addbyte(0x8B); addbyte(0x5D); addbyte((uint8_t)cpu_state_offset(regs[(sib >> 3) & 7].l)); /*MOVL armregs[RD],%ebx*/ + addbyte(0xC1); addbyte(0xE3); addbyte(2); /*SHL $2,%ebx*/ + addbyte(0x01); addbyte(0xD8); /*ADDL %ebx,%eax*/ + break; + case 3: + addbyte(0x8B); addbyte(0x5D); addbyte((uint8_t)cpu_state_offset(regs[(sib >> 3) & 7].l)); /*MOVL armregs[RD],%ebx*/ + addbyte(0xC1); addbyte(0xE3); addbyte(3); /*SHL $2,%ebx*/ + addbyte(0x01); addbyte(0xD8); /*ADDL %ebx,%eax*/ + break; + } + } + addbyte(0xa3); + addlong((uint32_t)&cpu_state.eaaddr); + } + else + { + if (!cpu_mod && cpu_rm == 5) + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0xC7); /*MOVL $new_eaaddr,(eaaddr)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(eaaddr)); + addlong(new_eaaddr); + (*op_pc) += 4; + return op_ea_seg; + } + addbyte(0x8b); /*MOVL regs[sib&7].l, %eax*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(regs[cpu_rm].l)); + cpu_state.eaaddr = cpu_state.regs[cpu_rm].l; + if (cpu_mod) + { + if (cpu_rm == 5 && !op_ssegs) + op_ea_seg = &cpu_state.seg_ss; + if (cpu_mod == 1) + { + addbyte(0x05); + addlong((uint32_t)(int8_t)(fetchdat >> 8)); + (*op_pc)++; + } + else + { + new_eaaddr = fastreadl(cs + (*op_pc) + 1); + addbyte(0x05); + addlong(new_eaaddr); + (*op_pc) += 4; + } + } + addbyte(0xa3); + addlong((uint32_t)&cpu_state.eaaddr); + } + return op_ea_seg; +} + +void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t new_pc, uint32_t old_pc) +{ + codeblock_t *block = &codeblock[block_current]; + uint32_t op_32 = use32; + uint32_t op_pc = new_pc; + const OpFn *op_table = x86_dynarec_opcodes; + RecompOpFn *recomp_op_table = recomp_opcodes; + int opcode_shift = 0; + int opcode_mask = 0x3ff; + int over = 0; + int pc_off = 0; + int test_modrm = 1; + int c; + + op_ea_seg = &cpu_state.seg_ds; + op_ssegs = 0; + op_old_pc = old_pc; + + for (c = 0; c < NR_HOST_REGS; c++) + host_reg_mapping[c] = -1; + mmx_ebx_ecx_loaded = 0; + for (c = 0; c < NR_HOST_XMM_REGS; c++) + host_reg_xmm_mapping[c] = -1; + + codegen_timing_start(); + + while (!over) + { + switch (opcode) + { + case 0x0f: + op_table = x86_dynarec_opcodes_0f; + recomp_op_table = recomp_opcodes_0f; + over = 1; + break; + + case 0x26: /*ES:*/ + op_ea_seg = &cpu_state.seg_es; + op_ssegs = 1; + break; + case 0x2e: /*CS:*/ + op_ea_seg = &cpu_state.seg_cs; + op_ssegs = 1; + break; + case 0x36: /*SS:*/ + op_ea_seg = &cpu_state.seg_ss; + op_ssegs = 1; + break; + case 0x3e: /*DS:*/ + op_ea_seg = &cpu_state.seg_ds; + op_ssegs = 1; + break; + case 0x64: /*FS:*/ + op_ea_seg = &cpu_state.seg_fs; + op_ssegs = 1; + break; + case 0x65: /*GS:*/ + op_ea_seg = &cpu_state.seg_gs; + op_ssegs = 1; + break; + + case 0x66: /*Data size select*/ + op_32 = ((use32 & 0x100) ^ 0x100) | (op_32 & 0x200); + break; + case 0x67: /*Address size select*/ + op_32 = ((use32 & 0x200) ^ 0x200) | (op_32 & 0x100); + break; + + case 0xd8: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_d8_a32 : x86_dynarec_opcodes_d8_a16; + recomp_op_table = recomp_opcodes_d8; + opcode_shift = 3; + opcode_mask = 0x1f; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xd9: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_d9_a32 : x86_dynarec_opcodes_d9_a16; + recomp_op_table = recomp_opcodes_d9; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xda: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_da_a32 : x86_dynarec_opcodes_da_a16; + recomp_op_table = recomp_opcodes_da; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xdb: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_db_a32 : x86_dynarec_opcodes_db_a16; + recomp_op_table = recomp_opcodes_db; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xdc: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_dc_a32 : x86_dynarec_opcodes_dc_a16; + recomp_op_table = recomp_opcodes_dc; + opcode_shift = 3; + opcode_mask = 0x1f; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xdd: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_dd_a32 : x86_dynarec_opcodes_dd_a16; + recomp_op_table = recomp_opcodes_dd; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xde: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_de_a32 : x86_dynarec_opcodes_de_a16; + recomp_op_table = recomp_opcodes_de; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + case 0xdf: + op_table = (op_32 & 0x200) ? x86_dynarec_opcodes_df_a32 : x86_dynarec_opcodes_df_a16; + recomp_op_table = recomp_opcodes_df; + opcode_mask = 0xff; + over = 1; + pc_off = -1; + test_modrm = 0; + block->flags |= CODEBLOCK_HAS_FPU; + break; + + case 0xf0: /*LOCK*/ + break; + + case 0xf2: /*REPNE*/ + op_table = x86_dynarec_opcodes_REPNE; + recomp_op_table = recomp_opcodes_REPNE; + break; + case 0xf3: /*REPE*/ + op_table = x86_dynarec_opcodes_REPE; + recomp_op_table = recomp_opcodes_REPE; + break; + + default: + goto generate_call; + } + fetchdat = fastreadl(cs + op_pc); + codegen_timing_prefix(opcode, fetchdat); + if (cpu_state.abrt) + return; + opcode = fetchdat & 0xff; + if (!pc_off) + fetchdat >>= 8; + + op_pc++; + } + +generate_call: + codegen_timing_opcode(opcode, fetchdat, op_32, op_pc); + + codegen_accumulate(ACCREG_ins, 1); + codegen_accumulate(ACCREG_cycles, -codegen_block_cycles); + codegen_block_cycles = 0; + + if ((op_table == x86_dynarec_opcodes && + ((opcode & 0xf0) == 0x70 || (opcode & 0xfc) == 0xe0 || opcode == 0xc2 || + (opcode & 0xfe) == 0xca || (opcode & 0xfc) == 0xcc || (opcode & 0xfc) == 0xe8 || + (opcode == 0xff && ((fetchdat & 0x38) >= 0x10 && (fetchdat & 0x38) < 0x30)))) || + (op_table == x86_dynarec_opcodes_0f && ((opcode & 0xf0) == 0x80))) + { + /*On some CPUs (eg K6), a jump/branch instruction may be able to pair with + subsequent instructions, so no cycles may have been deducted for it yet. + To prevent having zero cycle blocks (eg with a jump instruction pointing + to itself), apply the cycles that would be taken if this jump is taken, + then reverse it for subsequent instructions if the jump is not taken*/ + int jump_cycles = 0; + + if (codegen_timing_jump_cycles != NULL) + jump_cycles = codegen_timing_jump_cycles(); + + if (jump_cycles) + codegen_accumulate(ACCREG_cycles, -jump_cycles); + codegen_accumulate_flush(); + if (jump_cycles) + codegen_accumulate(ACCREG_cycles, jump_cycles); + } + + if ((op_table == x86_dynarec_opcodes_REPNE || op_table == x86_dynarec_opcodes_REPE) && !op_table[opcode | op_32]) + { + op_table = x86_dynarec_opcodes; + recomp_op_table = recomp_opcodes; + } + + if (recomp_op_table && recomp_op_table[(opcode | op_32) & 0x1ff]) + { + uint32_t new_pc = recomp_op_table[(opcode | op_32) & 0x1ff](opcode, fetchdat, op_32, op_pc, block); + if (new_pc) + { + if (new_pc != -1) + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.pc, new_pc); + + codegen_block_ins++; + block->ins++; + codegen_block_full_ins++; + codegen_endpc = (cs + cpu_state.pc) + 8; + + return; + } + } + + op = op_table[((opcode >> opcode_shift) | op_32) & opcode_mask]; + if (op_ssegs != last_ssegs) + { + last_ssegs = op_ssegs; + + addbyte(0xC6); /*MOVB [ssegs],op_ssegs*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ssegs)); + addbyte(op_pc + pc_off); + } + + if (!test_modrm || + (op_table == x86_dynarec_opcodes && opcode_modrm[opcode]) || + (op_table == x86_dynarec_opcodes_0f && opcode_0f_modrm[opcode])) + { + int stack_offset = 0; + + if (op_table == x86_dynarec_opcodes && opcode == 0x8f) /*POP*/ + stack_offset = (op_32 & 0x100) ? 4 : 2; + + cpu_mod = (fetchdat >> 6) & 3; + cpu_reg = (fetchdat >> 3) & 7; + cpu_rm = fetchdat & 7; + + addbyte(0xC7); /*MOVL $rm | mod | reg,(rm_mod_reg_data)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(rm_data.rm_mod_reg_data)); + addlong(cpu_rm | (cpu_mod << 8) | (cpu_reg << 16)); + + op_pc += pc_off; + if (cpu_mod != 3 && !(op_32 & 0x200)) + op_ea_seg = codegen_generate_ea_16_long(op_ea_seg, fetchdat, op_ssegs, &op_pc); + if (cpu_mod != 3 && (op_32 & 0x200)) + op_ea_seg = codegen_generate_ea_32_long(op_ea_seg, fetchdat, op_ssegs, &op_pc, stack_offset); + op_pc -= pc_off; + } + + if (op_ea_seg != last_ea_seg) + { + last_ea_seg = op_ea_seg; + addbyte(0xC7); /*MOVL $&cpu_state.seg_ds,(ea_seg)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(ea_seg)); + addlong((uint32_t)op_ea_seg); + } + + codegen_accumulate_flush(); + + addbyte(0xC7); /*MOVL pc,new_pc*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(pc)); + addlong(op_pc + pc_off); + + addbyte(0xC7); /*MOVL $old_pc,(oldpc)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(oldpc)); + addlong(old_pc); + + if (op_32 != last_op32) + { + last_op32 = op_32; + addbyte(0xC7); /*MOVL $use32,(op32)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(op32)); + addlong(op_32); + } + + addbyte(0xC7); /*MOVL $fetchdat,(%esp)*/ + addbyte(0x04); + addbyte(0x24); + addlong(fetchdat); + + addbyte(0xE8); /*CALL*/ + addlong(((uint8_t *)op - (uint8_t *)(&block->data[block_pos + 4]))); + + codegen_block_ins++; + + block->ins++; + + addbyte(0x09); /*OR %eax, %eax*/ + addbyte(0xc0); + addbyte(0x0F); addbyte(0x85); /*JNZ 0*/ + addlong((uint32_t)&block->data[BLOCK_EXIT_OFFSET] - (uint32_t)(&block->data[block_pos + 4])); + + codegen_endpc = (cs + cpu_state.pc) + 8; +} + +#endif diff --git a/src/codegen/codegen_x86.h b/src/codegen/codegen_x86.h new file mode 100644 index 000000000..3a3662d32 --- /dev/null +++ b/src/codegen/codegen_x86.h @@ -0,0 +1,42 @@ +#define BLOCK_SIZE 0x4000 +#define BLOCK_MASK 0x3fff +#define BLOCK_START 0 + +#define HASH_SIZE 0x20000 +#define HASH_MASK 0x1ffff + +#define HASH(l) ((l) & 0x1ffff) + +#define BLOCK_EXIT_OFFSET 0x7f0 +#define BLOCK_GPF_OFFSET (BLOCK_EXIT_OFFSET - 20) + +#define BLOCK_MAX 1720 + +enum +{ + OP_RET = 0xc3 +}; + +#define NR_HOST_REGS 4 +extern int host_reg_mapping[NR_HOST_REGS]; +#define NR_HOST_XMM_REGS 8 +extern int host_reg_xmm_mapping[NR_HOST_XMM_REGS]; + +extern uint32_t mem_load_addr_ea_b; +extern uint32_t mem_load_addr_ea_w; +extern uint32_t mem_load_addr_ea_l; +extern uint32_t mem_load_addr_ea_q; +extern uint32_t mem_store_addr_ea_b; +extern uint32_t mem_store_addr_ea_w; +extern uint32_t mem_store_addr_ea_l; +extern uint32_t mem_store_addr_ea_q; + +extern uint32_t mem_load_addr_ea_b_no_abrt; +extern uint32_t mem_store_addr_ea_b_no_abrt; +extern uint32_t mem_load_addr_ea_w_no_abrt; +extern uint32_t mem_store_addr_ea_w_no_abrt; +extern uint32_t mem_load_addr_ea_l_no_abrt; +extern uint32_t mem_store_addr_ea_l_no_abrt; +extern uint32_t mem_check_write; +extern uint32_t mem_check_write_w; +extern uint32_t mem_check_write_l; diff --git a/src/codegen/x86_flags.h b/src/codegen/x86_flags.h new file mode 100644 index 000000000..7068a243d --- /dev/null +++ b/src/codegen/x86_flags.h @@ -0,0 +1,526 @@ +/* Copyright holders: Sarah Walker + see COPYING for more details +*/ +extern int tempc; + +enum +{ + FLAGS_UNKNOWN, + + FLAGS_ZN8, + FLAGS_ZN16, + FLAGS_ZN32, + + FLAGS_ADD8, + FLAGS_ADD16, + FLAGS_ADD32, + + FLAGS_SUB8, + FLAGS_SUB16, + FLAGS_SUB32, + + FLAGS_SHL8, + FLAGS_SHL16, + FLAGS_SHL32, + + FLAGS_SHR8, + FLAGS_SHR16, + FLAGS_SHR32, + + FLAGS_SAR8, + FLAGS_SAR16, + FLAGS_SAR32, + + FLAGS_INC8, + FLAGS_INC16, + FLAGS_INC32, + + FLAGS_DEC8, + FLAGS_DEC16, + FLAGS_DEC32 +}; + +static __inline int ZF_SET() +{ + switch (cpu_state.flags_op) + { + case FLAGS_ZN8: + case FLAGS_ZN16: + case FLAGS_ZN32: + case FLAGS_ADD8: + case FLAGS_ADD16: + case FLAGS_ADD32: + case FLAGS_SUB8: + case FLAGS_SUB16: + case FLAGS_SUB32: + case FLAGS_SHL8: + case FLAGS_SHL16: + case FLAGS_SHL32: + case FLAGS_SHR8: + case FLAGS_SHR16: + case FLAGS_SHR32: + case FLAGS_SAR8: + case FLAGS_SAR16: + case FLAGS_SAR32: + case FLAGS_INC8: + case FLAGS_INC16: + case FLAGS_INC32: + case FLAGS_DEC8: + case FLAGS_DEC16: + case FLAGS_DEC32: + return !cpu_state.flags_res; + + case FLAGS_UNKNOWN: + return cpu_state.flags & Z_FLAG; + + default: + return 0; + } +} + +static __inline int NF_SET() +{ + switch (cpu_state.flags_op) + { + case FLAGS_ZN8: + case FLAGS_ADD8: + case FLAGS_SUB8: + case FLAGS_SHL8: + case FLAGS_SHR8: + case FLAGS_SAR8: + case FLAGS_INC8: + case FLAGS_DEC8: + return cpu_state.flags_res & 0x80; + + case FLAGS_ZN16: + case FLAGS_ADD16: + case FLAGS_SUB16: + case FLAGS_SHL16: + case FLAGS_SHR16: + case FLAGS_SAR16: + case FLAGS_INC16: + case FLAGS_DEC16: + return cpu_state.flags_res & 0x8000; + + case FLAGS_ZN32: + case FLAGS_ADD32: + case FLAGS_SUB32: + case FLAGS_SHL32: + case FLAGS_SHR32: + case FLAGS_SAR32: + case FLAGS_INC32: + case FLAGS_DEC32: + return cpu_state.flags_res & 0x80000000; + + case FLAGS_UNKNOWN: + return cpu_state.flags & N_FLAG; + + default: + return 0; + } +} + +static __inline int PF_SET() +{ + switch (cpu_state.flags_op) + { + case FLAGS_ZN8: + case FLAGS_ZN16: + case FLAGS_ZN32: + case FLAGS_ADD8: + case FLAGS_ADD16: + case FLAGS_ADD32: + case FLAGS_SUB8: + case FLAGS_SUB16: + case FLAGS_SUB32: + case FLAGS_SHL8: + case FLAGS_SHL16: + case FLAGS_SHL32: + case FLAGS_SHR8: + case FLAGS_SHR16: + case FLAGS_SHR32: + case FLAGS_SAR8: + case FLAGS_SAR16: + case FLAGS_SAR32: + case FLAGS_INC8: + case FLAGS_INC16: + case FLAGS_INC32: + case FLAGS_DEC8: + case FLAGS_DEC16: + case FLAGS_DEC32: + return znptable8[cpu_state.flags_res & 0xff] & P_FLAG; + + case FLAGS_UNKNOWN: + return cpu_state.flags & P_FLAG; + + default: + return 0; + } +} + +static __inline int VF_SET() +{ + switch (cpu_state.flags_op) + { + case FLAGS_ZN8: + case FLAGS_ZN16: + case FLAGS_ZN32: + case FLAGS_SAR8: + case FLAGS_SAR16: + case FLAGS_SAR32: + return 0; + + case FLAGS_ADD8: + case FLAGS_INC8: + return !((cpu_state.flags_op1 ^ cpu_state.flags_op2) & 0x80) && ((cpu_state.flags_op1 ^ cpu_state.flags_res) & 0x80); + case FLAGS_ADD16: + case FLAGS_INC16: + return !((cpu_state.flags_op1 ^ cpu_state.flags_op2) & 0x8000) && ((cpu_state.flags_op1 ^ cpu_state.flags_res) & 0x8000); + case FLAGS_ADD32: + case FLAGS_INC32: + return !((cpu_state.flags_op1 ^ cpu_state.flags_op2) & 0x80000000) && ((cpu_state.flags_op1 ^ cpu_state.flags_res) & 0x80000000); + + case FLAGS_SUB8: + case FLAGS_DEC8: + return ((cpu_state.flags_op1 ^ cpu_state.flags_op2) & (cpu_state.flags_op1 ^ cpu_state.flags_res) & 0x80); + case FLAGS_SUB16: + case FLAGS_DEC16: + return ((cpu_state.flags_op1 ^ cpu_state.flags_op2) & (cpu_state.flags_op1 ^ cpu_state.flags_res) & 0x8000); + case FLAGS_SUB32: + case FLAGS_DEC32: + return ((cpu_state.flags_op1 ^ cpu_state.flags_op2) & (cpu_state.flags_op1 ^ cpu_state.flags_res) & 0x80000000); + + case FLAGS_SHL8: + return (((cpu_state.flags_op1 << cpu_state.flags_op2) ^ (cpu_state.flags_op1 << (cpu_state.flags_op2 - 1))) & 0x80); + case FLAGS_SHL16: + return (((cpu_state.flags_op1 << cpu_state.flags_op2) ^ (cpu_state.flags_op1 << (cpu_state.flags_op2 - 1))) & 0x8000); + case FLAGS_SHL32: + return (((cpu_state.flags_op1 << cpu_state.flags_op2) ^ (cpu_state.flags_op1 << (cpu_state.flags_op2 - 1))) & 0x80000000); + + case FLAGS_SHR8: + return ((cpu_state.flags_op2 == 1) && (cpu_state.flags_op1 & 0x80)); + case FLAGS_SHR16: + return ((cpu_state.flags_op2 == 1) && (cpu_state.flags_op1 & 0x8000)); + case FLAGS_SHR32: + return ((cpu_state.flags_op2 == 1) && (cpu_state.flags_op1 & 0x80000000)); + + case FLAGS_UNKNOWN: + return cpu_state.flags & V_FLAG; + + default: + return 0; + } +} + +static __inline int AF_SET() +{ + switch (cpu_state.flags_op) + { + case FLAGS_ZN8: + case FLAGS_ZN16: + case FLAGS_ZN32: + case FLAGS_SHL8: + case FLAGS_SHL16: + case FLAGS_SHL32: + case FLAGS_SHR8: + case FLAGS_SHR16: + case FLAGS_SHR32: + case FLAGS_SAR8: + case FLAGS_SAR16: + case FLAGS_SAR32: + return 0; + + case FLAGS_ADD8: + case FLAGS_ADD16: + case FLAGS_ADD32: + case FLAGS_INC8: + case FLAGS_INC16: + case FLAGS_INC32: + return ((cpu_state.flags_op1 & 0xF) + (cpu_state.flags_op2 & 0xF)) & 0x10; + + case FLAGS_SUB8: + case FLAGS_SUB16: + case FLAGS_SUB32: + case FLAGS_DEC8: + case FLAGS_DEC16: + case FLAGS_DEC32: + return ((cpu_state.flags_op1 & 0xF) - (cpu_state.flags_op2 & 0xF)) & 0x10; + + case FLAGS_UNKNOWN: + return cpu_state.flags & A_FLAG; + + default: + return 0; + } +} + +static __inline int CF_SET() +{ + switch (cpu_state.flags_op) + { + case FLAGS_ADD8: + return (cpu_state.flags_op1 + cpu_state.flags_op2) & 0x100; + case FLAGS_ADD16: + return (cpu_state.flags_op1 + cpu_state.flags_op2) & 0x10000; + case FLAGS_ADD32: + return (cpu_state.flags_res < cpu_state.flags_op1); + + case FLAGS_SUB8: + case FLAGS_SUB16: + case FLAGS_SUB32: + return (cpu_state.flags_op1 < cpu_state.flags_op2); + + case FLAGS_SHL8: + return (cpu_state.flags_op1 << (cpu_state.flags_op2 - 1)) & 0x80; + case FLAGS_SHL16: + return (cpu_state.flags_op1 << (cpu_state.flags_op2 - 1)) & 0x8000; + case FLAGS_SHL32: + return (cpu_state.flags_op1 << (cpu_state.flags_op2 - 1)) & 0x80000000; + + case FLAGS_SHR8: + case FLAGS_SHR16: + case FLAGS_SHR32: + return (cpu_state.flags_op1 >> (cpu_state.flags_op2 - 1)) & 1; + + case FLAGS_SAR8: + return ((int8_t)cpu_state.flags_op1 >> (cpu_state.flags_op2 - 1)) & 1; + case FLAGS_SAR16: + return ((int16_t)cpu_state.flags_op1 >> (cpu_state.flags_op2 - 1)) & 1; + case FLAGS_SAR32: + return ((int32_t)cpu_state.flags_op1 >> (cpu_state.flags_op2 - 1)) & 1; + + case FLAGS_ZN8: + case FLAGS_ZN16: + case FLAGS_ZN32: + return 0; + + case FLAGS_DEC8: + case FLAGS_DEC16: + case FLAGS_DEC32: + case FLAGS_INC8: + case FLAGS_INC16: + case FLAGS_INC32: + case FLAGS_UNKNOWN: + return cpu_state.flags & C_FLAG; + + default: + return 0; + } +} + +static __inline void flags_rebuild() +{ + if (cpu_state.flags_op != FLAGS_UNKNOWN) + { + uint16_t tempf = 0; + if (CF_SET()) tempf |= C_FLAG; + if (PF_SET()) tempf |= P_FLAG; + if (AF_SET()) tempf |= A_FLAG; + if (ZF_SET()) tempf |= Z_FLAG; + if (NF_SET()) tempf |= N_FLAG; + if (VF_SET()) tempf |= V_FLAG; + cpu_state.flags = (cpu_state.flags & ~0x8d5) | tempf; + cpu_state.flags_op = FLAGS_UNKNOWN; + } +} + +static __inline void flags_extract() +{ + cpu_state.flags_op = FLAGS_UNKNOWN; +} + +static __inline void flags_rebuild_c() +{ + if (cpu_state.flags_op != FLAGS_UNKNOWN) + { + if (CF_SET()) + cpu_state.flags |= C_FLAG; + else + cpu_state.flags &= ~C_FLAG; + } +} + +static __inline void setznp8(uint8_t val) +{ + cpu_state.flags_op = FLAGS_ZN8; + cpu_state.flags_res = val; +} +static __inline void setznp16(uint16_t val) +{ + cpu_state.flags_op = FLAGS_ZN16; + cpu_state.flags_res = val; +} +static __inline void setznp32(uint32_t val) +{ + cpu_state.flags_op = FLAGS_ZN32; + cpu_state.flags_res = val; +} + +#define set_flags_shift(op, orig, shift, res) \ + cpu_state.flags_op = op; \ + cpu_state.flags_res = res; \ + cpu_state.flags_op1 = orig; \ + cpu_state.flags_op2 = shift; + +static __inline void setadd8(uint8_t a, uint8_t b) +{ + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = (a + b) & 0xff; + cpu_state.flags_op = FLAGS_ADD8; +} +static __inline void setadd16(uint16_t a, uint16_t b) +{ + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = (a + b) & 0xffff; + cpu_state.flags_op = FLAGS_ADD16; +} +static __inline void setadd32(uint32_t a, uint32_t b) +{ + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = a + b; + cpu_state.flags_op = FLAGS_ADD32; +} +static __inline void setadd8nc(uint8_t a, uint8_t b) +{ + flags_rebuild_c(); + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = (a + b) & 0xff; + cpu_state.flags_op = FLAGS_INC8; +} +static __inline void setadd16nc(uint16_t a, uint16_t b) +{ + flags_rebuild_c(); + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = (a + b) & 0xffff; + cpu_state.flags_op = FLAGS_INC16; +} +static __inline void setadd32nc(uint32_t a, uint32_t b) +{ + flags_rebuild_c(); + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = a + b; + cpu_state.flags_op = FLAGS_INC32; +} + +static __inline void setsub8(uint8_t a, uint8_t b) +{ + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = (a - b) & 0xff; + cpu_state.flags_op = FLAGS_SUB8; +} +static __inline void setsub16(uint16_t a, uint16_t b) +{ + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = (a - b) & 0xffff; + cpu_state.flags_op = FLAGS_SUB16; +} +static __inline void setsub32(uint32_t a, uint32_t b) +{ + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = a - b; + cpu_state.flags_op = FLAGS_SUB32; +} + +static __inline void setsub8nc(uint8_t a, uint8_t b) +{ + flags_rebuild_c(); + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = (a - b) & 0xff; + cpu_state.flags_op = FLAGS_DEC8; +} +static __inline void setsub16nc(uint16_t a, uint16_t b) +{ + flags_rebuild_c(); + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = (a - b) & 0xffff; + cpu_state.flags_op = FLAGS_DEC16; +} +static __inline void setsub32nc(uint32_t a, uint32_t b) +{ + flags_rebuild_c(); + cpu_state.flags_op1 = a; + cpu_state.flags_op2 = b; + cpu_state.flags_res = a - b; + cpu_state.flags_op = FLAGS_DEC32; +} + +static __inline void setadc8(uint8_t a, uint8_t b) +{ + uint16_t c=(uint16_t)a+(uint16_t)b+tempc; + cpu_state.flags_op = FLAGS_UNKNOWN; + cpu_state.flags&=~0x8D5; + cpu_state.flags|=znptable8[c&0xFF]; + if (c&0x100) cpu_state.flags|=C_FLAG; + if (!((a^b)&0x80)&&((a^c)&0x80)) cpu_state.flags|=V_FLAG; + if (((a&0xF)+(b&0xF))&0x10) cpu_state.flags|=A_FLAG; +} +static __inline void setadc16(uint16_t a, uint16_t b) +{ + uint32_t c=(uint32_t)a+(uint32_t)b+tempc; + cpu_state.flags_op = FLAGS_UNKNOWN; + cpu_state.flags&=~0x8D5; + cpu_state.flags|=znptable16[c&0xFFFF]; + if (c&0x10000) cpu_state.flags|=C_FLAG; + if (!((a^b)&0x8000)&&((a^c)&0x8000)) cpu_state.flags|=V_FLAG; + if (((a&0xF)+(b&0xF))&0x10) cpu_state.flags|=A_FLAG; +} +static __inline void setadc32(uint32_t a, uint32_t b) +{ + uint32_t c=(uint32_t)a+(uint32_t)b+tempc; + cpu_state.flags_op = FLAGS_UNKNOWN; + cpu_state.flags&=~0x8D5; + cpu_state.flags|=((c&0x80000000)?N_FLAG:((!c)?Z_FLAG:0)); + cpu_state.flags|=(znptable8[c&0xFF]&P_FLAG); + if ((ca) || (c==a && tempc)) cpu_state.flags|=C_FLAG; + if ((a^b)&(a^c)&0x80000000) cpu_state.flags|=V_FLAG; + if (((a&0xF)-((b&0xF)+tempc))&0x10) cpu_state.flags|=A_FLAG; +} + +extern void cpu_386_flags_extract(); +extern void cpu_386_flags_rebuild(); \ No newline at end of file diff --git a/src/codegen/x86_ops_call.h b/src/codegen/x86_ops_call.h new file mode 100644 index 000000000..8c52e632e --- /dev/null +++ b/src/codegen/x86_ops_call.h @@ -0,0 +1,457 @@ +#define CALL_FAR_w(new_seg, new_pc) \ + old_cs = CS; \ + old_pc = cpu_state.pc; \ + oxpc = cpu_state.pc; \ + cpu_state.pc = new_pc; \ + optype = CALL; \ + cgate16 = cgate32 = 0; \ + if (msw & 1) loadcscall(new_seg); \ + else \ + { \ + loadcs(new_seg); \ + cycles -= timing_call_rm; \ + } \ + optype = 0; \ + if (cpu_state.abrt) { cgate16 = cgate32 = 0; return 1; } \ + oldss = ss; \ + if (cgate32) \ + { \ + uint32_t old_esp = ESP; \ + PUSH_L(old_cs); if (cpu_state.abrt) { cgate16 = cgate32 = 0; return 1; } \ + PUSH_L(old_pc); if (cpu_state.abrt) { ESP = old_esp; return 1; } \ + } \ + else \ + { \ + uint32_t old_esp = ESP; \ + PUSH_W(old_cs); if (cpu_state.abrt) { cgate16 = cgate32 = 0; return 1; } \ + PUSH_W(old_pc); if (cpu_state.abrt) { ESP = old_esp; return 1; } \ + } + +#define CALL_FAR_l(new_seg, new_pc) \ + old_cs = CS; \ + old_pc = cpu_state.pc; \ + oxpc = cpu_state.pc; \ + cpu_state.pc = new_pc; \ + optype = CALL; \ + cgate16 = cgate32 = 0; \ + if (msw & 1) loadcscall(new_seg); \ + else \ + { \ + loadcs(new_seg); \ + cycles -= timing_call_rm; \ + } \ + optype = 0; \ + if (cpu_state.abrt) { cgate16 = cgate32 = 0; return 1; } \ + oldss = ss; \ + if (cgate16) \ + { \ + uint32_t old_esp = ESP; \ + PUSH_W(old_cs); if (cpu_state.abrt) { cgate16 = cgate32 = 0; return 1; } \ + PUSH_W(old_pc); if (cpu_state.abrt) { ESP = old_esp; return 1; } \ + } \ + else \ + { \ + uint32_t old_esp = ESP; \ + PUSH_L(old_cs); if (cpu_state.abrt) { cgate16 = cgate32 = 0; return 1; } \ + PUSH_L(old_pc); if (cpu_state.abrt) { ESP = old_esp; return 1; } \ + } + + +static int opCALL_far_w(uint32_t fetchdat) +{ + uint32_t old_cs, old_pc; + uint16_t new_cs, new_pc; + int cycles_old = cycles; UN_USED(cycles_old); + + new_pc = getwordf(); + new_cs = getword(); if (cpu_state.abrt) return 1; + + CALL_FAR_w(new_cs, new_pc); + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 5, -1, 0,0,cgate16 ? 2:0,cgate16 ? 0:2, 0); + PREFETCH_FLUSH(); + + return 0; +} +static int opCALL_far_l(uint32_t fetchdat) +{ + uint32_t old_cs, old_pc; + uint32_t new_cs, new_pc; + int cycles_old = cycles; UN_USED(cycles_old); + + new_pc = getlong(); + new_cs = getword(); if (cpu_state.abrt) return 1; + + CALL_FAR_l(new_cs, new_pc); + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 7, -1, 0,0,cgate16 ? 2:0,cgate16 ? 0:2, 0); + PREFETCH_FLUSH(); + + return 0; +} + + +static int opFF_w_a16(uint32_t fetchdat) +{ + uint16_t old_cs, new_cs; + uint32_t old_pc, new_pc; + int cycles_old = cycles; UN_USED(cycles_old); + + uint16_t temp; + + fetch_ea_16(fetchdat); + + switch (rmdat & 0x38) + { + case 0x00: /*INC w*/ + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteaw(); if (cpu_state.abrt) return 1; + seteaw(temp + 1); if (cpu_state.abrt) return 1; + setadd16nc(temp, 1); + CLOCK_CYCLES((cpu_mod == 3) ? timing_rr : timing_mm); + PREFETCH_RUN((cpu_mod == 3) ? timing_rr : timing_mm, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, 0); + break; + case 0x08: /*DEC w*/ + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteaw(); if (cpu_state.abrt) return 1; + seteaw(temp - 1); if (cpu_state.abrt) return 1; + setsub16nc(temp, 1); + CLOCK_CYCLES((cpu_mod == 3) ? timing_rr : timing_mm); + PREFETCH_RUN((cpu_mod == 3) ? timing_rr : timing_mm, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, 0); + break; + case 0x10: /*CALL*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = geteaw(); if (cpu_state.abrt) return 1; + PUSH_W(cpu_state.pc); + cpu_state.pc = new_pc; + CPU_BLOCK_END(); + if (is486) CLOCK_CYCLES(5); + else CLOCK_CYCLES((cpu_mod == 3) ? 7 : 10); + PREFETCH_RUN((cpu_mod == 3) ? 7 : 10, 2, rmdat, (cpu_mod == 3) ? 0:1,0,1,0, 0); + PREFETCH_FLUSH(); + break; + case 0x18: /*CALL far*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = readmemw(easeg, cpu_state.eaaddr); + new_cs = readmemw(easeg, (cpu_state.eaaddr + 2)); if (cpu_state.abrt) return 1; + + CALL_FAR_w(new_cs, new_pc); + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 2,0,cgate16 ? 2:0,cgate16 ? 0:2, 0); + PREFETCH_FLUSH(); + break; + case 0x20: /*JMP*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = geteaw(); if (cpu_state.abrt) return 1; + cpu_state.pc = new_pc; + CPU_BLOCK_END(); + if (is486) CLOCK_CYCLES(5); + else CLOCK_CYCLES((cpu_mod == 3) ? 7 : 10); + PREFETCH_RUN((cpu_mod == 3) ? 7 : 10, 2, rmdat, (cpu_mod == 3) ? 0:1,0,0,0, 0); + PREFETCH_FLUSH(); + break; + case 0x28: /*JMP far*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + oxpc = cpu_state.pc; + new_pc = readmemw(easeg, cpu_state.eaaddr); + new_cs = readmemw(easeg, cpu_state.eaaddr + 2); if (cpu_state.abrt) return 1; + cpu_state.pc = new_pc; + loadcsjmp(new_cs, oxpc); if (cpu_state.abrt) return 1; + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 2,0,0,0, 0); + PREFETCH_FLUSH(); + break; + case 0x30: /*PUSH w*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + temp = geteaw(); if (cpu_state.abrt) return 1; + PUSH_W(temp); + CLOCK_CYCLES((cpu_mod == 3) ? 2 : 5); + PREFETCH_RUN((cpu_mod == 3) ? 2 : 5, 2, rmdat, (cpu_mod == 3) ? 0:1,0,1,0, 0); + break; + + default: +// fatal("Bad FF opcode %02X\n",rmdat&0x38); + x86illegal(); + } + return cpu_state.abrt; +} +static int opFF_w_a32(uint32_t fetchdat) +{ + uint16_t old_cs, new_cs; + uint32_t old_pc, new_pc; + int cycles_old = cycles; UN_USED(cycles_old); + + uint16_t temp; + + fetch_ea_32(fetchdat); + + switch (rmdat & 0x38) + { + case 0x00: /*INC w*/ + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteaw(); if (cpu_state.abrt) return 1; + seteaw(temp + 1); if (cpu_state.abrt) return 1; + setadd16nc(temp, 1); + CLOCK_CYCLES((cpu_mod == 3) ? timing_rr : timing_mm); + PREFETCH_RUN((cpu_mod == 3) ? timing_rr : timing_mm, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, 1); + break; + case 0x08: /*DEC w*/ + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteaw(); if (cpu_state.abrt) return 1; + seteaw(temp - 1); if (cpu_state.abrt) return 1; + setsub16nc(temp, 1); + CLOCK_CYCLES((cpu_mod == 3) ? timing_rr : timing_mm); + PREFETCH_RUN((cpu_mod == 3) ? timing_rr : timing_mm, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, 1); + break; + case 0x10: /*CALL*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = geteaw(); if (cpu_state.abrt) return 1; + PUSH_W(cpu_state.pc); + cpu_state.pc = new_pc; + CPU_BLOCK_END(); + if (is486) CLOCK_CYCLES(5); + else CLOCK_CYCLES((cpu_mod == 3) ? 7 : 10); + PREFETCH_RUN((cpu_mod == 3) ? 7 : 10, 2, rmdat, (cpu_mod == 3) ? 0:1,0,1,0, 1); + PREFETCH_FLUSH(); + break; + case 0x18: /*CALL far*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = readmemw(easeg, cpu_state.eaaddr); + new_cs = readmemw(easeg, (cpu_state.eaaddr + 2)); if (cpu_state.abrt) return 1; + + CALL_FAR_w(new_cs, new_pc); + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 2,0,cgate16 ? 2:0,cgate16 ? 0:2, 1); + PREFETCH_FLUSH(); + break; + case 0x20: /*JMP*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = geteaw(); if (cpu_state.abrt) return 1; + cpu_state.pc = new_pc; + CPU_BLOCK_END(); + if (is486) CLOCK_CYCLES(5); + else CLOCK_CYCLES((cpu_mod == 3) ? 7 : 10); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 1,0,0,0, 1); + PREFETCH_FLUSH(); + break; + case 0x28: /*JMP far*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + oxpc = cpu_state.pc; + new_pc = readmemw(easeg, cpu_state.eaaddr); + new_cs = readmemw(easeg, cpu_state.eaaddr + 2); if (cpu_state.abrt) return 1; + cpu_state.pc = new_pc; + loadcsjmp(new_cs, oxpc); if (cpu_state.abrt) return 1; + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 2,0,0,0, 1); + PREFETCH_FLUSH(); + break; + case 0x30: /*PUSH w*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + temp = geteaw(); if (cpu_state.abrt) return 1; + PUSH_W(temp); + CLOCK_CYCLES((cpu_mod == 3) ? 2 : 5); + PREFETCH_RUN((cpu_mod == 3) ? 2 : 5, 2, rmdat, (cpu_mod == 3) ? 0:1,0,1,0, 1); + break; + + default: +// fatal("Bad FF opcode %02X\n",rmdat&0x38); + x86illegal(); + } + return cpu_state.abrt; +} + +static int opFF_l_a16(uint32_t fetchdat) +{ + uint16_t old_cs, new_cs; + uint32_t old_pc, new_pc; + int cycles_old = cycles; UN_USED(cycles_old); + + uint32_t temp; + + fetch_ea_16(fetchdat); + + switch (rmdat & 0x38) + { + case 0x00: /*INC l*/ + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteal(); if (cpu_state.abrt) return 1; + seteal(temp + 1); if (cpu_state.abrt) return 1; + setadd32nc(temp, 1); + CLOCK_CYCLES((cpu_mod == 3) ? timing_rr : timing_mm); + PREFETCH_RUN((cpu_mod == 3) ? timing_rr : timing_mm, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, 0); + break; + case 0x08: /*DEC l*/ + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteal(); if (cpu_state.abrt) return 1; + seteal(temp - 1); if (cpu_state.abrt) return 1; + setsub32nc(temp, 1); + CLOCK_CYCLES((cpu_mod == 3) ? timing_rr : timing_mm); + PREFETCH_RUN((cpu_mod == 3) ? timing_rr : timing_mm, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, 0); + break; + case 0x10: /*CALL*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = geteal(); if (cpu_state.abrt) return 1; + PUSH_L(cpu_state.pc); + cpu_state.pc = new_pc; + CPU_BLOCK_END(); + if (is486) CLOCK_CYCLES(5); + else CLOCK_CYCLES((cpu_mod == 3) ? 7 : 10); + PREFETCH_RUN((cpu_mod == 3) ? 7 : 10, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,1, 0); + PREFETCH_FLUSH(); + break; + case 0x18: /*CALL far*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = readmeml(easeg, cpu_state.eaaddr); + new_cs = readmemw(easeg, (cpu_state.eaaddr + 4)); if (cpu_state.abrt) return 1; + + CALL_FAR_l(new_cs, new_pc); + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 1,1,cgate16 ? 2:0,cgate16 ? 0:2, 0); + PREFETCH_FLUSH(); + break; + case 0x20: /*JMP*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = geteal(); if (cpu_state.abrt) return 1; + cpu_state.pc = new_pc; + CPU_BLOCK_END(); + if (is486) CLOCK_CYCLES(5); + else CLOCK_CYCLES((cpu_mod == 3) ? 7 : 10); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 0,1,0,0, 0); + PREFETCH_FLUSH(); + break; + case 0x28: /*JMP far*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + oxpc = cpu_state.pc; + new_pc = readmeml(easeg, cpu_state.eaaddr); + new_cs = readmemw(easeg, cpu_state.eaaddr + 4); if (cpu_state.abrt) return 1; + cpu_state.pc = new_pc; + loadcsjmp(new_cs, oxpc); if (cpu_state.abrt) return 1; + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 1,1,0,0, 0); + PREFETCH_FLUSH(); + break; + case 0x30: /*PUSH l*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + temp = geteal(); if (cpu_state.abrt) return 1; + PUSH_L(temp); + CLOCK_CYCLES((cpu_mod == 3) ? 2 : 5); + PREFETCH_RUN((cpu_mod == 3) ? 2 : 5, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,1, 0); + break; + + default: +// fatal("Bad FF opcode %02X\n",rmdat&0x38); + x86illegal(); + } + return cpu_state.abrt; +} +static int opFF_l_a32(uint32_t fetchdat) +{ + uint16_t old_cs, new_cs; + uint32_t old_pc, new_pc; + int cycles_old = cycles; UN_USED(cycles_old); + + uint32_t temp; + + fetch_ea_32(fetchdat); + + switch (rmdat & 0x38) + { + case 0x00: /*INC l*/ + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteal(); if (cpu_state.abrt) return 1; + seteal(temp + 1); if (cpu_state.abrt) return 1; + setadd32nc(temp, 1); + CLOCK_CYCLES((cpu_mod == 3) ? timing_rr : timing_mm); + PREFETCH_RUN((cpu_mod == 3) ? timing_rr : timing_mm, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, 1); + break; + case 0x08: /*DEC l*/ + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteal(); if (cpu_state.abrt) return 1; + seteal(temp - 1); if (cpu_state.abrt) return 1; + setsub32nc(temp, 1); + CLOCK_CYCLES((cpu_mod == 3) ? timing_rr : timing_mm); + PREFETCH_RUN((cpu_mod == 3) ? timing_rr : timing_mm, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, 1); + break; + case 0x10: /*CALL*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = geteal(); if (cpu_state.abrt) return 1; + PUSH_L(cpu_state.pc); if (cpu_state.abrt) return 1; + cpu_state.pc = new_pc; + CPU_BLOCK_END(); + if (is486) CLOCK_CYCLES(5); + else CLOCK_CYCLES((cpu_mod == 3) ? 7 : 10); + PREFETCH_RUN((cpu_mod == 3) ? 7 : 10, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,1, 1); + PREFETCH_FLUSH(); + break; + case 0x18: /*CALL far*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = readmeml(easeg, cpu_state.eaaddr); + new_cs = readmemw(easeg, (cpu_state.eaaddr + 4)); if (cpu_state.abrt) return 1; + + CALL_FAR_l(new_cs, new_pc); + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 1,1,cgate16 ? 2:0,cgate16 ? 0:2, 1); + PREFETCH_FLUSH(); + break; + case 0x20: /*JMP*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + new_pc = geteal(); if (cpu_state.abrt) return 1; + cpu_state.pc = new_pc; + CPU_BLOCK_END(); + if (is486) CLOCK_CYCLES(5); + else CLOCK_CYCLES((cpu_mod == 3) ? 7 : 10); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 1,1,0,0, 1); + PREFETCH_FLUSH(); + break; + case 0x28: /*JMP far*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + oxpc = cpu_state.pc; + new_pc = readmeml(easeg, cpu_state.eaaddr); + new_cs = readmemw(easeg, cpu_state.eaaddr + 4); if (cpu_state.abrt) return 1; + cpu_state.pc = new_pc; + loadcsjmp(new_cs, oxpc); if (cpu_state.abrt) return 1; + CPU_BLOCK_END(); + PREFETCH_RUN(cycles_old-cycles, 2, rmdat, 1,1,0,0, 1); + PREFETCH_FLUSH(); + break; + case 0x30: /*PUSH l*/ + if (cpu_mod != 3) + SEG_CHECK_READ(cpu_state.ea_seg); + temp = geteal(); if (cpu_state.abrt) return 1; + PUSH_L(temp); + PREFETCH_RUN((cpu_mod == 3) ? 2 : 5, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,1, 1); + break; + + default: +// fatal("Bad FF opcode %02X\n",rmdat&0x38); + x86illegal(); + } + return cpu_state.abrt; +} diff --git a/src/codegen/x86_ops_shift.h b/src/codegen/x86_ops_shift.h new file mode 100644 index 000000000..5cf44943d --- /dev/null +++ b/src/codegen/x86_ops_shift.h @@ -0,0 +1,619 @@ +#define OP_SHIFT_b(c, ea32) \ + { \ + uint8_t temp_orig = temp; \ + if (!c) return 0; \ + flags_rebuild(); \ + switch (rmdat & 0x38) \ + { \ + case 0x00: /*ROL b, c*/ \ + temp = (temp << (c & 7)) | (temp >> (8-(c & 7))); \ + seteab(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp & 1) cpu_state.flags |= C_FLAG; \ + if ((temp ^ (temp >> 7)) & 1) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x08: /*ROR b,CL*/ \ + temp = (temp >> (c & 7)) | (temp << (8-(c & 7))); \ + seteab(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp & 0x80) cpu_state.flags |= C_FLAG; \ + if ((temp ^ (temp >> 1)) & 0x40) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x10: /*RCL b,CL*/ \ + temp2 = cpu_state.flags & C_FLAG; \ + if (is486) CLOCK_CYCLES_ALWAYS(c); \ + while (c > 0) \ + { \ + tempc = temp2 ? 1 : 0; \ + temp2 = temp & 0x80; \ + temp = (temp << 1) | tempc; \ + c--; \ + } \ + seteab(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp2) cpu_state.flags |= C_FLAG; \ + if ((cpu_state.flags & C_FLAG) ^ (temp >> 7)) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 9 : 10); \ + PREFETCH_RUN((cpu_mod == 3) ? 9 : 10, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x18: /*RCR b,CL*/ \ + temp2 = cpu_state.flags & C_FLAG; \ + if (is486) CLOCK_CYCLES_ALWAYS(c); \ + while (c > 0) \ + { \ + tempc = temp2 ? 0x80 : 0; \ + temp2 = temp & 1; \ + temp = (temp >> 1) | tempc; \ + c--; \ + } \ + seteab(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp2) cpu_state.flags |= C_FLAG; \ + if ((temp ^ (temp >> 1)) & 0x40) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 9 : 10); \ + PREFETCH_RUN((cpu_mod == 3) ? 9 : 10, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x20: case 0x30: /*SHL b,CL*/ \ + seteab(temp << c); if (cpu_state.abrt) return 1; \ + set_flags_shift(FLAGS_SHL8, temp_orig, c, (temp << c) & 0xff); \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x28: /*SHR b,CL*/ \ + seteab(temp >> c); if (cpu_state.abrt) return 1; \ + set_flags_shift(FLAGS_SHR8, temp_orig, c, temp >> c); \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x38: /*SAR b,CL*/ \ + temp = (int8_t)temp >> c; \ + seteab(temp); if (cpu_state.abrt) return 1; \ + set_flags_shift(FLAGS_SAR8, temp_orig, c, temp); \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + } \ + } + +#define OP_SHIFT_w(c, ea32) \ + { \ + uint16_t temp_orig = temp; \ + if (!c) return 0; \ + flags_rebuild(); \ + switch (rmdat & 0x38) \ + { \ + case 0x00: /*ROL w, c*/ \ + temp = (temp << (c & 15)) | (temp >> (16-(c & 15))); \ + seteaw(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp & 1) cpu_state.flags |= C_FLAG; \ + if ((temp ^ (temp >> 15)) & 1) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x08: /*ROR w,CL*/ \ + temp = (temp >> (c & 15)) | (temp << (16-(c & 15))); \ + seteaw(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp & 0x8000) cpu_state.flags |= C_FLAG; \ + if ((temp ^ (temp >> 1)) & 0x4000) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x10: /*RCL w, c*/ \ + temp2 = cpu_state.flags & C_FLAG; \ + if (is486) CLOCK_CYCLES_ALWAYS(c); \ + while (c > 0) \ + { \ + tempc = temp2 ? 1 : 0; \ + temp2 = temp & 0x8000; \ + temp = (temp << 1) | tempc; \ + c--; \ + } \ + seteaw(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp2) cpu_state.flags |= C_FLAG; \ + if ((cpu_state.flags & C_FLAG) ^ (temp >> 15)) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 9 : 10); \ + PREFETCH_RUN((cpu_mod == 3) ? 9 : 10, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x18: /*RCR w, c*/ \ + temp2 = cpu_state.flags & C_FLAG; \ + if (is486) CLOCK_CYCLES_ALWAYS(c); \ + while (c > 0) \ + { \ + tempc = temp2 ? 0x8000 : 0; \ + temp2 = temp & 1; \ + temp = (temp >> 1) | tempc; \ + c--; \ + } \ + seteaw(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp2) cpu_state.flags |= C_FLAG; \ + if ((temp ^ (temp >> 1)) & 0x4000) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 9 : 10); \ + PREFETCH_RUN((cpu_mod == 3) ? 9 : 10, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x20: case 0x30: /*SHL w, c*/ \ + seteaw(temp << c); if (cpu_state.abrt) return 1; \ + set_flags_shift(FLAGS_SHL16, temp_orig, c, (temp << c) & 0xffff); \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x28: /*SHR w, c*/ \ + seteaw(temp >> c); if (cpu_state.abrt) return 1; \ + set_flags_shift(FLAGS_SHR16, temp_orig, c, temp >> c); \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x38: /*SAR w, c*/ \ + temp = (int16_t)temp >> c; \ + seteaw(temp); if (cpu_state.abrt) return 1; \ + set_flags_shift(FLAGS_SAR16, temp_orig, c, temp); \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + } \ + } + +#define OP_SHIFT_l(c, ea32) \ + { \ + uint32_t temp_orig = temp; \ + if (!c) return 0; \ + flags_rebuild(); \ + switch (rmdat & 0x38) \ + { \ + case 0x00: /*ROL l, c*/ \ + temp = (temp << c) | (temp >> (32-c)); \ + seteal(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp & 1) cpu_state.flags |= C_FLAG; \ + if ((temp ^ (temp >> 31)) & 1) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x08: /*ROR l,CL*/ \ + temp = (temp >> c) | (temp << (32-c)); \ + seteal(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp & 0x80000000) cpu_state.flags |= C_FLAG; \ + if ((temp ^ (temp >> 1)) & 0x40000000) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, (cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1,0, ea32); \ + break; \ + case 0x10: /*RCL l, c*/ \ + temp2 = CF_SET(); \ + if (is486) CLOCK_CYCLES_ALWAYS(c); \ + while (c > 0) \ + { \ + tempc = temp2 ? 1 : 0; \ + temp2 = temp & 0x80000000; \ + temp = (temp << 1) | tempc; \ + c--; \ + } \ + seteal(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp2) cpu_state.flags |= C_FLAG; \ + if ((cpu_state.flags & C_FLAG) ^ (temp >> 31)) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 9 : 10); \ + PREFETCH_RUN((cpu_mod == 3) ? 9 : 10, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, ea32); \ + break; \ + case 0x18: /*RCR l, c*/ \ + temp2 = cpu_state.flags & C_FLAG; \ + if (is486) CLOCK_CYCLES_ALWAYS(c); \ + while (c > 0) \ + { \ + tempc = temp2 ? 0x80000000 : 0; \ + temp2 = temp & 1; \ + temp = (temp >> 1) | tempc; \ + c--; \ + } \ + seteal(temp); if (cpu_state.abrt) return 1; \ + cpu_state.flags &= ~(C_FLAG | V_FLAG); \ + if (temp2) cpu_state.flags |= C_FLAG; \ + if ((temp ^ (temp >> 1)) & 0x40000000) cpu_state.flags |= V_FLAG; \ + CLOCK_CYCLES((cpu_mod == 3) ? 9 : 10); \ + PREFETCH_RUN((cpu_mod == 3) ? 9 : 10, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, ea32); \ + break; \ + case 0x20: case 0x30: /*SHL l, c*/ \ + seteal(temp << c); if (cpu_state.abrt) return 1; \ + set_flags_shift(FLAGS_SHL32, temp_orig, c, temp << c); \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, ea32); \ + break; \ + case 0x28: /*SHR l, c*/ \ + seteal(temp >> c); if (cpu_state.abrt) return 1; \ + set_flags_shift(FLAGS_SHR32, temp_orig, c, temp >> c); \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, ea32); \ + break; \ + case 0x38: /*SAR l, c*/ \ + temp = (int32_t)temp >> c; \ + seteal(temp); if (cpu_state.abrt) return 1; \ + set_flags_shift(FLAGS_SAR32, temp_orig, c, temp); \ + CLOCK_CYCLES((cpu_mod == 3) ? 3 : 7); \ + PREFETCH_RUN((cpu_mod == 3) ? 3 : 7, 2, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, ea32); \ + break; \ + } \ + } + +static int opC0_a16(uint32_t fetchdat) +{ + int c; + int tempc; + uint8_t temp, temp2 = 0; + + fetch_ea_16(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = readmemb(cs, cpu_state.pc) & 31; cpu_state.pc++; + PREFETCH_PREFIX(); + temp = geteab(); if (cpu_state.abrt) return 1; + OP_SHIFT_b(c, 0); + return 0; +} +static int opC0_a32(uint32_t fetchdat) +{ + int c; + int tempc; + uint8_t temp, temp2 = 0; + + fetch_ea_32(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = readmemb(cs, cpu_state.pc) & 31; cpu_state.pc++; + PREFETCH_PREFIX(); + temp = geteab(); if (cpu_state.abrt) return 1; + OP_SHIFT_b(c, 1); + return 0; +} +static int opC1_w_a16(uint32_t fetchdat) +{ + int c; + int tempc; + uint16_t temp, temp2 = 0; + + fetch_ea_16(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = readmemb(cs, cpu_state.pc) & 31; cpu_state.pc++; + PREFETCH_PREFIX(); + temp = geteaw(); if (cpu_state.abrt) return 1; + OP_SHIFT_w(c, 0); + return 0; +} +static int opC1_w_a32(uint32_t fetchdat) +{ + int c; + int tempc; + uint16_t temp, temp2 = 0; + + fetch_ea_32(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = readmemb(cs, cpu_state.pc) & 31; cpu_state.pc++; + PREFETCH_PREFIX(); + temp = geteaw(); if (cpu_state.abrt) return 1; + OP_SHIFT_w(c, 1); + return 0; +} +static int opC1_l_a16(uint32_t fetchdat) +{ + int c; + int tempc; + uint32_t temp, temp2 = 0; + + fetch_ea_16(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = readmemb(cs, cpu_state.pc) & 31; cpu_state.pc++; + PREFETCH_PREFIX(); + temp = geteal(); if (cpu_state.abrt) return 1; + OP_SHIFT_l(c, 0); + return 0; +} +static int opC1_l_a32(uint32_t fetchdat) +{ + int c; + int tempc; + uint32_t temp, temp2 = 0; + + fetch_ea_32(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = readmemb(cs, cpu_state.pc) & 31; cpu_state.pc++; + PREFETCH_PREFIX(); + temp = geteal(); if (cpu_state.abrt) return 1; + OP_SHIFT_l(c, 1); + return 0; +} + +static int opD0_a16(uint32_t fetchdat) +{ + int c = 1; + int tempc; + uint8_t temp, temp2 = 0; + + fetch_ea_16(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteab(); if (cpu_state.abrt) return 1; + OP_SHIFT_b(c, 0); + return 0; +} +static int opD0_a32(uint32_t fetchdat) +{ + int c = 1; + int tempc; + uint8_t temp, temp2 = 0; + + fetch_ea_32(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteab(); if (cpu_state.abrt) return 1; + OP_SHIFT_b(c, 1); + return 0; +} +static int opD1_w_a16(uint32_t fetchdat) +{ + int c = 1; + int tempc; + uint16_t temp, temp2 = 0; + + fetch_ea_16(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteaw(); if (cpu_state.abrt) return 1; + OP_SHIFT_w(c, 0); + return 0; +} +static int opD1_w_a32(uint32_t fetchdat) +{ + int c = 1; + int tempc; + uint16_t temp, temp2 = 0; + + fetch_ea_32(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteaw(); if (cpu_state.abrt) return 1; + OP_SHIFT_w(c, 1); + return 0; +} +static int opD1_l_a16(uint32_t fetchdat) +{ + int c = 1; + int tempc; + uint32_t temp, temp2 = 0; + + fetch_ea_16(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteal(); if (cpu_state.abrt) return 1; + OP_SHIFT_l(c, 0); + return 0; +} +static int opD1_l_a32(uint32_t fetchdat) +{ + int c = 1; + int tempc; + uint32_t temp, temp2 = 0; + + fetch_ea_32(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + temp = geteal(); if (cpu_state.abrt) return 1; + OP_SHIFT_l(c, 1); + return 0; +} + +static int opD2_a16(uint32_t fetchdat) +{ + int c; + int tempc; + uint8_t temp, temp2 = 0; + + fetch_ea_16(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = CL & 31; + temp = geteab(); if (cpu_state.abrt) return 1; + OP_SHIFT_b(c, 0); + return 0; +} +static int opD2_a32(uint32_t fetchdat) +{ + int c; + int tempc; + uint8_t temp, temp2 = 0; + + fetch_ea_32(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = CL & 31; + temp = geteab(); if (cpu_state.abrt) return 1; + OP_SHIFT_b(c, 1); + return 0; +} +static int opD3_w_a16(uint32_t fetchdat) +{ + int c; + int tempc; + uint16_t temp, temp2 = 0; + + fetch_ea_16(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = CL & 31; + temp = geteaw(); if (cpu_state.abrt) return 1; + OP_SHIFT_w(c, 0); + return 0; +} +static int opD3_w_a32(uint32_t fetchdat) +{ + int c; + int tempc; + uint16_t temp, temp2 = 0; + + fetch_ea_32(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = CL & 31; + temp = geteaw(); if (cpu_state.abrt) return 1; + OP_SHIFT_w(c, 1); + return 0; +} +static int opD3_l_a16(uint32_t fetchdat) +{ + int c; + int tempc; + uint32_t temp, temp2 = 0; + + fetch_ea_16(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = CL & 31; + temp = geteal(); if (cpu_state.abrt) return 1; + OP_SHIFT_l(c, 0); + return 0; +} +static int opD3_l_a32(uint32_t fetchdat) +{ + int c; + int tempc; + uint32_t temp, temp2 = 0; + + fetch_ea_32(fetchdat); + if (cpu_mod != 3) + SEG_CHECK_WRITE(cpu_state.ea_seg); + c = CL & 31; + temp = geteal(); if (cpu_state.abrt) return 1; + OP_SHIFT_l(c, 1); + return 0; +} + + +#define SHLD_w() \ + if (count) \ + { \ + int tempc; \ + uint32_t templ; \ + uint16_t tempw = geteaw(); if (cpu_state.abrt) return 1; \ + tempc = ((tempw << (count - 1)) & (1 << 15)) ? 1 : 0; \ + templ = (tempw << 16) | cpu_state.regs[cpu_reg].w; \ + if (count <= 16) tempw = templ >> (16 - count); \ + else tempw = (templ << count) >> 16; \ + seteaw(tempw); if (cpu_state.abrt) return 1; \ + setznp16(tempw); \ + flags_rebuild(); \ + if (tempc) cpu_state.flags |= C_FLAG; \ + } + +#define SHLD_l() \ + if (count) \ + { \ + int tempc; \ + uint32_t templ = geteal(); if (cpu_state.abrt) return 1; \ + tempc = ((templ << (count - 1)) & (1 << 31)) ? 1 : 0; \ + templ = (templ << count) | (cpu_state.regs[cpu_reg].l >> (32 - count)); \ + seteal(templ); if (cpu_state.abrt) return 1; \ + setznp32(templ); \ + flags_rebuild(); \ + if (tempc) cpu_state.flags |= C_FLAG; \ + } + + +#define SHRD_w() \ + if (count) \ + { \ + int tempc; \ + uint32_t templ; \ + uint16_t tempw = geteaw(); if (cpu_state.abrt) return 1; \ + tempc = (tempw >> (count - 1)) & 1; \ + templ = tempw | (cpu_state.regs[cpu_reg].w << 16); \ + tempw = templ >> count; \ + seteaw(tempw); if (cpu_state.abrt) return 1; \ + setznp16(tempw); \ + flags_rebuild(); \ + if (tempc) cpu_state.flags |= C_FLAG; \ + } + +#define SHRD_l() \ + if (count) \ + { \ + int tempc; \ + uint32_t templ = geteal(); if (cpu_state.abrt) return 1; \ + tempc = (templ >> (count - 1)) & 1; \ + templ = (templ >> count) | (cpu_state.regs[cpu_reg].l << (32 - count)); \ + seteal(templ); if (cpu_state.abrt) return 1; \ + setznp32(templ); \ + flags_rebuild(); \ + if (tempc) cpu_state.flags |= C_FLAG; \ + } + +#define opSHxD(operation) \ + static int op ## operation ## _i_a16(uint32_t fetchdat) \ + { \ + int count; \ + \ + fetch_ea_16(fetchdat); \ + if (cpu_mod != 3) \ + SEG_CHECK_WRITE(cpu_state.ea_seg); \ + count = getbyte() & 31; \ + operation() \ + \ + CLOCK_CYCLES(3); \ + PREFETCH_RUN(3, 3, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, 0); \ + return 0; \ + } \ + static int op ## operation ## _CL_a16(uint32_t fetchdat) \ + { \ + int count; \ + \ + fetch_ea_16(fetchdat); \ + if (cpu_mod != 3) \ + SEG_CHECK_WRITE(cpu_state.ea_seg); \ + count = CL & 31; \ + operation() \ + \ + CLOCK_CYCLES(3); \ + PREFETCH_RUN(3, 3, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, 0); \ + return 0; \ + } \ + static int op ## operation ## _i_a32(uint32_t fetchdat) \ + { \ + int count; \ + \ + fetch_ea_32(fetchdat); \ + if (cpu_mod != 3) \ + SEG_CHECK_WRITE(cpu_state.ea_seg); \ + count = getbyte() & 31; \ + operation() \ + \ + CLOCK_CYCLES(3); \ + PREFETCH_RUN(3, 3, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, 1); \ + return 0; \ + } \ + static int op ## operation ## _CL_a32(uint32_t fetchdat) \ + { \ + int count; \ + \ + fetch_ea_32(fetchdat); \ + if (cpu_mod != 3) \ + SEG_CHECK_WRITE(cpu_state.ea_seg); \ + count = CL & 31; \ + operation() \ + \ + CLOCK_CYCLES(3); \ + PREFETCH_RUN(3, 3, rmdat, 0,(cpu_mod == 3) ? 0:1,0,(cpu_mod == 3) ? 0:1, 1); \ + return 0; \ + } + +opSHxD(SHLD_w) +opSHxD(SHLD_l) +opSHxD(SHRD_w) +opSHxD(SHRD_l) diff --git a/src/codegen/x86seg.c b/src/codegen/x86seg.c new file mode 100644 index 000000000..c903fbb79 --- /dev/null +++ b/src/codegen/x86seg.c @@ -0,0 +1,2607 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * x86 CPU segment emulation. + * + * + * + * Authors: Sarah Walker, + * Miran Grca, + * + * Copyright 2008-2018 Sarah Walker. + * Copyright 2016-2018 Miran Grca. + */ +#include +#include +#include +#include +#include +#include +#include +#define HAVE_STDARG_H +#include <86box/86box.h> +#include "cpu.h" +#include <86box/device.h> +#include <86box/timer.h> +#include <86box/machine.h> +#include <86box/mem.h> +#include <86box/nvr.h> +#include "x86.h" +#include "x86_flags.h" +#include "386_common.h" + + +/*Controls whether the accessed bit in a descriptor is set when CS is loaded.*/ +#define CS_ACCESSED + +/*Controls whether the accessed bit in a descriptor is set when a data or stack + selector is loaded.*/ +#define SEL_ACCESSED +int stimes = 0; +int dtimes = 0; +int btimes = 0; + +uint32_t abrt_error; +int cgate16, cgate32; + +#define breaknullsegs 0 + +int intgatesize; + +void taskswitch286(uint16_t seg, uint16_t *segdat, int is32); +void taskswitch386(uint16_t seg, uint16_t *segdat); + +void pmodeint(int num, int soft); +/*NOT PRESENT is INT 0B + GPF is INT 0D*/ + + +#ifdef ENABLE_X86SEG_LOG +int x86seg_do_log = ENABLE_X86SEG_LOG; + + +static void +x86seg_log(const char *fmt, ...) +{ + va_list ap; + + if (x86seg_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define x86seg_log(fmt, ...) +#endif + + +void x86abort(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + + nvr_save(); +#ifdef ENABLE_808X_LOG + dumpregs(1); +#endif + fflush(stdlog); + exit(-1); +} + +uint8_t opcode2; + +static void seg_reset(x86seg *s) +{ + s->access = (0 << 5) | 2 | 0x80; + s->ar_high = 0x10; + s->limit = 0xFFFF; + s->limit_low = 0; + s->limit_high = 0xffff; + if(s == &cpu_state.seg_cs) + { + // TODO - When the PC is reset, initialization of the CS descriptor must be like the annotated line below. + s->base = AT ? (cpu_16bitbus ? 0xFF0000 : 0xFFFF0000) : 0xFFFF0; + // s->base = AT ? 0xF0000 : 0xFFFF0; + s->seg = AT ? 0xF000 : 0xFFFF; + } + else + { + s->base = 0; + s->seg = 0; + } + +} + +void x86seg_reset() +{ + seg_reset(&cpu_state.seg_cs); + seg_reset(&cpu_state.seg_ds); + seg_reset(&cpu_state.seg_es); + seg_reset(&cpu_state.seg_fs); + seg_reset(&cpu_state.seg_gs); + seg_reset(&cpu_state.seg_ss); +} + +void x86_doabrt(int x86_abrt) +{ + CS = oldcs; + cpu_state.pc = cpu_state.oldpc; + cpu_state.seg_cs.access = (oldcpl << 5) | 0x80; + cpu_state.seg_cs.ar_high = 0x10; + + if (msw & 1) + pmodeint(x86_abrt, 0); + else + { + uint32_t addr = (x86_abrt << 2) + idt.base; + if (stack32) + { + writememw(ss,ESP-2,cpu_state.flags); + writememw(ss,ESP-4,CS); + writememw(ss,ESP-6,cpu_state.pc); + ESP-=6; + } + else + { + writememw(ss,((SP-2)&0xFFFF),cpu_state.flags); + writememw(ss,((SP-4)&0xFFFF),CS); + writememw(ss,((SP-6)&0xFFFF),cpu_state.pc); + SP-=6; + } + + cpu_state.flags&=~I_FLAG; + cpu_state.flags&=~T_FLAG; + oxpc=cpu_state.pc; + cpu_state.pc=readmemw(0,addr); + loadcs(readmemw(0,addr+2)); + return; + } + + if (cpu_state.abrt || x86_was_reset) return; + + if (intgatesize == 16) + { + if (stack32) + { + writememw(ss, ESP-2, abrt_error); + ESP-=2; + } + else + { + writememw(ss, ((SP-2)&0xFFFF), abrt_error); + SP-=2; + } + } + else + { + if (stack32) + { + writememl(ss, ESP-4, abrt_error); + ESP-=4; + } + else + { + writememl(ss, ((SP-4)&0xFFFF), abrt_error); + SP-=4; + } + } +} +void x86gpf(char *s, uint16_t error) +{ + cpu_state.abrt = ABRT_GPF; + abrt_error = error; +} +void x86ss(char *s, uint16_t error) +{ + cpu_state.abrt = ABRT_SS; + abrt_error = error; +} +void x86ts(char *s, uint16_t error) +{ + cpu_state.abrt = ABRT_TS; + abrt_error = error; +} +void x86np(char *s, uint16_t error) +{ + cpu_state.abrt = ABRT_NP; + abrt_error = error; +} + + +static void set_stack32(int s) +{ + stack32 = s; + if (stack32) + cpu_cur_status |= CPU_STATUS_STACK32; + else + cpu_cur_status &= ~CPU_STATUS_STACK32; +} + +static void set_use32(int u) +{ + if (u) + { + use32 = 0x300; + cpu_cur_status |= CPU_STATUS_USE32; + } + else + { + use32 = 0; + cpu_cur_status &= ~CPU_STATUS_USE32; + } +} + +void do_seg_load(x86seg *s, uint16_t *segdat) +{ + s->limit = segdat[0] | ((segdat[3] & 0xF) << 16); + if (segdat[3] & 0x80) + s->limit = (s->limit << 12) | 0xFFF; + s->base = segdat[1] | ((segdat[2] & 0xFF) << 16); + if (is386) + s->base |= ((segdat[3] >> 8) << 24); + s->access = segdat[2] >> 8; + s->ar_high = segdat[3] & 0xff; + + if ((segdat[2] & 0x1800) != 0x1000 || !(segdat[2] & (1 << 10))) /*expand-down*/ + { + s->limit_high = s->limit; + s->limit_low = 0; + } + else + { + s->limit_high = (segdat[3] & 0x40) ? 0xffffffff : 0xffff; + s->limit_low = s->limit + 1; + } + + if (s == &cpu_state.seg_ds) + { + if (s->base == 0 && s->limit_low == 0 && s->limit_high == 0xffffffff) + cpu_cur_status &= ~CPU_STATUS_NOTFLATDS; + else + cpu_cur_status |= CPU_STATUS_NOTFLATDS; + } + if (s == &cpu_state.seg_ss) + { + if (s->base == 0 && s->limit_low == 0 && s->limit_high == 0xffffffff) + cpu_cur_status &= ~CPU_STATUS_NOTFLATSS; + else + cpu_cur_status |= CPU_STATUS_NOTFLATSS; + } +} + +static void do_seg_v86_init(x86seg *s) +{ + s->access = (3 << 5) | 2 | 0x80; + s->ar_high = 0x10; + s->limit = 0xffff; + s->limit_low = 0; + s->limit_high = 0xffff; +} + +static void check_seg_valid(x86seg *s) +{ + int dpl = (s->access >> 5) & 3; + int valid = 1; + + if (s->seg & 4) + { + if ((s->seg & ~7) >= ldt.limit) + { + valid = 0; + } + } + else + { + if ((s->seg & ~7) >= gdt.limit) + { + valid = 0; + } + } + + switch (s->access & 0x1f) + { + case 0x10: case 0x11: case 0x12: case 0x13: /*Data segments*/ + case 0x14: case 0x15: case 0x16: case 0x17: + case 0x1A: case 0x1B: /*Readable non-conforming code*/ + if ((s->seg & 3) > dpl || (CPL) > dpl) + { + valid = 0; + break; + } + break; + + case 0x1E: case 0x1F: /*Readable conforming code*/ + break; + + default: + valid = 0; + break; + } + + if (!valid) + loadseg(0, s); +} + +void loadseg(uint16_t seg, x86seg *s) +{ + uint16_t segdat[4]; + uint32_t addr; + int dpl; + + if (msw&1 && !(cpu_state.eflags&VM_FLAG)) + { + if (!(seg&~3)) + { + if (s==&cpu_state.seg_ss) + { + x86ss(NULL,0); + return; + } + s->seg = 0; + s->access = 0x80; + s->ar_high = 0x10; + s->base=-1; + if (s == &cpu_state.seg_ds) + cpu_cur_status |= CPU_STATUS_NOTFLATDS; + return; + } + addr=seg&~7; + if (seg&4) + { +#if 0 + if (addr>=ldt.limit) +#else + if ((addr+7)>ldt.limit) +#endif + { + x86gpf("loadseg(): Bigger than LDT limit",seg&~3); + return; + } + addr+=ldt.base; + } + else + { +#if 0 + if (addr>=gdt.limit) +#else + if ((addr+7)>gdt.limit) +#endif + { + x86gpf("loadseg(): Bigger than GDT limit",seg&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(0,addr+2); + segdat[2]=readmemw(0,addr+4); + segdat[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) return; + dpl=(segdat[2]>>13)&3; + if (s==&cpu_state.seg_ss) + { + if (!(seg&~3)) + { + x86gpf("loadseg(): Stack segment is zero",seg&~3); + return; + } + if ((seg&3)!=CPL) + { + x86gpf("loadseg(): Stack segment RPL != CPL",seg&~3); + return; + } + if (dpl!=CPL) + { + x86gpf("loadseg(): Stack segment DPL != CPL",seg&~3); + return; + } + switch ((segdat[2]>>8)&0x1F) + { + case 0x12: case 0x13: case 0x16: case 0x17: /*r/w*/ + break; + default: + x86gpf("loadseg(): Unknown stack segment type",seg&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + x86ss(NULL,seg&~3); + return; + } + set_stack32((segdat[3] & 0x40) ? 1 : 0); + } + else if (s!=&cpu_state.seg_cs) + { + x86seg_log("Seg data %04X %04X %04X %04X\n", segdat[0], segdat[1], segdat[2], segdat[3]); + x86seg_log("Seg type %03X\n",segdat[2]&0x1F00); + switch ((segdat[2]>>8)&0x1F) + { + case 0x10: case 0x11: case 0x12: case 0x13: /*Data segments*/ + case 0x14: case 0x15: case 0x16: case 0x17: + case 0x1A: case 0x1B: /*Readable non-conforming code*/ + if ((seg&3)>dpl) + { + x86gpf("loadseg(): Normal segment is zero",seg&~3); + return; + } + if ((CPL)>dpl) + { + x86gpf("loadseg(): Normal segment DPL < CPL",seg&~3); + return; + } + break; + case 0x1E: case 0x1F: /*Readable conforming code*/ + break; + default: + x86gpf("loadseg(): Unknown normal segment type",seg&~3); + return; + } + } + + if (!(segdat[2] & 0x8000)) + { + x86np("Load data seg not present", seg & 0xfffc); + return; + } + s->seg = seg; + do_seg_load(s, segdat); + +#ifndef CS_ACCESSED + if (s != &cpu_state.seg_cs) + { +#endif +#ifdef SEL_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif +#ifndef CS_ACCESSED + } +#endif + s->checked = 0; +#ifdef USE_DYNAREC + if (s == &cpu_state.seg_ds) + codegen_flat_ds = 0; + if (s == &cpu_state.seg_ss) + codegen_flat_ss = 0; +#endif + } + else + { + s->access = (3 << 5) | 2 | 0x80; + s->ar_high = 0x10; + s->base = seg << 4; + s->seg = seg; + s->checked = 1; +#ifdef USE_DYNAREC + if (s == &cpu_state.seg_ds) + codegen_flat_ds = 0; + if (s == &cpu_state.seg_ss) + codegen_flat_ss = 0; +#endif + if (s == &cpu_state.seg_ss && (cpu_state.eflags & VM_FLAG)) + set_stack32(0); + } + + if (s == &cpu_state.seg_ds) + { + if (s->base == 0 && s->limit_low == 0 && s->limit_high == 0xffffffff) + cpu_cur_status &= ~CPU_STATUS_NOTFLATDS; + else + cpu_cur_status |= CPU_STATUS_NOTFLATDS; + } + if (s == &cpu_state.seg_ss) + { + if (s->base == 0 && s->limit_low == 0 && s->limit_high == 0xffffffff) + cpu_cur_status &= ~CPU_STATUS_NOTFLATSS; + else + cpu_cur_status |= CPU_STATUS_NOTFLATSS; + } +} + +#define DPL ((segdat[2]>>13)&3) +#define DPL2 ((segdat2[2]>>13)&3) +#define DPL3 ((segdat3[2]>>13)&3) + +void loadcs(uint16_t seg) +{ + uint16_t segdat[4]; + uint32_t addr; + x86seg_log("Load CS %04X\n",seg); + if (msw&1 && !(cpu_state.eflags&VM_FLAG)) + { + if (!(seg&~3)) + { + x86gpf(NULL,0); + return; + } + addr=seg&~7; + if (seg&4) + { + if (addr>=ldt.limit) + { + x86gpf("loadcs(): Protected mode selector > LDT limit",seg&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86gpf("loadcs(): Protected mode selector > GDT limit",seg&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(0,addr+2); + segdat[2]=readmemw(0,addr+4); + segdat[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) return; + if (segdat[2]&0x1000) /*Normal code segment*/ + { + if (!(segdat[2]&0x400)) /*Not conforming*/ + { + if ((seg&3)>CPL) + { + x86gpf("loadcs(): Non-conforming RPL > CPL",seg&~3); + return; + } + if (CPL != DPL) + { + x86gpf("loadcs(): Non-conforming CPL != DPL",seg&~3); + return; + } + } + if (CPL < DPL) + { + x86gpf("loadcs(): CPL < DPL",seg&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + x86np("Load CS not present", seg & 0xfffc); + return; + } + set_use32(segdat[3] & 0x40); + CS=(seg&~3)|CPL; + do_seg_load(&cpu_state.seg_cs, segdat); + use32=(segdat[3]&0x40)?0x300:0; + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + } + else /*System segment*/ + { + if (!(segdat[2]&0x8000)) + { + x86np("Load CS system seg not present", seg & 0xfffc); + return; + } + switch (segdat[2]&0xF00) + { + default: + x86gpf("Load CS system segment has bits 0-3 of access rights set",seg&~3); + return; + } + } + } + else + { + cpu_state.seg_cs.base=seg<<4; + cpu_state.seg_cs.limit=0xFFFF; + cpu_state.seg_cs.limit_low = 0; + cpu_state.seg_cs.limit_high = 0xffff; + CS=seg & 0xFFFF; + if (cpu_state.eflags&VM_FLAG) cpu_state.seg_cs.access=(3<<5) | 2 | 0x80; + else cpu_state.seg_cs.access=(0<<5) | 2 | 0x80; + cpu_state.seg_cs.ar_high = 0x10; + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + } +} + +void loadcsjmp(uint16_t seg, uint32_t old_pc) +{ + uint16_t segdat[4]; + uint32_t addr; + uint16_t type,seg2; + uint32_t newpc; + if (msw&1 && !(cpu_state.eflags&VM_FLAG)) + { + if (!(seg&~3)) + { + x86gpf("loadcsjmp(): Selector is zero",0); + return; + } + addr=seg&~7; + if (seg&4) + { + if (addr>=ldt.limit) + { + x86gpf("loacsjmp(): Selector > LDT limit",seg&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86gpf("loacsjmp(): Selector > GDT limit",seg&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(0,addr+2); + segdat[2]=readmemw(0,addr+4); + segdat[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) return; + x86seg_log("%04X %04X %04X %04X\n",segdat[0],segdat[1],segdat[2],segdat[3]); + if (segdat[2]&0x1000) /*Normal code segment*/ + { + if (!(segdat[2]&0x400)) /*Not conforming*/ + { + if ((seg&3)>CPL) + { + x86gpf("loadcsjmp(): segment PL > CPL",seg&~3); + return; + } + if (CPL != DPL) + { + x86gpf("loadcsjmp(): CPL != DPL",seg&~3); + return; + } + } + if (CPL < DPL) + { + x86gpf("loadcsjmp(): CPL < DPL",seg&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + x86np("Load CS JMP not present\n", seg & 0xfffc); + return; + } + set_use32(segdat[3]&0x40); + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + + CS = (seg & ~3) | CPL; + segdat[2] = (segdat[2] & ~(3 << (5+8))) | (CPL << (5+8)); + + do_seg_load(&cpu_state.seg_cs, segdat); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + cycles -= timing_jmp_pm; + } + else /*System segment*/ + { + if (!(segdat[2]&0x8000)) + { + x86np("Load CS JMP system selector not present\n", seg & 0xfffc); + return; + } + type=segdat[2]&0xF00; + newpc=segdat[0]; + if (type&0x800) newpc|=segdat[3]<<16; + switch (type) + { + case 0x400: /*Call gate*/ + case 0xC00: + cgate32=(type&0x800); + cgate16=!cgate32; + oldcs=CS; + cpu_state.oldpc = cpu_state.pc; + if (DPL < CPL) + { + x86gpf("loadcsjmp(): Call gate DPL < CPL",seg&~3); + return; + } + if (DPL < (seg&3)) + { + x86gpf("loadcsjmp(): Call gate DPL< RPL",seg&~3); + return; + } + if (DPL < CPL) + { + x86gpf("loadcsjmp(): ex DPL < CPL",seg&~3); + return; + } + if ((DPL < (seg&3))) + { + x86gpf("loadcsjmp(): ex (DPL < (seg&3))",seg&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + x86np("Load CS JMP call gate not present\n", seg & 0xfffc); + return; + } + seg2=segdat[1]; + + if (!(seg2&~3)) + { + x86gpf(NULL,0); + return; + } + addr=seg2&~7; + if (seg2&4) + { + if (addr>=ldt.limit) + { + x86gpf("loadcsjmp(): Call gate selector > LDT limit",seg2&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86gpf("loadcsjmp(): Call gate selector > GDT limit",seg2&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(0,addr+2); + segdat[2]=readmemw(0,addr+4); + segdat[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) return; + + if (DPL > CPL) + { + x86gpf("loadcsjmp(): ex DPL > CPL",seg2&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + x86np("Load CS JMP from call gate not present\n", seg2 & 0xfffc); + return; + } + + + switch (segdat[2]&0x1F00) + { + case 0x1800: case 0x1900: case 0x1A00: case 0x1B00: /*Non-conforming code*/ + if (DPL > CPL) + { + x86gpf("loadcsjmp(): Non-conforming DPL > CPL",seg2&~3); + return; + } + /*FALLTHROUGH*/ + case 0x1C00: case 0x1D00: case 0x1E00: case 0x1F00: /*Conforming*/ + CS=seg2; + do_seg_load(&cpu_state.seg_cs, segdat); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + + set_use32(segdat[3]&0x40); + cpu_state.pc=newpc; + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + break; + + default: + x86gpf("loadcsjmp(): Unknown type",seg2&~3); + return; + } + cycles -= timing_jmp_pm_gate; + break; + + + case 0x100: /*286 Task gate*/ + case 0x900: /*386 Task gate*/ + cpu_state.pc=old_pc; + optype=JMP; + cpl_override=1; + taskswitch286(seg,segdat,segdat[2]&0x800); + cpu_state.flags &= ~NT_FLAG; + cpl_override=0; + return; + + default: + x86gpf(NULL,0); + return; + } + } + } + else + { + cpu_state.seg_cs.base=seg<<4; + cpu_state.seg_cs.limit=0xFFFF; + cpu_state.seg_cs.limit_low = 0; + cpu_state.seg_cs.limit_high = 0xffff; + CS=seg; + if (cpu_state.eflags&VM_FLAG) cpu_state.seg_cs.access=(3<<5) | 2 | 0x80; + else cpu_state.seg_cs.access=(0<<5) | 2 | 0x80; + cpu_state.seg_cs.ar_high = 0x10; + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + cycles -= timing_jmp_rm; + } +} + +void PUSHW(uint16_t v) +{ + if (stack32) + { + writememw(ss,ESP-2,v); + if (cpu_state.abrt) return; + ESP-=2; + } + else + { + writememw(ss,((SP-2)&0xFFFF),v); + if (cpu_state.abrt) return; + SP-=2; + } +} +void PUSHL(uint32_t v) +{ + if (stack32) + { + writememl(ss,ESP-4,v); + if (cpu_state.abrt) return; + ESP-=4; + } + else + { + writememl(ss,((SP-4)&0xFFFF),v); + if (cpu_state.abrt) return; + SP-=4; + } +} +uint16_t POPW() +{ + uint16_t tempw; + if (stack32) + { + tempw=readmemw(ss,ESP); + if (cpu_state.abrt) return 0; + ESP+=2; + } + else + { + tempw=readmemw(ss,SP); + if (cpu_state.abrt) return 0; + SP+=2; + } + return tempw; +} +uint32_t POPL() +{ + uint32_t templ; + if (stack32) + { + templ=readmeml(ss,ESP); + if (cpu_state.abrt) return 0; + ESP+=4; + } + else + { + templ=readmeml(ss,SP); + if (cpu_state.abrt) return 0; + SP+=4; + } + return templ; +} + +void loadcscall(uint16_t seg) +{ + uint16_t seg2; + uint16_t segdat[4],segdat2[4],newss; + uint32_t addr,oldssbase=ss, oaddr; + uint32_t newpc; + int count; + uint32_t oldss,oldsp,newsp, oldsp2; + int type; + uint16_t tempw; + + int csout = output; + + if (msw&1 && !(cpu_state.eflags&VM_FLAG)) + { + if (csout) x86seg_log("Protected mode CS load! %04X\n",seg); + if (!(seg&~3)) + { + x86gpf("loadcscall(): Protected mode selector is zero",0); + return; + } + addr=seg&~7; + if (seg&4) + { + if (addr>=ldt.limit) + { + x86gpf("loadcscall(): Selector > LDT limit",seg&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86gpf("loadcscall(): Selector > GDT limit",seg&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(0,addr+2); + segdat[2]=readmemw(0,addr+4); + segdat[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) return; + type=segdat[2]&0xF00; + newpc=segdat[0]; + if (type&0x800) newpc|=segdat[3]<<16; + + if (csout) x86seg_log("Code seg call - %04X - %04X %04X %04X\n",seg,segdat[0],segdat[1],segdat[2]); + if (segdat[2]&0x1000) + { + if (!(segdat[2]&0x400)) /*Not conforming*/ + { + if ((seg&3)>CPL) + { + x86gpf("loadcscall(): Non-conforming RPL > CPL",seg&~3); + return; + } + if (CPL != DPL) + { + x86gpf("loadcscall(): Non-conforming CPL != DPL",seg&~3); + return; + } + } + if (CPL < DPL) + { + x86gpf("loadcscall(): CPL < DPL",seg&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + x86np("Load CS call not present", seg & 0xfffc); + return; + } + set_use32(segdat[3]&0x40); + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + + /*Conforming segments don't change CPL, so preserve existing CPL*/ + if (segdat[2]&0x400) + { + seg = (seg & ~3) | CPL; + segdat[2] = (segdat[2] & ~(3 << (5+8))) | (CPL << (5+8)); + } + else /*On non-conforming segments, set RPL = CPL*/ + seg = (seg & ~3) | CPL; + CS=seg; + do_seg_load(&cpu_state.seg_cs, segdat); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + + if (csout) x86seg_log("Complete\n"); + cycles -= timing_call_pm; + } + else + { + type=segdat[2]&0xF00; + if (csout) x86seg_log("Type %03X\n",type); + switch (type) + { + case 0x400: /*Call gate*/ + case 0xC00: /*386 Call gate*/ + x86seg_log("Callgate %08X\n", cpu_state.pc); + cgate32=(type&0x800); + cgate16=!cgate32; + oldcs=CS; + count=segdat[2]&31; + if (DPL < CPL) + { + x86gpf("loadcscall(): ex DPL < CPL",seg&~3); + return; + } + if ((DPL < (seg&3))) + { + x86gpf("loadcscall(): ex (DPL < (seg&3))",seg&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + x86seg_log("Call gate not present %04X\n",seg); + x86np("Call gate not present\n", seg & 0xfffc); + return; + } + seg2=segdat[1]; + + x86seg_log("New address : %04X:%08X\n", seg2, newpc); + + if (!(seg2&~3)) + { + x86gpf(NULL,0); + return; + } + addr=seg2&~7; + if (seg2&4) + { + if (addr>=ldt.limit) + { + x86gpf("loadcscall(): ex Selector > LDT limit",seg2&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86gpf("loadcscall(): ex Selector > GDT limit",seg2&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(0,addr+2); + segdat[2]=readmemw(0,addr+4); + segdat[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) return; + + x86seg_log("Code seg2 call - %04X - %04X %04X %04X\n",seg2,segdat[0],segdat[1],segdat[2]); + + if (DPL > CPL) + { + x86gpf("loadcscall(): ex DPL > CPL",seg2&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + x86seg_log("Call gate CS not present %04X\n",seg2); + x86np("Call gate CS not present", seg2 & 0xfffc); + return; + } + + + switch (segdat[2]&0x1F00) + { + case 0x1800: case 0x1900: case 0x1A00: case 0x1B00: /*Non-conforming code*/ + if (DPL < CPL) + { + oaddr = addr; + /*Load new stack*/ + oldss=SS; + oldsp=oldsp2=ESP; + cpl_override=1; + if (tr.access&8) + { + addr = 4 + tr.base + (DPL * 8); + newss=readmemw(0,addr+4); + newsp=readmeml(0,addr); + } + else + { + addr = 2 + tr.base + (DPL * 4); + newss=readmemw(0,addr+2); + newsp=readmemw(0,addr); + } + cpl_override=0; + if (cpu_state.abrt) return; + x86seg_log("New stack %04X:%08X\n",newss,newsp); + if (!(newss&~3)) + { + x86ts(NULL,newss&~3); + return; + } + addr=newss&~7; + if (newss&4) + { +#if 0 + if (addr>=ldt.limit) +#else + if ((addr+7)>ldt.limit) +#endif + { + x86abort("Bigger than LDT limit %04X %08X %04X CSC SS\n",newss,addr,ldt.limit); + x86ts(NULL,newss&~3); + return; + } + addr+=ldt.base; + } + else + { +#if 0 + if (addr>=gdt.limit) +#else + if ((addr+7)>gdt.limit) +#endif + { + x86abort("Bigger than GDT limit %04X %04X CSC\n",newss,gdt.limit); + x86ts(NULL,newss&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + x86seg_log("Read stack seg\n"); + segdat2[0]=readmemw(0,addr); + segdat2[1]=readmemw(0,addr+2); + segdat2[2]=readmemw(0,addr+4); + segdat2[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) return; + x86seg_log("Read stack seg done!\n"); + if (((newss & 3) != DPL) || (DPL2 != DPL)) + { + x86ts(NULL,newss&~3); + return; + } + if ((segdat2[2]&0x1A00)!=0x1200) + { + x86ts(NULL,newss&~3); + return; + } + if (!(segdat2[2]&0x8000)) + { + x86ss("Call gate loading SS not present\n", newss & 0xfffc); + return; + } + if (!stack32) oldsp &= 0xFFFF; + SS=newss; + set_stack32((segdat2[3] & 0x40) ? 1 : 0); + if (stack32) ESP=newsp; + else SP=newsp; + + do_seg_load(&cpu_state.seg_ss, segdat2); + + x86seg_log("Set access 1\n"); + +#ifdef SEL_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat2[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + + CS=seg2; + do_seg_load(&cpu_state.seg_cs, segdat); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + + set_use32(segdat[3]&0x40); + cpu_state.pc=newpc; + + x86seg_log("Set access 2\n"); + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, oaddr+4, segdat[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + + x86seg_log("Type %04X\n",type); + if (type==0xC00) + { + PUSHL(oldss); + PUSHL(oldsp2); + if (cpu_state.abrt) + { + SS = oldss; + ESP = oldsp2; + return; + } + if (count) + { + while (count) + { + count--; + PUSHL(readmeml(oldssbase,oldsp+(count*4))); + if (cpu_state.abrt) + { + SS = oldss; + ESP = oldsp2; + return; + } + } + } + } + else + { + x86seg_log("Stack %04X\n",SP); + PUSHW(oldss); + x86seg_log("Write SS to %04X:%04X\n",SS,SP); + PUSHW(oldsp2); + if (cpu_state.abrt) + { + SS = oldss; + ESP = oldsp2; + return; + } + x86seg_log("Write SP to %04X:%04X\n",SS,SP); + if (count) + { + while (count) + { + count--; + tempw=readmemw(oldssbase,(oldsp&0xFFFF)+(count*2)); + x86seg_log("PUSH %04X\n",tempw); + PUSHW(tempw); + if (cpu_state.abrt) + { + SS = oldss; + ESP = oldsp2; + return; + } + } + } + } + cycles -= timing_call_pm_gate_inner; + break; + } + else if (DPL > CPL) + { + x86gpf("loadcscall(): Call PM Gate Inner DPL > CPL",seg2&~3); + return; + } + /*FALLTHROUGH*/ + case 0x1C00: case 0x1D00: case 0x1E00: case 0x1F00: /*Conforming*/ + CS=seg2; + do_seg_load(&cpu_state.seg_cs, segdat); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + set_use32(segdat[3]&0x40); + cpu_state.pc=newpc; + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + cycles -= timing_call_pm_gate; + break; + + default: + x86gpf("loadcscall(): Unknown subtype",seg2&~3); + return; + } + break; + + case 0x100: /*286 Task gate*/ + case 0x900: /*386 Task gate*/ + cpu_state.pc=oxpc; + cpl_override=1; + taskswitch286(seg,segdat,segdat[2]&0x800); + cpl_override=0; + break; + + default: + x86gpf("loadcscall(): Unknown type",seg&~3); + return; + } + } + } + else + { + cpu_state.seg_cs.base=seg<<4; + cpu_state.seg_cs.limit=0xFFFF; + cpu_state.seg_cs.limit_low = 0; + cpu_state.seg_cs.limit_high = 0xffff; + CS=seg; + if (cpu_state.eflags&VM_FLAG) cpu_state.seg_cs.access=(3<<5) | 2 | 0x80; + else cpu_state.seg_cs.access=(0<<5) | 2 | 0x80; + cpu_state.seg_cs.ar_high = 0x10; + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + } +} + +void pmoderetf(int is32, uint16_t off) +{ + uint32_t newpc; + uint32_t newsp; + uint32_t addr, oaddr; + uint16_t segdat[4],segdat2[4],seg,newss; + uint32_t oldsp=ESP; + x86seg_log("RETF %i %04X:%04X %08X %04X\n",is32,CS,cpu_state.pc,cr0,cpu_state.eflags); + if (is32) + { + newpc=POPL(); + seg=POPL(); if (cpu_state.abrt) return; + } + else + { + x86seg_log("PC read from %04X:%04X\n",SS,SP); + newpc=POPW(); + x86seg_log("CS read from %04X:%04X\n",SS,SP); + seg=POPW(); if (cpu_state.abrt) return; + } + x86seg_log("Return to %04X:%08X\n",seg,newpc); + if ((seg&3)=ldt.limit) + { + x86gpf("pmoderetf(): Selector > LDT limit",seg&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86gpf("pmoderetf(): Selector > GDT limit",seg&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(0,addr+2); + segdat[2]=readmemw(0,addr+4); + segdat[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) { ESP=oldsp; return; } + oaddr = addr; + + x86seg_log("CPL %i RPL %i %i\n",CPL,seg&3,is32); + + if (stack32) ESP+=off; + else SP+=off; + + if (CPL==(seg&3)) + { + x86seg_log("RETF CPL = RPL %04X\n", segdat[2]); + switch (segdat[2]&0x1F00) + { + case 0x1800: case 0x1900: case 0x1A00: case 0x1B00: /*Non-conforming*/ + if (CPL != DPL) + { + ESP=oldsp; + x86gpf("pmoderetf(): Non-conforming CPL != DPL",seg&~3); + return; + } + break; + case 0x1C00: case 0x1D00: case 0x1E00: case 0x1F00: /*Conforming*/ + if (CPL < DPL) + { + ESP=oldsp; + x86gpf("pmoderetf(): Conforming CPL < DPL",seg&~3); + return; + } + break; + default: + x86gpf("pmoderetf(): Unknown type",seg&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + ESP=oldsp; + x86np("RETF CS not present\n", seg & 0xfffc); + return; + } + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + + cpu_state.pc=newpc; + if (segdat[2] & 0x400) + segdat[2] = (segdat[2] & ~(3 << (5+8))) | ((seg & 3) << (5+8)); + CS = seg; + do_seg_load(&cpu_state.seg_cs, segdat); + cpu_state.seg_cs.access = (cpu_state.seg_cs.access & ~(3 << 5)) | ((CS & 3) << 5); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + set_use32(segdat[3] & 0x40); + + cycles -= timing_retf_pm; + } + else + { + switch (segdat[2]&0x1F00) + { + case 0x1800: case 0x1900: case 0x1A00: case 0x1B00: /*Non-conforming*/ + if ((seg&3) != DPL) + { + ESP=oldsp; + x86gpf("pmoderetf(): Non-conforming RPL != DPL",seg&~3); + return; + } + x86seg_log("RETF non-conforming, %i %i\n",seg&3, DPL); + break; + case 0x1C00: case 0x1D00: case 0x1E00: case 0x1F00: /*Conforming*/ + if ((seg&3) < DPL) + { + ESP=oldsp; + x86gpf("pmoderetf(): Conforming RPL < DPL",seg&~3); + return; + } + x86seg_log("RETF conforming, %i %i\n",seg&3, DPL); + break; + default: + ESP=oldsp; + x86gpf("pmoderetf(): Unknown type",seg&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + ESP=oldsp; + x86np("RETF CS not present\n", seg & 0xfffc); + return; + } + if (is32) + { + newsp=POPL(); + newss=POPL(); if (cpu_state.abrt) return; + } + else + { + x86seg_log("SP read from %04X:%04X\n",SS,SP); + newsp=POPW(); + x86seg_log("SS read from %04X:%04X\n",SS,SP); + newss=POPW(); if (cpu_state.abrt) return; + } + x86seg_log("Read new stack : %04X:%04X (%08X)\n", newss, newsp, ldt.base); + if (!(newss&~3)) + { + ESP=oldsp; + x86gpf("pmoderetf(): New SS selector is zero",newss&~3); + return; + } + addr=newss&~7; + if (newss&4) + { + if (addr>=ldt.limit) + { + ESP=oldsp; + x86gpf("pmoderetf(): New SS selector > LDT limit",newss&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + ESP=oldsp; + x86gpf("pmoderetf(): New SS selector > GDT limit",newss&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat2[0]=readmemw(0,addr); + segdat2[1]=readmemw(0,addr+2); + segdat2[2]=readmemw(0,addr+4); + segdat2[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) { ESP=oldsp; return; } + x86seg_log("Segment data %04X %04X %04X %04X\n", segdat2[0], segdat2[1], segdat2[2], segdat2[3]); + if ((newss & 3) != (seg & 3)) + { + ESP=oldsp; + x86gpf("pmoderetf(): New SS RPL > CS RPL",newss&~3); + return; + } + if ((segdat2[2]&0x1A00)!=0x1200) + { + ESP=oldsp; + x86gpf("pmoderetf(): New SS unknown type",newss&~3); + return; + } + if (!(segdat2[2]&0x8000)) + { + ESP=oldsp; + x86np("RETF loading SS not present\n", newss & 0xfffc); + return; + } + if (DPL2 != (seg & 3)) + { + ESP=oldsp; + x86gpf("pmoderetf(): New SS DPL != CS RPL",newss&~3); + return; + } + SS=newss; + set_stack32((segdat2[3] & 0x40) ? 1 : 0); + if (stack32) ESP=newsp; + else SP=newsp; + do_seg_load(&cpu_state.seg_ss, segdat2); + +#ifdef SEL_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat2[2] | 0x100); /*Set accessed bit*/ + +#ifdef CS_ACCESSED + writememw(0, oaddr+4, segdat[2] | 0x100); /*Set accessed bit*/ +#endif + cpl_override = 0; +#endif + /*Conforming segments don't change CPL, so CPL = RPL*/ + if (segdat[2]&0x400) + segdat[2] = (segdat[2] & ~(3 << (5+8))) | ((seg & 3) << (5+8)); + + cpu_state.pc=newpc; + CS=seg; + do_seg_load(&cpu_state.seg_cs, segdat); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + set_use32(segdat[3] & 0x40); + + if (stack32) ESP+=off; + else SP+=off; + + check_seg_valid(&cpu_state.seg_ds); + check_seg_valid(&cpu_state.seg_es); + check_seg_valid(&cpu_state.seg_fs); + check_seg_valid(&cpu_state.seg_gs); + cycles -= timing_retf_pm_outer; + } +} + +void restore_stack() +{ + ss=oldss; cpu_state.seg_ss.limit=oldsslimit; +} + +void pmodeint(int num, int soft) +{ + uint16_t segdat[4],segdat2[4],segdat3[4]; + uint32_t addr, oaddr; + uint16_t newss; + uint32_t oldss,oldsp; + int type; + uint32_t newsp; + uint16_t seg = 0; + int new_cpl; + + if (cpu_state.eflags&VM_FLAG && IOPL!=3 && soft) + { + x86seg_log("V86 banned int\n"); + x86gpf("pmodeint(): V86 banned int",0); + return; + } + addr=(num<<3); + if (addr>=idt.limit) + { + if (num==8) + { + /*Triple fault - reset!*/ + softresetx86(); + cpu_set_edx(); + } + else if (num==0xD) + { + pmodeint(8,0); + } + else + { + x86gpf("pmodeint(): Vector > IDT limit",(num*8)+2+((soft)?0:1)); + } + x86seg_log("addr >= IDT.limit\n"); + return; + } + addr+=idt.base; + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(2,addr); + segdat[2]=readmemw(4,addr); + segdat[3]=readmemw(6,addr); cpl_override=0; if (cpu_state.abrt) { /* x86seg_log("Abrt reading from %08X\n",addr); */ return; } + oaddr = addr; + + x86seg_log("Addr %08X seg %04X %04X %04X %04X\n",addr,segdat[0],segdat[1],segdat[2],segdat[3]); + if (!(segdat[2]&0x1F00)) + { + x86gpf("pmodeint(): Vector descriptor with bad type",(num*8)+2); + return; + } + if (DPL=0x800)?32:16; + if (!(segdat[2]&0x8000)) + { + x86np("Int gate not present\n", (num << 3) | 2); + return; + } + seg=segdat[1]; + new_cpl = seg & 3; + + addr=seg&~7; + if (seg&4) + { + if (addr>=ldt.limit) + { + x86gpf("pmodeint(): Interrupt or trap gate selector > LDT limit",seg&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86gpf("pmodeint(): Interrupt or trap gate selector > GDT limit", seg&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat2[0]=readmemw(0,addr); + segdat2[1]=readmemw(0,addr+2); + segdat2[2]=readmemw(0,addr+4); + segdat2[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) return; + oaddr = addr; + + if (DPL2 > CPL) + { + x86gpf("pmodeint(): Interrupt or trap gate DPL > CPL",seg&~3); + return; + } + switch (segdat2[2]&0x1F00) + { + case 0x1800: case 0x1900: case 0x1A00: case 0x1B00: /*Non-conforming*/ + if (DPL2=ldt.limit) + { + x86ss(NULL,newss&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86ss(NULL,newss&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat3[0]=readmemw(0,addr); + segdat3[1]=readmemw(0,addr+2); + segdat3[2]=readmemw(0,addr+4); + segdat3[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) return; + if (((newss & 3) != DPL2) || (DPL3 != DPL2)) + { + x86ss(NULL,newss&~3); + return; + } + if ((segdat3[2]&0x1A00)!=0x1200) + { + x86ss(NULL,newss&~3); + return; + } + if (!(segdat3[2]&0x8000)) + { + x86np("Int gate loading SS not present\n", newss & 0xfffc); + return; + } + SS=newss; + set_stack32((segdat3[3] & 0x40) ? 1 : 0); + if (stack32) ESP=newsp; + else SP=newsp; + do_seg_load(&cpu_state.seg_ss, segdat3); + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat3[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + + x86seg_log("New stack %04X:%08X\n",SS,ESP); + cpl_override=1; + if (type>=0x800) + { + if (cpu_state.eflags & VM_FLAG) + { + PUSHL(GS); + PUSHL(FS); + PUSHL(DS); + PUSHL(ES); if (cpu_state.abrt) return; + loadseg(0,&cpu_state.seg_ds); + loadseg(0,&cpu_state.seg_es); + loadseg(0,&cpu_state.seg_fs); + loadseg(0,&cpu_state.seg_gs); + } + PUSHL(oldss); + PUSHL(oldsp); + PUSHL(cpu_state.flags|(cpu_state.eflags<<16)); + PUSHL(CS); + PUSHL(cpu_state.pc); if (cpu_state.abrt) return; + } + else + { + PUSHW(oldss); + PUSHW(oldsp); + PUSHW(cpu_state.flags); + PUSHW(CS); + PUSHW(cpu_state.pc); if (cpu_state.abrt) return; + } + cpl_override=0; + cpu_state.seg_cs.access=0 | 0x80; + cycles -= timing_int_pm_outer - timing_int_pm; + break; + } + else if (DPL2!=CPL) + { + x86gpf("pmodeint(): DPL != CPL",seg&~3); + return; + } + /*FALLTHROUGH*/ + case 0x1C00: case 0x1D00: case 0x1E00: case 0x1F00: /*Conforming*/ + if (!(segdat2[2]&0x8000)) + { + x86np("Int gate CS not present\n", segdat[1] & 0xfffc); + return; + } + if ((cpu_state.eflags & VM_FLAG) && DPL20x800) + { + PUSHL(cpu_state.flags|(cpu_state.eflags<<16)); + PUSHL(CS); + PUSHL(cpu_state.pc); if (cpu_state.abrt) return; + } + else + { + PUSHW(cpu_state.flags); + PUSHW(CS); + PUSHW(cpu_state.pc); if (cpu_state.abrt) return; + } + new_cpl = CS & 3; + break; + default: + x86gpf("pmodeint(): Unknown type",seg&~3); + return; + } + do_seg_load(&cpu_state.seg_cs, segdat2); + CS = (seg & ~3) | new_cpl; + cpu_state.seg_cs.access = (cpu_state.seg_cs.access & ~(3 << 5)) | (new_cpl << 5); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + if (type>0x800) cpu_state.pc=segdat[0]|(segdat[3]<<16); + else cpu_state.pc=segdat[0]; + set_use32(segdat2[3]&0x40); + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, oaddr+4, segdat2[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + + cpu_state.eflags&=~VM_FLAG; + cpu_cur_status &= ~CPU_STATUS_V86; + if (!(type&0x100)) + { + cpu_state.flags&=~I_FLAG; + } + cpu_state.flags&=~(T_FLAG|NT_FLAG); + cycles -= timing_int_pm; + break; + + case 0x500: /*Task gate*/ + seg=segdat[1]; + addr=seg&~7; + if (seg&4) + { + if (addr>=ldt.limit) + { + x86gpf("pmodeint(): Task gate selector > LDT limit",seg&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86gpf("pmodeint(): Task gate selector > GDT limit",seg&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat2[0]=readmemw(0,addr); + segdat2[1]=readmemw(0,addr+2); + segdat2[2]=readmemw(0,addr+4); + segdat2[3]=readmemw(0,addr+6); + cpl_override=0; if (cpu_state.abrt) return; + if (!(segdat2[2]&0x8000)) + { + x86np("Int task gate not present\n", segdat[1] & 0xfffc); + return; + } + optype=OPTYPE_INT; + cpl_override=1; + taskswitch286(seg,segdat2,segdat2[2]&0x800); + cpl_override=0; + break; + + default: + x86gpf(NULL,seg&~3); + return; + } +} + +void pmodeiret(int is32) +{ + uint32_t newsp; + uint16_t newss; + uint32_t tempflags,flagmask; + uint32_t newpc; + uint16_t segdat[4],segdat2[4]; + uint16_t segs[4]; + uint16_t seg; + uint32_t addr, oaddr; + uint32_t oldsp=ESP; + if (is386 && (cpu_state.eflags&VM_FLAG)) + { + if (IOPL!=3) + { + x86gpf(NULL,0); + return; + } + oxpc=cpu_state.pc; + if (is32) + { + newpc=POPL(); + seg=POPL(); + tempflags=POPL(); if (cpu_state.abrt) return; + } + else + { + newpc=POPW(); + seg=POPW(); + tempflags=POPW(); if (cpu_state.abrt) return; + } + cpu_state.pc = newpc; + cpu_state.seg_cs.base=seg<<4; + cpu_state.seg_cs.limit=0xFFFF; + cpu_state.seg_cs.limit_low = 0; + cpu_state.seg_cs.limit_high = 0xffff; + cpu_state.seg_cs.access |= 0x80; + cpu_state.seg_cs.ar_high = 0x10; + CS=seg; + cpu_state.flags=(cpu_state.flags&0x3000)|(tempflags&0xCFD5)|2; + cycles -= timing_iret_rm; + return; + } + + if (cpu_state.flags&NT_FLAG) + { + seg=readmemw(tr.base,0); + addr=seg&~7; + if (seg&4) + { + x86seg_log("TS LDT %04X %04X IRET\n",seg,gdt.limit); + x86ts("pmodeiret(): Selector points to LDT",seg&~3); + return; + } + else + { + if (addr>=gdt.limit) + { + x86ts(NULL,seg&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(0,addr+2); + segdat[2]=readmemw(0,addr+4); + segdat[3]=readmemw(0,addr+6); + taskswitch286(seg,segdat,segdat[2] & 0x800); + cpl_override=0; + return; + } + oxpc=cpu_state.pc; + flagmask=0xFFFF; + if (CPL) flagmask&=~0x3000; + if (IOPL>16)&VM_FLAG)) + { + newsp=POPL(); + newss=POPL(); + segs[0]=POPL(); + segs[1]=POPL(); + segs[2]=POPL(); + segs[3]=POPL(); if (cpu_state.abrt) { ESP = oldsp; return; } + cpu_state.eflags=tempflags>>16; + cpu_cur_status |= CPU_STATUS_V86; + loadseg(segs[0],&cpu_state.seg_es); + do_seg_v86_init(&cpu_state.seg_es); + loadseg(segs[1],&cpu_state.seg_ds); + do_seg_v86_init(&cpu_state.seg_ds); + cpu_cur_status |= CPU_STATUS_NOTFLATDS; + loadseg(segs[2],&cpu_state.seg_fs); + do_seg_v86_init(&cpu_state.seg_fs); + loadseg(segs[3],&cpu_state.seg_gs); + do_seg_v86_init(&cpu_state.seg_gs); + + cpu_state.pc=newpc; + cpu_state.seg_cs.base=seg<<4; + cpu_state.seg_cs.limit=0xFFFF; + cpu_state.seg_cs.limit_low = 0; + cpu_state.seg_cs.limit_high = 0xffff; + CS=seg; + cpu_state.seg_cs.access=(3<<5) | 2 | 0x80; + cpu_state.seg_cs.ar_high=0x10; + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + + ESP=newsp; + loadseg(newss,&cpu_state.seg_ss); + do_seg_v86_init(&cpu_state.seg_ss); + cpu_cur_status |= CPU_STATUS_NOTFLATSS; + use32=0; + cpu_cur_status &= ~CPU_STATUS_USE32; + cpu_state.flags=(tempflags&0xFFD5)|2; + cycles -= timing_iret_v86; + return; + } + } + else + { + newpc=POPW(); + seg=POPW(); + tempflags=POPW(); if (cpu_state.abrt) { ESP = oldsp; return; } + } + if (!(seg&~3)) + { + ESP = oldsp; + x86gpf(NULL,0); + return; + } + + addr=seg&~7; + if (seg&4) + { + if (addr>=ldt.limit) + { + ESP = oldsp; + x86gpf("pmodeiret(): Selector > LDT limit",seg&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + ESP = oldsp; + x86gpf("pmodeiret(): Selector > GDT limit",seg&~3); + return; + } + addr+=gdt.base; + } + if ((seg&3) < CPL) + { + ESP = oldsp; + x86gpf("pmodeiret(): RPL < CPL",seg&~3); + return; + } + cpl_override=1; + segdat[0]=readmemw(0,addr); + segdat[1]=readmemw(0,addr+2); + segdat[2]=readmemw(0,addr+4); + segdat[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) { ESP = oldsp; return; } + + switch (segdat[2]&0x1F00) + { + case 0x1800: case 0x1900: case 0x1A00: case 0x1B00: /*Non-conforming code*/ + if ((seg&3) != DPL) + { + ESP = oldsp; + x86gpf("pmodeiret(): Non-conforming RPL != DPL",seg&~3); + return; + } + break; + case 0x1C00: case 0x1D00: case 0x1E00: case 0x1F00: /*Conforming code*/ + if ((seg&3) < DPL) + { + ESP = oldsp; + x86gpf("pmodeiret(): Conforming RPL < DPL",seg&~3); + return; + } + break; + default: + ESP = oldsp; + x86gpf(NULL,seg&~3); + return; + } + if (!(segdat[2]&0x8000)) + { + ESP = oldsp; + x86np("IRET CS not present\n", seg & 0xfffc); + return; + } + if ((seg&3) == CPL) + { + CS=seg; + do_seg_load(&cpu_state.seg_cs, segdat); + cpu_state.seg_cs.access = (cpu_state.seg_cs.access & ~(3 << 5)) | ((CS & 3) << 5); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + set_use32(segdat[3]&0x40); + +#ifdef CS_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat[2] | 0x100); /*Set accessed bit*/ + cpl_override = 0; +#endif + cycles -= timing_iret_pm; + } + else /*Return to outer level*/ + { + oaddr = addr; + x86seg_log("Outer level\n"); + if (is32) + { + newsp=POPL(); + newss=POPL(); if (cpu_state.abrt) { ESP = oldsp; return; } + } + else + { + newsp=POPW(); + newss=POPW(); if (cpu_state.abrt) { ESP = oldsp; return; } + } + + x86seg_log("IRET load stack %04X:%04X\n",newss,newsp); + + if (!(newss&~3)) + { + ESP = oldsp; + x86gpf("pmodeiret(): New SS selector is zero",newss&~3); + return; + } + addr=newss&~7; + if (newss&4) + { + if (addr>=ldt.limit) + { + ESP = oldsp; + x86gpf("pmodeiret(): New SS selector > LDT limit",newss&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + ESP = oldsp; + x86gpf("pmodeiret(): New SS selector > GDT limit",newss&~3); + return; + } + addr+=gdt.base; + } + cpl_override=1; + segdat2[0]=readmemw(0,addr); + segdat2[1]=readmemw(0,addr+2); + segdat2[2]=readmemw(0,addr+4); + segdat2[3]=readmemw(0,addr+6); cpl_override=0; if (cpu_state.abrt) { ESP = oldsp; return; } + if ((newss & 3) != (seg & 3)) + { + ESP = oldsp; + x86gpf("pmodeiret(): New SS RPL > CS RPL",newss&~3); + return; + } + if ((segdat2[2]&0x1A00)!=0x1200) + { + ESP = oldsp; + x86gpf("pmodeiret(): New SS bad type",newss&~3); + return; + } + if (DPL2 != (seg & 3)) + { + ESP = oldsp; + x86gpf("pmodeiret(): New SS DPL != CS RPL",newss&~3); + return; + } + if (!(segdat2[2]&0x8000)) + { + ESP = oldsp; + x86np("IRET loading SS not present\n", newss & 0xfffc); + return; + } + SS=newss; + set_stack32((segdat2[3] & 0x40) ? 1 : 0); + if (stack32) ESP=newsp; + else SP=newsp; + do_seg_load(&cpu_state.seg_ss, segdat2); + +#ifdef SEL_ACCESSED + cpl_override = 1; + writememw(0, addr+4, segdat2[2] | 0x100); /*Set accessed bit*/ + +#ifdef CS_ACCESSED + writememw(0, oaddr+4, segdat[2] | 0x100); /*Set accessed bit*/ +#endif + cpl_override = 0; +#endif + /*Conforming segments don't change CPL, so CPL = RPL*/ + if (segdat[2]&0x400) + segdat[2] = (segdat[2] & ~(3 << (5+8))) | ((seg & 3) << (5+8)); + + CS=seg; + do_seg_load(&cpu_state.seg_cs, segdat); + cpu_state.seg_cs.access = (cpu_state.seg_cs.access & ~(3 << 5)) | ((CS & 3) << 5); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + set_use32(segdat[3] & 0x40); + + check_seg_valid(&cpu_state.seg_ds); + check_seg_valid(&cpu_state.seg_es); + check_seg_valid(&cpu_state.seg_fs); + check_seg_valid(&cpu_state.seg_gs); + cycles -= timing_iret_pm_outer; + } + cpu_state.pc=newpc; + cpu_state.flags=(cpu_state.flags&~flagmask)|(tempflags&flagmask&0xFFD5)|2; + if (is32) cpu_state.eflags=tempflags>>16; +} + +void taskswitch286(uint16_t seg, uint16_t *segdat, int is32) +{ + uint32_t base; + uint32_t limit; + uint32_t templ; + uint16_t tempw; + + uint32_t new_cr3=0; + uint16_t new_es,new_cs,new_ss,new_ds,new_fs,new_gs; + uint16_t new_ldt; + + uint32_t new_eax,new_ebx,new_ecx,new_edx,new_esp,new_ebp,new_esi,new_edi,new_pc,new_flags; + + uint32_t addr; + + uint16_t segdat2[4]; + + base=segdat[1]|((segdat[2]&0xFF)<<16); + limit=segdat[0]; + if(is386) + { + base |= (segdat[3]>>8)<<24; + limit |= (segdat[3]&0xF)<<16; + } + + if (is32) + { + if (limit < 103) + { + x86ts(NULL, seg); + return; + } + + if (optype==JMP || optype==CALL || optype==OPTYPE_INT) + { + if (tr.seg&4) tempw=readmemw(ldt.base,(seg&~7)+4); + else tempw=readmemw(gdt.base,(seg&~7)+4); + if (cpu_state.abrt) return; + tempw|=0x200; + if (tr.seg&4) writememw(ldt.base,(seg&~7)+4,tempw); + else writememw(gdt.base,(seg&~7)+4,tempw); + } + if (cpu_state.abrt) return; + + if (optype==IRET) cpu_state.flags&=~NT_FLAG; + + cpu_386_flags_rebuild(); + writememl(tr.base,0x1C,cr3); + writememl(tr.base,0x20,cpu_state.pc); + writememl(tr.base,0x24,cpu_state.flags|(cpu_state.eflags<<16)); + + writememl(tr.base,0x28,EAX); + writememl(tr.base,0x2C,ECX); + writememl(tr.base,0x30,EDX); + writememl(tr.base,0x34,EBX); + writememl(tr.base,0x38,ESP); + writememl(tr.base,0x3C,EBP); + writememl(tr.base,0x40,ESI); + writememl(tr.base,0x44,EDI); + + writememl(tr.base,0x48,ES); + writememl(tr.base,0x4C,CS); + writememl(tr.base,0x50,SS); + writememl(tr.base,0x54,DS); + writememl(tr.base,0x58,FS); + writememl(tr.base,0x5C,GS); + + if (optype==JMP || optype==IRET) + { + if (tr.seg&4) tempw=readmemw(ldt.base,(tr.seg&~7)+4); + else tempw=readmemw(gdt.base,(tr.seg&~7)+4); + if (cpu_state.abrt) return; + tempw&=~0x200; + if (tr.seg&4) writememw(ldt.base,(tr.seg&~7)+4,tempw); + else writememw(gdt.base,(tr.seg&~7)+4,tempw); + } + if (cpu_state.abrt) return; + + if (optype==OPTYPE_INT || optype==CALL) + { + writememl(base,0,tr.seg); + if (cpu_state.abrt) + return; + } + + + new_cr3=readmeml(base,0x1C); + new_pc=readmeml(base,0x20); + new_flags=readmeml(base,0x24); + if (optype == OPTYPE_INT || optype == CALL) + new_flags |= NT_FLAG; + + new_eax=readmeml(base,0x28); + new_ecx=readmeml(base,0x2C); + new_edx=readmeml(base,0x30); + new_ebx=readmeml(base,0x34); + new_esp=readmeml(base,0x38); + new_ebp=readmeml(base,0x3C); + new_esi=readmeml(base,0x40); + new_edi=readmeml(base,0x44); + + new_es=readmemw(base,0x48); + new_cs=readmemw(base,0x4C); + new_ss=readmemw(base,0x50); + new_ds=readmemw(base,0x54); + new_fs=readmemw(base,0x58); + new_gs=readmemw(base,0x5C); + new_ldt=readmemw(base,0x60); + + cr0 |= 8; + + cr3=new_cr3; + flushmmucache(); + + cpu_state.pc=new_pc; + cpu_state.flags=new_flags; + cpu_state.eflags=new_flags>>16; + cpu_386_flags_extract(); + + ldt.seg=new_ldt; + templ=(ldt.seg&~7)+gdt.base; + ldt.limit=readmemw(0,templ); + if (readmemb(0,templ+6)&0x80) + { + ldt.limit<<=12; + ldt.limit|=0xFFF; + } + ldt.base=(readmemw(0,templ+2))|(readmemb(0,templ+4)<<16)|(readmemb(0,templ+7)<<24); + + if (cpu_state.eflags & VM_FLAG) + { + loadcs(new_cs); + set_use32(0); + cpu_cur_status |= CPU_STATUS_V86; + } + else + { + if (!(new_cs&~3)) + { + x86ts(NULL,0); + return; + } + addr=new_cs&~7; + if (new_cs&4) + { + if (addr>=ldt.limit) + { + x86ts(NULL,new_cs&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86ts(NULL,new_cs&~3); + return; + } + addr+=gdt.base; + } + segdat2[0]=readmemw(0,addr); + segdat2[1]=readmemw(0,addr+2); + segdat2[2]=readmemw(0,addr+4); + segdat2[3]=readmemw(0,addr+6); + if (!(segdat2[2]&0x8000)) + { + x86np("TS loading CS not present\n", new_cs & 0xfffc); + return; + } + switch (segdat2[2]&0x1F00) + { + case 0x1800: case 0x1900: case 0x1A00: case 0x1B00: /*Non-conforming*/ + if ((new_cs&3) != DPL2) + { + x86ts(NULL,new_cs&~3); + return; + } + break; + case 0x1C00: case 0x1D00: case 0x1E00: case 0x1F00: /*Conforming*/ + if ((new_cs&3) < DPL2) + { + x86ts(NULL,new_cs&~3); + return; + } + break; + default: + x86ts(NULL,new_cs&~3); + return; + } + + CS=new_cs; + do_seg_load(&cpu_state.seg_cs, segdat2); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + set_use32(segdat2[3] & 0x40); + cpu_cur_status &= ~CPU_STATUS_V86; + } + + EAX=new_eax; + ECX=new_ecx; + EDX=new_edx; + EBX=new_ebx; + ESP=new_esp; + EBP=new_ebp; + ESI=new_esi; + EDI=new_edi; + + loadseg(new_es,&cpu_state.seg_es); + loadseg(new_ss,&cpu_state.seg_ss); + loadseg(new_ds,&cpu_state.seg_ds); + loadseg(new_fs,&cpu_state.seg_fs); + loadseg(new_gs,&cpu_state.seg_gs); + } + else + { + if (limit < 43) + { + x86ts(NULL, seg); + return; + } + + if (optype==JMP || optype==CALL || optype==OPTYPE_INT) + { + if (tr.seg&4) tempw=readmemw(ldt.base,(seg&~7)+4); + else tempw=readmemw(gdt.base,(seg&~7)+4); + if (cpu_state.abrt) return; + tempw|=0x200; + if (tr.seg&4) writememw(ldt.base,(seg&~7)+4,tempw); + else writememw(gdt.base,(seg&~7)+4,tempw); + } + if (cpu_state.abrt) return; + + if (optype==IRET) cpu_state.flags&=~NT_FLAG; + + cpu_386_flags_rebuild(); + writememw(tr.base,0x0E,cpu_state.pc); + writememw(tr.base,0x10,cpu_state.flags); + + writememw(tr.base,0x12,AX); + writememw(tr.base,0x14,CX); + writememw(tr.base,0x16,DX); + writememw(tr.base,0x18,BX); + writememw(tr.base,0x1A,SP); + writememw(tr.base,0x1C,BP); + writememw(tr.base,0x1E,SI); + writememw(tr.base,0x20,DI); + + writememw(tr.base,0x22,ES); + writememw(tr.base,0x24,CS); + writememw(tr.base,0x26,SS); + writememw(tr.base,0x28,DS); + + if (optype==JMP || optype==IRET) + { + if (tr.seg&4) tempw=readmemw(ldt.base,(tr.seg&~7)+4); + else tempw=readmemw(gdt.base,(tr.seg&~7)+4); + if (cpu_state.abrt) return; + tempw&=~0x200; + if (tr.seg&4) writememw(ldt.base,(tr.seg&~7)+4,tempw); + else writememw(gdt.base,(tr.seg&~7)+4,tempw); + } + if (cpu_state.abrt) return; + + if (optype==OPTYPE_INT || optype==CALL) + { + writememw(base,0,tr.seg); + if (cpu_state.abrt) + return; + } + + new_pc=readmemw(base,0x0E); + new_flags=readmemw(base,0x10); + if (optype == OPTYPE_INT || optype == CALL) + new_flags |= NT_FLAG; + + new_eax=readmemw(base,0x12); + new_ecx=readmemw(base,0x14); + new_edx=readmemw(base,0x16); + new_ebx=readmemw(base,0x18); + new_esp=readmemw(base,0x1A); + new_ebp=readmemw(base,0x1C); + new_esi=readmemw(base,0x1E); + new_edi=readmemw(base,0x20); + + new_es=readmemw(base,0x22); + new_cs=readmemw(base,0x24); + new_ss=readmemw(base,0x26); + new_ds=readmemw(base,0x28); + new_ldt=readmemw(base,0x2A); + + msw |= 8; + + cpu_state.pc=new_pc; + cpu_state.flags=new_flags; + cpu_386_flags_extract(); + + ldt.seg=new_ldt; + templ=(ldt.seg&~7)+gdt.base; + ldt.limit=readmemw(0,templ); + ldt.base=(readmemw(0,templ+2))|(readmemb(0,templ+4)<<16); + if (is386) + { + if (readmemb(0,templ+6)&0x80) + { + ldt.limit<<=12; + ldt.limit|=0xFFF; + } + ldt.base|=(readmemb(0,templ+7)<<24); + } + + if (!(new_cs&~3)) + { + x86ts(NULL,0); + return; + } + addr=new_cs&~7; + if (new_cs&4) + { + if (addr>=ldt.limit) + { + x86ts(NULL,new_cs&~3); + return; + } + addr+=ldt.base; + } + else + { + if (addr>=gdt.limit) + { + x86ts(NULL,new_cs&~3); + return; + } + addr+=gdt.base; + } + segdat2[0]=readmemw(0,addr); + segdat2[1]=readmemw(0,addr+2); + segdat2[2]=readmemw(0,addr+4); + segdat2[3]=readmemw(0,addr+6); + if (!(segdat2[2]&0x8000)) + { + x86np("TS loading CS not present\n", new_cs & 0xfffc); + return; + } + switch (segdat2[2]&0x1F00) + { + case 0x1800: case 0x1900: case 0x1A00: case 0x1B00: /*Non-conforming*/ + if ((new_cs&3) != DPL2) + { + x86ts(NULL,new_cs&~3); + return; + } + break; + case 0x1C00: case 0x1D00: case 0x1E00: case 0x1F00: /*Conforming*/ + if ((new_cs&3) < DPL2) + { + x86ts(NULL,new_cs&~3); + return; + } + break; + default: + x86ts(NULL,new_cs&~3); + return; + } + + CS=new_cs; + do_seg_load(&cpu_state.seg_cs, segdat2); + if (CPL==3 && oldcpl!=3) flushmmucache_cr3(); + set_use32(0); + + EAX=new_eax | 0xFFFF0000; + ECX=new_ecx | 0xFFFF0000; + EDX=new_edx | 0xFFFF0000; + EBX=new_ebx | 0xFFFF0000; + ESP=new_esp | 0xFFFF0000; + EBP=new_ebp | 0xFFFF0000; + ESI=new_esi | 0xFFFF0000; + EDI=new_edi | 0xFFFF0000; + + loadseg(new_es,&cpu_state.seg_es); + loadseg(new_ss,&cpu_state.seg_ss); + loadseg(new_ds,&cpu_state.seg_ds); + if (is386) + { + loadseg(0,&cpu_state.seg_fs); + loadseg(0,&cpu_state.seg_gs); + } + } + + tr.seg=seg; + tr.base=base; + tr.limit=limit; + tr.access=segdat[2]>>8; + tr.ar_high = segdat[3] & 0xff; +}