From be79ea78c73b49b54490bfb02a04691da83fadef Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Thu, 10 Aug 2023 15:43:16 -0400 Subject: [PATCH] sonarlinting and formatting in src/cpu --- src/cpu/386.c | 8 +- src/cpu/386_common.c | 93 +- src/cpu/386_dynarec.c | 23 +- src/cpu/386_dynarec_ops.c | 5 +- src/cpu/8080.c | 20 +- src/cpu/808x.c | 184 ++- src/cpu/808x/queue.c | 3 +- src/cpu/808x/queue.h | 29 +- src/cpu/codegen_timing_486.c | 428 ++--- src/cpu/codegen_timing_686.c | 712 ++++---- src/cpu/codegen_timing_common.c | 181 ++- src/cpu/codegen_timing_common.h | 78 +- src/cpu/codegen_timing_k6.c | 2522 ++++++++++++++--------------- src/cpu/codegen_timing_p6.c | 2356 +++++++++++++-------------- src/cpu/codegen_timing_pentium.c | 1159 ++++++------- src/cpu/codegen_timing_winchip.c | 416 ++--- src/cpu/codegen_timing_winchip2.c | 825 +++++----- src/cpu/cpu.c | 26 +- src/cpu/cpu.h | 18 +- src/cpu/x86.c | 14 +- src/cpu/x86.h | 52 +- src/cpu/x86_ops_3dnow.h | 3 +- src/cpu/x86_ops_arith.h | 69 +- src/cpu/x86_ops_atomic.h | 60 +- src/cpu/x86_ops_bcd.h | 10 +- src/cpu/x86_ops_bit.h | 21 +- src/cpu/x86_ops_call.h | 36 +- src/cpu/x86_ops_fpu.h | 4 +- src/cpu/x86_ops_i686.h | 6 +- src/cpu/x86_ops_jump.h | 13 +- src/cpu/x86_ops_misc.h | 27 +- src/cpu/x86_ops_mmx_mov.h | 4 +- src/cpu/x86_ops_mov_ctrl.h | 44 +- src/cpu/x86_ops_mov_seg.h | 12 +- src/cpu/x86_ops_mul.h | 24 +- src/cpu/x86_ops_pmode.h | 18 +- src/cpu/x86_ops_prefix.h | 3 +- src/cpu/x86_ops_ret.h | 4 +- src/cpu/x86_ops_shift.h | 54 +- src/cpu/x86_ops_stack.h | 16 +- src/cpu/x86_ops_string.h | 18 +- src/cpu/x86_ops_string_2386.h | 18 +- src/cpu/x86seg.c | 194 ++- src/cpu/x87.c | 29 +- src/cpu/x87.h | 8 +- src/cpu/x87_ops.h | 17 +- src/cpu/x87_ops_misc.h | 19 +- src/cpu/x87_ops_sf.h | 16 +- src/cpu/x87_ops_sf_arith.h | 72 +- src/cpu/x87_ops_sf_compare.h | 30 +- src/cpu/x87_ops_sf_misc.h | 6 +- src/cpu/x87_ops_sf_trans.h | 31 +- 52 files changed, 5203 insertions(+), 4835 deletions(-) diff --git a/src/cpu/386.c b/src/cpu/386.c index eebbae29e..11e87cadc 100644 --- a/src/cpu/386.c +++ b/src/cpu/386.c @@ -230,8 +230,12 @@ fetch_ea_16_long(uint32_t rmdat) void exec386_2386(int cycs) { - int vector, tempi, cycdiff, oldcyc; - int cycle_period, ins_cycles; + int vector; + int tempi; + int cycdiff; + int oldcyc; + int cycle_period; + int ins_cycles; uint32_t addr; cycles += cycs; diff --git a/src/cpu/386_common.c b/src/cpu/386_common.c index 5763a787f..f8593fdbe 100644 --- a/src/cpu/386_common.c +++ b/src/cpu/386_common.c @@ -26,6 +26,7 @@ #include "386_common.h" #include "x86_flags.h" #include "x86seg.h" +#include <86box/plat_unused.h> #ifdef USE_DYNAREC # include "codegen.h" @@ -34,19 +35,26 @@ # define CPU_BLOCK_END() #endif -x86seg gdt, ldt, idt, tr; +x86seg gdt; +x86seg ldt; +x86seg idt; +x86seg tr; -uint32_t cr2, cr3, cr4; +uint32_t cr2; +uint32_t cr3; +uint32_t cr4; uint32_t dr[8]; uint32_t use32; int stack32; -uint32_t *eal_r, *eal_w; +uint32_t *eal_r; +uint32_t *eal_w; int nmi_enable = 1; -int alt_access, cpl_override = 0; +int alt_access; +int cpl_override = 0; #ifdef USE_NEW_DYNAREC uint16_t cpu_cur_status = 0; @@ -59,23 +67,33 @@ extern uint8_t *pccache2; extern int optype; extern uint32_t pccache; -int in_sys = 0, unmask_a20_in_smm = 0; -uint32_t old_rammask = 0xffffffff; +int in_sys = 0; +int unmask_a20_in_smm = 0; +uint32_t old_rammask = 0xffffffff; int soft_reset_mask = 0; int smi_latched = 0; -int smm_in_hlt = 0, smi_block = 0; +int smm_in_hlt = 0; +int smi_block = 0; int prefetch_prefixes = 0; -int tempc, oldcpl, optype, inttype, oddeven = 0; +int tempc; +int oldcpl; +int optype; +int inttype; +int oddeven = 0; int timetolive; uint16_t oldcs; -uint32_t oldds, oldss, olddslimit, oldsslimit, - olddslimitw, oldsslimitw; +uint32_t oldds; +uint32_t oldss; +uint32_t olddslimit; +uint32_t oldsslimit; +uint32_t olddslimitw; +uint32_t oldsslimitw; uint32_t oxpc; uint32_t rmdat32; uint32_t backupregs[16]; @@ -99,8 +117,10 @@ int opcode_length[256] = { 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 1, 1, 1, 1, /* 0xex */ 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 3, 3 }; /* 0xfx */ -uint32_t addr64, addr64_2; -uint32_t addr64a[8], addr64a_2[8]; +uint32_t addr64; +uint32_t addr64_2; +uint32_t addr64a[8]; +uint32_t addr64a_2[8]; static pc_timer_t *cpu_fast_off_timer = NULL; static double cpu_fast_off_period = 0.0; @@ -494,12 +514,10 @@ smm_seg_load(x86seg *s) static void smram_save_state_p5(uint32_t *saved_state, int in_hlt) { - int n = 0; - saved_state[SMRAM_FIELD_P5_SMM_REVISION_ID] = SMM_REVISION_ID; saved_state[SMRAM_FIELD_P5_SMBASE_OFFSET] = smbase; - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) saved_state[SMRAM_FIELD_P5_EAX - n] = cpu_state.regs[n].l; if (in_hlt) @@ -588,9 +606,7 @@ smram_save_state_p5(uint32_t *saved_state, int in_hlt) static void smram_restore_state_p5(uint32_t *saved_state) { - int n = 0; - - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) cpu_state.regs[n].l = saved_state[SMRAM_FIELD_P5_EAX - n]; if (saved_state[SMRAM_FIELD_P5_AUTOHALT_RESTART] & 0xffff) @@ -701,12 +717,10 @@ smram_restore_state_p5(uint32_t *saved_state) static void smram_save_state_p6(uint32_t *saved_state, int in_hlt) { - int n = 0; - saved_state[SMRAM_FIELD_P6_SMM_REVISION_ID] = SMM_REVISION_ID; saved_state[SMRAM_FIELD_P6_SMBASE_OFFSET] = smbase; - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) saved_state[SMRAM_FIELD_P6_EAX - n] = cpu_state.regs[n].l; if (in_hlt) @@ -787,9 +801,7 @@ smram_save_state_p6(uint32_t *saved_state, int in_hlt) static void smram_restore_state_p6(uint32_t *saved_state) { - int n = 0; - - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) cpu_state.regs[n].l = saved_state[SMRAM_FIELD_P6_EAX - n]; if (saved_state[SMRAM_FIELD_P6_AUTOHALT_RESTART] & 0xffff) @@ -894,12 +906,10 @@ smram_restore_state_p6(uint32_t *saved_state) static void smram_save_state_amd_k(uint32_t *saved_state, int in_hlt) { - int n = 0; - saved_state[SMRAM_FIELD_AMD_K_SMM_REVISION_ID] = SMM_REVISION_ID; saved_state[SMRAM_FIELD_AMD_K_SMBASE_OFFSET] = smbase; - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) saved_state[SMRAM_FIELD_AMD_K_EAX - n] = cpu_state.regs[n].l; if (in_hlt) @@ -979,9 +989,7 @@ smram_save_state_amd_k(uint32_t *saved_state, int in_hlt) static void smram_restore_state_amd_k(uint32_t *saved_state) { - int n = 0; - - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) cpu_state.regs[n].l = saved_state[SMRAM_FIELD_AMD_K_EAX - n]; if (saved_state[SMRAM_FIELD_AMD_K_AUTOHALT_RESTART] & 0xffff) @@ -1080,7 +1088,7 @@ smram_restore_state_amd_k(uint32_t *saved_state) } static void -smram_save_state_cyrix(uint32_t *saved_state, int in_hlt) +smram_save_state_cyrix(uint32_t *saved_state, UNUSED(int in_hlt)) { saved_state[0] = dr[7]; saved_state[1] = cpu_state.flags | (cpu_state.eflags << 16); @@ -1104,7 +1112,7 @@ smram_restore_state_cyrix(uint32_t *saved_state) void enter_smm(int in_hlt) { - uint32_t saved_state[SMM_SAVE_STATE_MAP_SIZE], n; + uint32_t saved_state[SMM_SAVE_STATE_MAP_SIZE]; uint32_t smram_state = smbase + 0x10000; /* If it's a CPU on which SMM is not supported, do nothing. */ @@ -1231,7 +1239,7 @@ enter_smm(int in_hlt) writememl(0, smram_state - 0x18, saved_state[5]); writememl(0, smram_state - 0x24, saved_state[6]); } else { - for (n = 0; n < SMM_SAVE_STATE_MAP_SIZE; n++) { + for (uint8_t n = 0; n < SMM_SAVE_STATE_MAP_SIZE; n++) { smram_state -= 4; writememl(0, smram_state, saved_state[n]); } @@ -1291,7 +1299,7 @@ enter_smm_check(int in_hlt) void leave_smm(void) { - uint32_t saved_state[SMM_SAVE_STATE_MAP_SIZE], n; + uint32_t saved_state[SMM_SAVE_STATE_MAP_SIZE]; uint32_t smram_state = smbase + 0x10000; /* If it's a CPU on which SMM is not supported (or not implemented in 86Box), do nothing. */ @@ -1312,7 +1320,7 @@ leave_smm(void) cyrix_load_seg_descriptor(smram_state - 0x20, &cpu_state.seg_cs); saved_state[6] = readmeml(0, smram_state - 0x24); } else { - for (n = 0; n < SMM_SAVE_STATE_MAP_SIZE; n++) { + for (uint8_t n = 0; n < SMM_SAVE_STATE_MAP_SIZE; n++) { smram_state -= 4; saved_state[n] = readmeml(0, smram_state); x386_common_log("Reading %08X from memory at %08X to array element %i\n", saved_state[n], smram_state, n); @@ -1483,7 +1491,8 @@ int x86_int_sw_rm(int num) { uint32_t addr; - uint16_t new_pc, new_cs; + uint16_t new_pc; + uint16_t new_cs; flags_rebuild(); cycles -= timing_int; @@ -1571,8 +1580,10 @@ checkio(uint32_t port, int mask) int divl(uint32_t val) { - uint64_t num, quo; - uint32_t rem, quo32; + uint64_t num; + uint64_t quo; + uint32_t rem; + uint32_t quo32; if (val == 0) { divexcp(); @@ -1598,8 +1609,10 @@ divl(uint32_t val) int idivl(int32_t val) { - int64_t num, quo; - int32_t rem, quo32; + int64_t num; + int64_t quo; + int32_t rem; + int32_t quo32; if (val == 0) { divexcp(); diff --git a/src/cpu/386_dynarec.c b/src/cpu/386_dynarec.c index 21eb7c2b7..38261a234 100644 --- a/src/cpu/386_dynarec.c +++ b/src/cpu/386_dynarec.c @@ -46,7 +46,8 @@ #define CPU_BLOCK_END() cpu_block_end = 1 -int inrecomp = 0, cpu_block_end = 0; +int inrecomp = 0; +int cpu_block_end = 0; int cpu_end_block_after_ins = 0; #ifdef ENABLE_386_DYNAREC_LOG @@ -334,7 +335,8 @@ exec386_dynarec_int(void) static __inline void exec386_dynarec_dyn(void) { - uint32_t start_pc = 0, phys_addr = get_phys(cs + cpu_state.pc); + uint32_t start_pc = 0; + uint32_t phys_addr = get_phys(cs + cpu_state.pc); int hash = HASH(phys_addr); # ifdef USE_NEW_DYNAREC codeblock_t *block = &codeblock[codeblock_hash[hash]]; @@ -674,10 +676,13 @@ exec386_dynarec_dyn(void) void exec386_dynarec(int cycs) { - int vector, tempi; + int vector; + int tempi; int cycdiff; - int oldcyc, oldcyc2; - uint64_t oldtsc, delta; + int oldcyc; + int oldcyc2; + uint64_t oldtsc; + uint64_t delta; int cyc_period = cycs / 2000; /*5us*/ @@ -794,8 +799,12 @@ exec386_dynarec(int cycs) void exec386(int cycs) { - int vector, tempi, cycdiff, oldcyc; - int cycle_period, ins_cycles; + int vector; + int tempi; + int cycdiff; + int oldcyc; + int cycle_period; + int ins_cycles; uint32_t addr; cycles += cycs; diff --git a/src/cpu/386_dynarec_ops.c b/src/cpu/386_dynarec_ops.c index f3e2f6e6e..f46062bcc 100644 --- a/src/cpu/386_dynarec_ops.c +++ b/src/cpu/386_dynarec_ops.c @@ -21,6 +21,7 @@ #include <86box/pic.h> #include <86box/gdbstub.h> #include "codegen.h" +#include <86box/plat_unused.h> #define CPU_BLOCK_END() cpu_block_end = 1 @@ -31,7 +32,7 @@ #include "386_common.h" static __inline void -fetch_ea_32_long(uint32_t rmdat) +fetch_ea_32_long(UNUSED(uint32_t rmdat)) { eal_r = eal_w = NULL; easeg = cpu_state.ea_seg->base; @@ -45,7 +46,7 @@ fetch_ea_32_long(uint32_t rmdat) } static __inline void -fetch_ea_16_long(uint32_t rmdat) +fetch_ea_16_long(UNUSED(uint32_t rmdat)) { eal_r = eal_w = NULL; easeg = cpu_state.ea_seg->base; diff --git a/src/cpu/8080.c b/src/cpu/8080.c index 6f3dd4267..7a7e7b96c 100644 --- a/src/cpu/8080.c +++ b/src/cpu/8080.c @@ -19,15 +19,21 @@ #include <86box/timer.h> #include <86box/i8080.h> #include <86box/mem.h> +#include <86box/plat_unused.h> -static int completed = 1; -static int in_rep = 0, repeating = 0, rep_c_flag = 0; -static int oldc, cycdiff; +static int completed = 1; +static int in_rep = 0; +static int repeating = 0; +static int rep_c_flag = 0; +static int oldc; +static int cycdiff; #ifdef UNUSED_8080_VARS static int prefetching = 1; -static int refresh = 0, clear_lock = 0; +static int refresh = 0; +static int clear_lock = 0; -static uint32_t cpu_src = 0, cpu_dest = 0; +static uint32_t cpu_src = 0; +static uint32_t cpu_dest = 0; static uint32_t cpu_data = 0; #endif @@ -43,7 +49,7 @@ clock_end(void) int diff = cycdiff - cycles; /* On 808x systems, clock speed is usually crystal frequency divided by an integer. */ - tsc += (uint64_t) diff * ((uint64_t) xt_cpu_multi >> 32ULL); /* Shift xt_cpu_multi by 32 bits to the right and then multiply. */ + tsc += (uint64_t) diff * (xt_cpu_multi >> 32ULL); /* Shift xt_cpu_multi by 32 bits to the right and then multiply. */ if (TIMER_VAL_LESS_THAN_VAL(timer_target, (uint32_t) tsc)) timer_process(); } @@ -237,7 +243,7 @@ setreg_i8080(i8080 *cpu, uint8_t reg, uint8_t val) } void -interpret_exec8080(i8080 *cpu, uint8_t opcode) +interpret_exec8080(UNUSED(i8080 *cpu), uint8_t opcode) { switch (opcode) { case 0x00: diff --git a/src/cpu/808x.c b/src/cpu/808x.c index e0419a9f7..690a6e0f9 100644 --- a/src/cpu/808x.c +++ b/src/cpu/808x.c @@ -35,6 +35,8 @@ #include <86box/ppi.h> #include <86box/timer.h> #include <86box/gdbstub.h> +#include <86box/plat_fallthrough.h> +#include <86box/plat_unused.h> /* Is the CPU 8088 or 8086. */ int is8086 = 0; @@ -46,7 +48,8 @@ uint32_t custom_nmi_vector = 0x00000000; static uint8_t pfq[6]; /* Variables to aid with the prefetch queue operation. */ -static int biu_cycles = 0, pfq_pos = 0; +static int biu_cycles = 0; +static int pfq_pos = 0; /* The IP equivalent of the current prefetch queue position. */ static uint16_t pfq_ip; @@ -57,30 +60,37 @@ static x86seg *_opseg[4]; static int noint = 0; static int in_lock = 0; -static int cpu_alu_op, pfq_size; +static int cpu_alu_op; +static int pfq_size; -static uint32_t cpu_src = 0, cpu_dest = 0; +static uint32_t cpu_src = 0; +static uint32_t cpu_dest = 0; static uint32_t cpu_data = 0; static uint16_t last_addr = 0x0000; static uint32_t *ovr_seg = NULL; -static int prefetching = 1, completed = 1; -static int in_rep = 0, repeating = 0, rep_c_flag = 0; -static int oldc, clear_lock = 0; -static int refresh = 0, cycdiff; +static int prefetching = 1; +static int completed = 1; +static int in_rep = 0; +static int repeating = 0; +static int rep_c_flag = 0; +static int oldc; +static int clear_lock = 0; +static int refresh = 0; +static int cycdiff; -static int access_code = 0; -static int hlda = 0; -static int not_ready = 0; +static int access_code = 0; +static int hlda = 0; +static int not_ready = 0; static int bus_request_type = 0; -static int pic_data = -1; -static int last_was_code = 0; -static uint16_t mem_data = 0; -static uint32_t mem_seg = 0; -static uint16_t mem_addr = 0; -static int schedule_fetch = 1; -static int pasv = 0; +static int pic_data = -1; +static int last_was_code = 0; +static uint16_t mem_data = 0; +static uint32_t mem_seg = 0; +static uint16_t mem_addr = 0; +static int schedule_fetch = 1; +static int pasv = 0; #define BUS_OUT 1 #define BUS_HIGH 2 @@ -204,7 +214,7 @@ clock_end(void) int diff = cycdiff - cycles; /* On 808x systems, clock speed is usually crystal frequency divided by an integer. */ - tsc += ((uint64_t) diff * ((uint64_t) xt_cpu_multi >> 32ULL)); /* Shift xt_cpu_multi by 32 bits to the right and then multiply. */ + tsc += ((uint64_t) diff * (xt_cpu_multi >> 32ULL)); /* Shift xt_cpu_multi by 32 bits to the right and then multiply. */ if (TIMER_VAL_LESS_THAN_VAL(timer_target, (uint32_t) tsc)) timer_process(); } @@ -423,9 +433,7 @@ run_dma_cycle(int idle) static void cycles_idle(int c) { - int d; - - for (d = 0; d < c; d++) { + for (int d = 0; d < c; d++) { x808x_log("[%04X:%04X] %02X TI\n", CS, cpu_state.pc, opcode); cycles_forward(1); @@ -500,14 +508,12 @@ bus_init(void) static void wait(int c, int bus) { - int d; - if (c < 0) pclog("Negative cycles: %i!\n", c); x808x_log("[%04X:%04X] %02X %i cycles (%i)\n", CS, cpu_state.pc, opcode, c, bus); - for (d = 0; d < c; d++) { + for (int d = 0; d < c; d++) { x808x_log("[%04X:%04X] %02X cycle %i BIU\n", CS, cpu_state.pc, opcode, d); cycles_biu(bus, !d); x808x_log("[%04X:%04X] %02X cycle %i EU\n", CS, cpu_state.pc, opcode, d); @@ -530,12 +536,12 @@ sub_cycles(int c) void resub_cycles(int old_cycles) { - int i, cyc_diff = 0; + int cyc_diff = 0; if (old_cycles > cycles) { cyc_diff = old_cycles - cycles; - for (i = 0; i < cyc_diff; i++) { + for (int i = 0; i < cyc_diff; i++) { if (not_ready > 0) not_ready--; } @@ -934,7 +940,7 @@ reset_808x(int hard) _opseg[2] = &cpu_state.seg_ss; _opseg[3] = &cpu_state.seg_ds; - pfq_size = (is8086) ? 6 : 4; + pfq_size = is8086 ? 6 : 4; pfq_clear(); } @@ -1205,8 +1211,10 @@ pop(void) static void interrupt(uint16_t addr) { - uint16_t old_cs, old_ip; - uint16_t new_cs, new_ip; + uint16_t old_cs; + uint16_t old_ip; + uint16_t new_cs; + uint16_t new_ip; uint16_t tempf; addr <<= 2; @@ -1242,8 +1250,10 @@ interrupt_808x(uint16_t addr) static void custom_nmi(void) { - uint16_t old_cs, old_ip; - uint16_t new_cs, new_ip; + uint16_t old_cs; + uint16_t old_ip; + uint16_t new_cs; + uint16_t new_ip; uint16_t tempf; cpu_state.eaaddr = 0x0002; @@ -1376,7 +1386,7 @@ rep_interrupt(void) } static int -rep_action(int bits) +rep_action(UNUSED(int bits)) { uint16_t t; @@ -1562,14 +1572,14 @@ alu_op(int bits) case 2: if (cpu_state.flags & C_FLAG) cpu_src++; - /* Fall through. */ + fallthrough; case 0: add(bits); break; case 3: if (cpu_state.flags & C_FLAG) cpu_src++; - /* Fall through. */ + fallthrough; case 5: case 7: sub(bits); @@ -1600,10 +1610,11 @@ mul(uint16_t a, uint16_t b) { int negate = 0; int bit_count = 8; - int carry, i; + int carry; uint16_t high_bit = 0x80; uint16_t size_mask; - uint16_t c, r; + uint16_t c; + uint16_t r; size_mask = (1 << bit_count) - 1; @@ -1644,7 +1655,7 @@ mul(uint16_t a, uint16_t b) a &= size_mask; carry = (a & 1) != 0; a >>= 1; - for (i = 0; i < bit_count; ++i) { + for (int i = 0; i < bit_count; ++i) { wait(7, 0); if (carry) { cpu_src = c; @@ -1705,7 +1716,7 @@ set_pzs(int bits) } static void -set_co_mul(int bits, int carry) +set_co_mul(UNUSED(int bits), int carry) { set_cf(carry); set_of(carry); @@ -1718,10 +1729,11 @@ set_co_mul(int bits, int carry) static int x86_div(uint16_t l, uint16_t h) { - int b, bit_count = 8; + int bit_count = 8; int negative = 0; int dividend_negative = 0; - int size_mask, carry; + int size_mask; + int carry; uint16_t r; if (opcode & 1) { @@ -1765,7 +1777,7 @@ x86_div(uint16_t l, uint16_t h) wait(1, 0); wait(2, 0); carry = 1; - for (b = 0; b < bit_count; ++b) { + for (int b = 0; b < bit_count; ++b) { r = (l << 1) + (carry ? 1 : 0); carry = top_bit(l, bit_count); l = r; @@ -1948,20 +1960,46 @@ cpu_outw(uint16_t port, uint16_t val) void execx86(int cycs) { - uint8_t temp = 0, temp2, old_af, nests; - uint8_t temp_val, temp_al, bit, handled = 0; - uint8_t odd, zero, nibbles_count, destcmp; - uint8_t destbyte, srcbyte, nibble_result, bit_length; + uint8_t temp = 0; + uint8_t temp2; + uint8_t old_af; + uint8_t nests; + uint8_t temp_val; + uint8_t temp_al; + uint8_t bit; + uint8_t handled = 0; + uint8_t odd; + uint8_t zero; + uint8_t nibbles_count; + uint8_t destcmp; + uint8_t destbyte; + uint8_t srcbyte; + uint8_t nibble_result; + uint8_t bit_length; uint8_t bit_offset; int8_t nibble_result_s; - uint16_t addr, tempw, new_cs, new_ip; - uint16_t tempw_int, size, tempbp, lowbound; - uint16_t highbound, regval, orig_sp, wordtopush; - uint16_t immediate, old_flags; - uint16_t tmpa; + uint16_t addr; + uint16_t tempw; + uint16_t new_cs; + uint16_t new_ip; + uint16_t tempw_int; + uint16_t size; + uint16_t tempbp; + uint16_t lowbound; + uint16_t highbound; + uint16_t regval; + uint16_t orig_sp; + uint16_t wordtopush; + uint16_t immediate; + uint16_t old_flags; + uint16_t tmpa; int bits; - uint32_t dest_seg, i, carry, nibble; - uint32_t srcseg, byteaddr; + uint32_t dest_seg; + uint32_t i; + uint32_t carry; + uint32_t nibble; + uint32_t srcseg; + uint32_t byteaddr; cycles += cycs; @@ -1970,7 +2008,9 @@ execx86(int cycs) if (!repeating) { cpu_state.oldpc = cpu_state.pc; - // opcode = pfq_fetchb(); +#if 0 + opcode = pfq_fetchb(); +#endif opcode = pfq_fetchb_common(); handled = 0; oldc = cpu_state.flags & C_FLAG; @@ -2498,7 +2538,7 @@ execx86(int cycs) } for (i = 0; i < bit_length; i++) { byteaddr = (es) + DI; - writememb(es, DI, (read_mem_b(byteaddr) & ~(1 << (bit_offset))) | ((!!(AX & (1 << i))) << bit_offset)); + writememb(es, DI, (read_mem_b(byteaddr) & ~(1 << bit_offset)) | ((!!(AX & (1 << i))) << bit_offset)); bit_offset++; if (bit_offset == 8) { DI++; @@ -3063,7 +3103,7 @@ execx86(int cycs) bits = 8 << (opcode & 1); wait(2, 0); cpu_state.eaaddr = pfq_fetchw(); - set_accum(bits, readmem((ovr_seg ? *ovr_seg : ds))); + set_accum(bits, readmem(ovr_seg ? *ovr_seg : ds)); break; case 0xA2: case 0xA3: @@ -3453,55 +3493,55 @@ execx86(int cycs) if (fpu_softfloat) { switch (opcode) { case 0xD8: - ops_sf_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat); + ops_sf_fpu_8087_d8[(rmdat >> 3) & 0x1f](rmdat); break; case 0xD9: - ops_sf_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_d9[rmdat & 0xff](rmdat); break; case 0xDA: - ops_sf_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_da[rmdat & 0xff](rmdat); break; case 0xDB: - ops_sf_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_db[rmdat & 0xff](rmdat); break; case 0xDC: - ops_sf_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat); + ops_sf_fpu_8087_dc[(rmdat >> 3) & 0x1f](rmdat); break; case 0xDD: - ops_sf_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_dd[rmdat & 0xff](rmdat); break; case 0xDE: - ops_sf_fpu_8087_de[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_de[rmdat & 0xff](rmdat); break; case 0xDF: - ops_sf_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_df[rmdat & 0xff](rmdat); break; } } else { switch (opcode) { case 0xD8: - ops_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat); + ops_fpu_8087_d8[(rmdat >> 3) & 0x1f](rmdat); break; case 0xD9: - ops_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_d9[rmdat & 0xff](rmdat); break; case 0xDA: - ops_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_da[rmdat & 0xff](rmdat); break; case 0xDB: - ops_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_db[rmdat & 0xff](rmdat); break; case 0xDC: - ops_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat); + ops_fpu_8087_dc[(rmdat >> 3) & 0x1f](rmdat); break; case 0xDD: - ops_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_dd[rmdat & 0xff](rmdat); break; case 0xDE: - ops_fpu_8087_de[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_de[rmdat & 0xff](rmdat); break; case 0xDF: - ops_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_df[rmdat & 0xff](rmdat); break; } } @@ -3800,7 +3840,7 @@ execx86(int cycs) if (cpu_mod != 3) wait(1, 0); wait(4, 0); - push((uint16_t *) &(cpu_data)); + push((uint16_t *) &cpu_data); break; } break; diff --git a/src/cpu/808x/queue.c b/src/cpu/808x/queue.c index 66fd91e32..b37ee0fb0 100644 --- a/src/cpu/808x/queue.c +++ b/src/cpu/808x/queue.c @@ -44,8 +44,7 @@ /* NOTE: When porting from Rust to C, please use uintptr_t and not size_t, so it can be printed with PRIuPTR. */ -typedef struct queue_t -{ +typedef struct queue_t { uintptr_t size; uintptr_t len; uintptr_t back; diff --git a/src/cpu/808x/queue.h b/src/cpu/808x/queue.h index b8d21ec85..7c1998295 100644 --- a/src/cpu/808x/queue.h +++ b/src/cpu/808x/queue.h @@ -17,27 +17,26 @@ #ifndef EMU_QUEUE_H #define EMU_QUEUE_H -typedef enum queue_delay_t -{ +typedef enum queue_delay_t { DELAY_READ, DELAY_WRITE, DELAY_NONE } queue_delay_t; -#define FLAG_PRELOADED 0x8000 +#define FLAG_PRELOADED 0x8000 -extern void queue_set_size(uintptr_t size); -extern uintptr_t queue_get_len(void); -extern int queue_is_full(void); -extern uint16_t queue_get_preload(void); -extern int queue_has_preload(void); -extern void queue_set_preload(void); -extern void queue_push8(uint8_t byte); -extern void queue_push16(uint16_t word); -extern uint8_t queue_pop(void); -extern queue_delay_t queue_get_delay(void); -extern void queue_flush(void); +extern void queue_set_size(uintptr_t size); +extern uintptr_t queue_get_len(void); +extern int queue_is_full(void); +extern uint16_t queue_get_preload(void); +extern int queue_has_preload(void); +extern void queue_set_preload(void); +extern void queue_push8(uint8_t byte); +extern void queue_push16(uint16_t word); +extern uint8_t queue_pop(void); +extern queue_delay_t queue_get_delay(void); +extern void queue_flush(void); -extern void queue_init(void); +extern void queue_init(void); #endif /*EMU_QUEUE_H*/ diff --git a/src/cpu/codegen_timing_486.c b/src/cpu/codegen_timing_486.c index 2fe5ce417..e862b123e 100644 --- a/src/cpu/codegen_timing_486.c +++ b/src/cpu/codegen_timing_486.c @@ -3,8 +3,10 @@ #include #include #include <86box/86box.h> -#include <86box/mem.h> #include "cpu.h" +#include <86box/mem.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" @@ -12,11 +14,11 @@ #include "codegen_ops.h" #include "codegen_timing_common.h" -#define CYCLES(c) (int *)c -#define CYCLES2(c16, c32) (int *)((-1 & ~0xffff) | c16 | (c32 << 8)) +#define CYCLES(c) (int *) c +#define CYCLES2(c16, c32) (int *) ((-1 & ~0xffff) | c16 | (c32 << 8)) -static int *opcode_timings[256] = -{ +static int *opcode_timings[256] = { + // clang-format off /*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), /*20*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), @@ -36,10 +38,11 @@ static int *opcode_timings[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL + // clang-format on }; -static int *opcode_timings_mod3[256] = -{ +static int *opcode_timings_mod3[256] = { + // clang-format off /*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), /*20*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), @@ -59,10 +62,11 @@ static int *opcode_timings_mod3[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL + // clang-format on }; -static int *opcode_timings_0f[256] = -{ +static int *opcode_timings_0f[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -82,9 +86,10 @@ static int *opcode_timings_0f[256] = /*d0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, /*e0*/ NULL, &timing_rm, &timing_rm, NULL, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, /*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, + // clang-format on }; -static int *opcode_timings_0f_mod3[256] = -{ +static int *opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -104,60 +109,69 @@ static int *opcode_timings_0f_mod3[256] = /*d0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, /*e0*/ NULL, &timing_rr, &timing_rr, NULL, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, /*f0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, + // clang-format on }; -static int *opcode_timings_shift[8] = -{ +static int *opcode_timings_shift[8] = { + // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) }; -static int *opcode_timings_shift_mod3[8] = -{ +static int *opcode_timings_shift_mod3[8] = { +// clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) + // clang-format on }; -static int *opcode_timings_f6[8] = -{ +static int *opcode_timings_f6[8] = { + // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static int *opcode_timings_f6_mod3[8] = -{ +static int *opcode_timings_f6_mod3[8] = { + // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static int *opcode_timings_f7[8] = -{ +static int *opcode_timings_f7[8] = { + // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static int *opcode_timings_f7_mod3[8] = -{ +static int *opcode_timings_f7_mod3[8] = { + // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) }; -static int *opcode_timings_ff[8] = -{ +static int *opcode_timings_ff[8] = { +// clang-format off &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL }; -static int *opcode_timings_ff_mod3[8] = -{ +static int *opcode_timings_ff_mod3[8] = { +// clang-format off &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL + // clang-format on }; -static int *opcode_timings_d8[8] = -{ +static int *opcode_timings_d8[8] = { + // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_d8_mod3[8] = -{ +static int *opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ CYCLES(8), CYCLES(16), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_d9[8] = -{ +static int *opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ CYCLES(3), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) + // clang-format on }; -static int *opcode_timings_d9_mod3[64] = -{ +static int *opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), /*FXCH*/ @@ -174,26 +188,29 @@ static int *opcode_timings_d9_mod3[64] = CYCLES(140), CYCLES(196), CYCLES(200), CYCLES(218), NULL, NULL, CYCLES(3), CYCLES(3), /* opFPREM opFSQRT opFSINCOS opFRNDINT opFSCALE opFSIN opFCOS*/ CYCLES(70), NULL, CYCLES(83), CYCLES(292), CYCLES(21), CYCLES(30), CYCLES(257), CYCLES(257) + // clang-format on }; -static int *opcode_timings_da[8] = -{ +static int *opcode_timings_da[8] = { + // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_da_mod3[8] = -{ +static int *opcode_timings_da_mod3[8] = { + // clang-format off NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL + // clang-format on }; - -static int *opcode_timings_db[8] = -{ +static int *opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil FLDe FSTPe*/ CYCLES(9), NULL, CYCLES(28), CYCLES(28), NULL, CYCLES(5), NULL, CYCLES(6) + // clang-format on }; -static int *opcode_timings_db_mod3[64] = -{ +static int *opcode_timings_db_mod3[64] = { + // clang-format off NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -203,219 +220,242 @@ static int *opcode_timings_db_mod3[64] = NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + // clang-format on }; -static int *opcode_timings_dc[8] = -{ +static int *opcode_timings_dc[8] = { + // clang-format off /* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_dc_mod3[8] = -{ +static int *opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ CYCLES(8), CYCLES(16), NULL, NULL, CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_dd[8] = -{ +static int *opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ CYCLES(3), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(3) + // clang-format on }; -static int *opcode_timings_dd_mod3[8] = -{ +static int *opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL + // clang-format on }; -static int *opcode_timings_de[8] = -{ +static int *opcode_timings_de[8] = { + // clang-format off /* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_de_mod3[8] = -{ +static int *opcode_timings_de_mod3[8] = { + // clang-format off /* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ CYCLES(8), CYCLES(16), NULL, CYCLES(5), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_df[8] = -{ +static int *opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ CYCLES(13), NULL, CYCLES(29), CYCLES(29), NULL, CYCLES(10), CYCLES(172), CYCLES(28) + // clang-format on }; -static int *opcode_timings_df_mod3[8] = -{ +static int *opcode_timings_df_mod3[8] = { + // clang-format off /* FFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL + // clang-format on }; -static int *opcode_timings_8x[8] = -{ +static int *opcode_timings_8x[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int *opcode_timings_8x_mod3[8] = -{ +static int *opcode_timings_8x_mod3[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int *opcode_timings_81[8] = -{ +static int *opcode_timings_81[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int *opcode_timings_81_mod3[8] = -{ +static int *opcode_timings_81_mod3[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int timing_count; -static uint8_t last_prefix; +static int timing_count; +static uint8_t last_prefix; static uint32_t regmask_modified; -static inline int COUNT(int *c, int op_32) +static inline int +COUNT(int *c, int op_32) { - if ((uintptr_t)c <= 10000) - return (int)(uintptr_t)c; - if (((uintptr_t)c & ~0xffff) == (-1 & ~0xffff)) - { - if (op_32 & 0x100) - return ((uintptr_t)c >> 8) & 0xff; - return (uintptr_t)c & 0xff; - } - return *c; + if ((uintptr_t) c <= 10000) + return (int) (uintptr_t) c; + if (((uintptr_t) c & ~0xffff) == (-1 & ~0xffff)) { + if (op_32 & 0x100) + return ((uintptr_t) c >> 8) & 0xff; + return (uintptr_t) c & 0xff; + } + return *c; } -void codegen_timing_486_block_start(void) +void +codegen_timing_486_block_start(void) { - regmask_modified = 0; + regmask_modified = 0; } -void codegen_timing_486_start(void) +void +codegen_timing_486_start(void) { - timing_count = 0; - last_prefix = 0; + timing_count = 0; + last_prefix = 0; } -void codegen_timing_486_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_486_prefix(uint8_t prefix, uint32_t fetchdat) { - timing_count += COUNT(opcode_timings[prefix], 0); - last_prefix = prefix; + timing_count += COUNT(opcode_timings[prefix], 0); + last_prefix = prefix; } -void codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +void +codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - int **timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 1); + int **timings; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + case 0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + timing_count += COUNT(timings[opcode], op_32); + if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) + timing_count++; /*AGI stall*/ + codegen_block_cycles += timing_count; - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; - - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } - } - - timing_count += COUNT(timings[opcode], op_32); - if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) - timing_count++; /*AGI stall*/ - codegen_block_cycles += timing_count; - - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); } -void codegen_timing_486_block_end(void) +void +codegen_timing_486_block_end(void) { + // } -codegen_timing_t codegen_timing_486 = -{ - codegen_timing_486_start, - codegen_timing_486_prefix, - codegen_timing_486_opcode, - codegen_timing_486_block_start, - codegen_timing_486_block_end, - NULL +codegen_timing_t codegen_timing_486 = { + codegen_timing_486_start, + codegen_timing_486_prefix, + codegen_timing_486_opcode, + codegen_timing_486_block_start, + codegen_timing_486_block_end, + NULL }; diff --git a/src/cpu/codegen_timing_686.c b/src/cpu/codegen_timing_686.c index 7d7f4042d..a6800c5b2 100644 --- a/src/cpu/codegen_timing_686.c +++ b/src/cpu/codegen_timing_686.c @@ -13,8 +13,10 @@ #include #include #include <86box/86box.h> -#include <86box/mem.h> #include "cpu.h" +#include <86box/mem.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" @@ -22,7 +24,7 @@ #include "codegen_timing_common.h" /*Instruction has different execution time for 16 and 32 bit data. Does not pair */ -#define CYCLES_HAS_MULTI (1 << 31) +#define CYCLES_HAS_MULTI (1 << 31) #define CYCLES_MULTI(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) @@ -31,39 +33,39 @@ /*Instruction follows either register timing, read-modify, or read-modify-write. May be pairable*/ -#define CYCLES_REG (1 << 0) -#define CYCLES_RM (1 << 0) -#define CYCLES_RMW (1 << 0) +#define CYCLES_REG (1 << 0) +#define CYCLES_RM (1 << 0) +#define CYCLES_RMW (1 << 0) #define CYCLES_BRANCH (1 << 0) -#define CYCLES_MASK ((1 << 7) - 1) +#define CYCLES_MASK ((1 << 7) - 1) /*Instruction does not pair*/ #define PAIR_NP (0 << 29) /*Instruction pairs in X pipe only*/ -#define PAIR_X (1 << 29) +#define PAIR_X (1 << 29) /*Instruction pairs in X pipe only, and can not pair with a following instruction*/ -#define PAIR_X_BRANCH (2 << 29) +#define PAIR_X_BRANCH (2 << 29) /*Instruction pairs in both X and Y pipes*/ -#define PAIR_XY (3 << 29) +#define PAIR_XY (3 << 29) #define PAIR_MASK (3 << 29) -#define INVALID 0 +#define INVALID 0 -static int prev_full; -static uint32_t prev_opcode; +static int prev_full; +static uint32_t prev_opcode; static uint32_t *prev_timings; -static uint32_t prev_op_32; -static uint32_t prev_regmask; +static uint32_t prev_op_32; +static uint32_t prev_regmask; static uint64_t *prev_deps; -static uint32_t prev_fetchdat; +static uint32_t prev_fetchdat; static uint32_t last_regmask_modified; static uint32_t regmask_modified; -static uint32_t opcode_timings[256] = -{ +static uint32_t opcode_timings[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, /* ADD ADD PUSH ES POP ES*/ @@ -196,10 +198,11 @@ static uint32_t opcode_timings[256] = PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), /* CLD STD INCDEC*/ PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES_RMW, INVALID + // clang-format on }; -static uint32_t opcode_timings_mod3[256] = -{ +static uint32_t opcode_timings_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, /* ADD ADD PUSH ES POP ES*/ @@ -333,10 +336,11 @@ static uint32_t opcode_timings_mod3[256] = PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), /* CLD STD INCDEC*/ PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES_REG, INVALID + // clang-format on }; -static uint32_t opcode_timings_0f[256] = -{ +static uint32_t opcode_timings_0f[256] = { + // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, @@ -416,9 +420,10 @@ static uint32_t opcode_timings_0f[256] = INVALID, PAIR_X | CYCLES_RM, INVALID, INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, + // clang-format on }; -static uint32_t opcode_timings_0f_mod3[256] = -{ +static uint32_t opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, @@ -497,106 +502,122 @@ static uint32_t opcode_timings_0f_mod3[256] = INVALID, PAIR_X | CYCLES_REG, INVALID, INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, + // clang-format on }; -static uint32_t opcode_timings_shift[8] = -{ +static uint32_t opcode_timings_shift[8] = { + // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, + // clang-format on }; -static uint32_t opcode_timings_shift_mod3[8] = -{ +static uint32_t opcode_timings_shift_mod3[8] = { + // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + // clang-format on }; -static uint32_t opcode_timings_shift_imm[8] = -{ +static uint32_t opcode_timings_shift_imm[8] = { + // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, + // clang-format on }; -static uint32_t opcode_timings_shift_imm_mod3[8] = -{ +static uint32_t opcode_timings_shift_imm_mod3[8] = { + // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + // clang-format on }; -static uint32_t opcode_timings_shift_cl[8] = -{ +static uint32_t opcode_timings_shift_cl[8] = { + // clang-format off PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), + // clang-format on }; -static uint32_t opcode_timings_shift_cl_mod3[8] = -{ +static uint32_t opcode_timings_shift_cl_mod3[8] = { + // clang-format off PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), + // clang-format on }; -static uint32_t opcode_timings_f6[8] = -{ +static uint32_t opcode_timings_f6[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_RM, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(18) + // clang-format on }; -static uint32_t opcode_timings_f6_mod3[8] = -{ +static uint32_t opcode_timings_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(18) + // clang-format on }; -static uint32_t opcode_timings_f7[8] = -{ +static uint32_t opcode_timings_f7[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) + // clang-format on }; -static uint32_t opcode_timings_f7_mod3[8] = -{ +static uint32_t opcode_timings_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) + // clang-format on }; -static uint32_t opcode_timings_ff[8] = -{ +static uint32_t opcode_timings_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), /* JMP JMP far PUSH*/ PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), PAIR_XY | CYCLES(1), INVALID + // clang-format on }; -static uint32_t opcode_timings_ff_mod3[8] = -{ +static uint32_t opcode_timings_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_X_BRANCH | CYCLES(1), PAIR_XY | CYCLES(5), /* JMP JMP far PUSH*/ PAIR_X_BRANCH | CYCLES(1), PAIR_XY | CYCLES(5), PAIR_XY | CYCLES(2), INVALID + // clang-format on }; -static uint32_t opcode_timings_d8[8] = -{ +static uint32_t opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), /* FSUBs FSUBRs FDIVs FDIVRs*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_d8_mod3[8] = -{ +static uint32_t opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), /* FSUB FSUBR FDIV FDIVR*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_d9[8] = -{ +static uint32_t opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), /* FLDENV FLDCW FSTENV FSTCW*/ PAIR_X | CYCLES(30), PAIR_X | CYCLES(4), PAIR_X | CYCLES(24), PAIR_X | CYCLES(5) + // clang-format on }; -static uint32_t opcode_timings_d9_mod3[64] = -{ +static uint32_t opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -625,31 +646,34 @@ static uint32_t opcode_timings_d9_mod3[64] = PAIR_X | CYCLES(91), INVALID, PAIR_X | CYCLES(60), PAIR_X | CYCLES(161), /* opFRNDINT opFSCALE opFSIN opFCOS*/ PAIR_X | CYCLES(20), PAIR_X | CYCLES(14), PAIR_X | CYCLES(140), PAIR_X | CYCLES(141) + // clang-format on }; -static uint32_t opcode_timings_da[8] = -{ +static uint32_t opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ PAIR_X | CYCLES(12), PAIR_X | CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), /* FISUBl FISUBRl FIDIVl FIDIVRl*/ PAIR_X | CYCLES(29), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(48) + // clang-format on }; -static uint32_t opcode_timings_da_mod3[8] = -{ +static uint32_t opcode_timings_da_mod3[8] = { + // clang-format off PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), INVALID, PAIR_X | CYCLES(5), INVALID, INVALID + // clang-format on }; - -static uint32_t opcode_timings_db[8] = -{ +static uint32_t opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), /* FLDe FSTPe*/ INVALID, PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_db_mod3[64] = -{ +static uint32_t opcode_timings_db_mod3[64] = { + // clang-format off PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), @@ -675,383 +699,387 @@ static uint32_t opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // clang-format on }; -static uint32_t opcode_timings_dc[8] = -{ +static uint32_t opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), /* FSUBd FSUBRd FDIVd FDIVRd*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_dc_mod3[8] = -{ +static uint32_t opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_dd[8] = -{ +static uint32_t opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), /* FRSTOR FSAVE FSTSW*/ PAIR_X | CYCLES(72), INVALID, PAIR_X | CYCLES(67), PAIR_X | CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_dd_mod3[8] = -{ +static uint32_t opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ PAIR_X | CYCLES(3), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), /* FUCOM FUCOMP*/ PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), INVALID, INVALID + // clang-format on }; -static uint32_t opcode_timings_de[8] = -{ +static uint32_t opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ PAIR_X | CYCLES(12), PAIR_X | CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), /* FISUBw FISUBRw FIDIVw FIDIVRw*/ PAIR_X | CYCLES(27), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(38) }; -static uint32_t opcode_timings_de_mod3[8] = -{ +static uint32_t opcode_timings_de_mod3[8] = { + // clang-format off /* FADD FMUL FCOMPP*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, PAIR_X | CYCLES(7), /* FSUB FSUBR FDIV FDIVR*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_df[8] = -{ +static uint32_t opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ PAIR_X | CYCLES(8), INVALID, PAIR_X | CYCLES(10), PAIR_X | CYCLES(13), /* FILDiq FBSTP FISTPiq*/ INVALID, PAIR_X | CYCLES(8), PAIR_X | CYCLES(63), PAIR_X | CYCLES(13) + // clang-format on }; -static uint32_t opcode_timings_df_mod3[8] = -{ +static uint32_t opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ PAIR_X | CYCLES(6), INVALID, INVALID, INVALID + // clang-format on }; -static uint32_t opcode_timings_8x[8] = -{ +static uint32_t opcode_timings_8x[8] = { + // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM + // clang-format on }; -static uint32_t opcode_timings_8x_mod3[8] = -{ +static uint32_t opcode_timings_8x_mod3[8] = { + // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG + // clang-format on }; -static uint32_t opcode_timings_81[8] = -{ +static uint32_t opcode_timings_81[8] = { + // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM + // clang-format on }; -static uint32_t opcode_timings_81_mod3[8] = -{ +static uint32_t opcode_timings_81_mod3[8] = { + // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG + // clang-format on }; -static int decode_delay; +static int decode_delay; static uint8_t last_prefix; -static inline int COUNT(uint32_t c, int op_32) +static inline int +COUNT(uint32_t c, int op_32) { - if (c & CYCLES_HAS_MULTI) - { - if (op_32 & 0x100) - return ((uintptr_t)c >> 8) & 0xff; - return (uintptr_t)c & 0xff; - } - if (!(c & PAIR_MASK)) - return c & 0xffff; + if (c & CYCLES_HAS_MULTI) { + if (op_32 & 0x100) + return ((uintptr_t) c >> 8) & 0xff; + return (uintptr_t) c & 0xff; + } + if (!(c & PAIR_MASK)) + return c & 0xffff; - return c & CYCLES_MASK; + return c & CYCLES_MASK; } -void codegen_timing_686_block_start(void) +void +codegen_timing_686_block_start(void) { - prev_full = decode_delay = 0; - regmask_modified = last_regmask_modified = 0; + prev_full = decode_delay = 0; + regmask_modified = last_regmask_modified = 0; } -void codegen_timing_686_start(void) +void +codegen_timing_686_start(void) { - decode_delay = 0; - last_prefix = 0; + decode_delay = 0; + last_prefix = 0; } -void codegen_timing_686_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_686_prefix(uint8_t prefix, uint32_t fetchdat) { - if ((prefix & 0xf8) == 0xd8) - { - last_prefix = prefix; - return; - } - if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) - { - /*0fh prefix is 'free' when used on conditional jumps*/ - last_prefix = prefix; - return; - } - - /*6x86 can decode 1 prefix per instruction per clock with no penalty. If - either instruction has more than one prefix then decode is delayed by - one cycle for each additional prefix*/ - decode_delay++; + if ((prefix & 0xf8) == 0xd8) { last_prefix = prefix; + return; + } + if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) { + /*0fh prefix is 'free' when used on conditional jumps*/ + last_prefix = prefix; + return; + } + + /*6x86 can decode 1 prefix per instruction per clock with no penalty. If + either instruction has more than one prefix then decode is delayed by + one cycle for each additional prefix*/ + decode_delay++; + last_prefix = prefix; } -static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) +static int +check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) { - uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); + uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); - if (addr_regmask & IMPL_ESP) - addr_regmask |= (1 << REG_ESP); + if (addr_regmask & IMPL_ESP) + addr_regmask |= (1 << REG_ESP); - if (regmask_modified & addr_regmask) - { - regmask_modified = 0; - return 2; - } + if (regmask_modified & addr_regmask) { + regmask_modified = 0; + return 2; + } - if (last_regmask_modified & addr_regmask) - return 1; + if (last_regmask_modified & addr_regmask) + return 1; - return 0; + return 0; } -void codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +void +codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - uint32_t *timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 1); + uint32_t *timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + timings = mod3 ? opcode_timings_shift_imm_mod3 : opcode_timings_shift_imm; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xd0: + case 0xd1: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xd2: + case 0xd3: + timings = mod3 ? opcode_timings_shift_cl_mod3 : opcode_timings_shift_cl; + deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: - timings = mod3 ? opcode_timings_shift_imm_mod3 : opcode_timings_shift_imm; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + /*One prefix per instruction is free*/ + decode_delay--; + if (decode_delay < 0) + decode_delay = 0; - case 0xd0: case 0xd1: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (prev_full) { + uint32_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, op_32); + int agi_stall = 0; - case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_cl_mod3 : opcode_timings_shift_cl; - deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; - opcode = (fetchdat >> 3) & 7; - break; + if (regmask & IMPL_ESP) + regmask |= SRCDEP_ESP | DSTDEP_ESP; - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; + agi_stall = check_agi(prev_deps, prev_opcode, prev_fetchdat, prev_op_32); - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } + /*Second instruction in the pair*/ + if ((timings[opcode] & PAIR_MASK) == PAIR_NP) { + /*Instruction can not pair with previous*/ + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; + prev_full = 0; + last_regmask_modified = regmask_modified; + regmask_modified = prev_regmask; + } else if (((timings[opcode] & PAIR_MASK) == PAIR_X || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) + && (prev_timings[opcode] & PAIR_MASK) == PAIR_X) { + /*Instruction can not pair with previous*/ + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; + prev_full = 0; + last_regmask_modified = regmask_modified; + regmask_modified = prev_regmask; + } else if (prev_regmask & regmask) { + /*Instruction can not pair with previous*/ + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; + prev_full = 0; + last_regmask_modified = regmask_modified; + regmask_modified = prev_regmask; + } else { + int t1 = COUNT(prev_timings[prev_opcode], prev_op_32); + int t2 = COUNT(timings[opcode], op_32); + int t_pair = (t1 > t2) ? t1 : t2; + + if (!t_pair) + fatal("Pairable 0 cycles! %02x %02x\n", opcode, prev_opcode); + + agi_stall = check_agi(deps, opcode, fetchdat, op_32); + + codegen_block_cycles += t_pair + agi_stall; + decode_delay = (-t_pair) + 1 + agi_stall; + + last_regmask_modified = regmask_modified; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | prev_regmask; + prev_full = 0; + return; } + } - /*One prefix per instruction is free*/ - decode_delay--; - if (decode_delay < 0) - decode_delay = 0; + if (!prev_full) { + /*First instruction in the pair*/ + if ((timings[opcode] & PAIR_MASK) == PAIR_NP || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) { + /*Instruction not pairable*/ + int agi_stall = 0; - if (prev_full) - { - uint32_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, op_32); - int agi_stall = 0; + agi_stall = check_agi(deps, opcode, fetchdat, op_32); - if (regmask & IMPL_ESP) - regmask |= SRCDEP_ESP | DSTDEP_ESP; - - agi_stall = check_agi(prev_deps, prev_opcode, prev_fetchdat, prev_op_32); - - /*Second instruction in the pair*/ - if ((timings[opcode] & PAIR_MASK) == PAIR_NP) - { - /*Instruction can not pair with previous*/ - /*Run previous now*/ - codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; - decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; - prev_full = 0; - last_regmask_modified = regmask_modified; - regmask_modified = prev_regmask; - } - else if (((timings[opcode] & PAIR_MASK) == PAIR_X || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) - && (prev_timings[opcode] & PAIR_MASK) == PAIR_X) - { - /*Instruction can not pair with previous*/ - /*Run previous now*/ - codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; - decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; - prev_full = 0; - last_regmask_modified = regmask_modified; - regmask_modified = prev_regmask; - } - else if (prev_regmask & regmask) - { - /*Instruction can not pair with previous*/ - /*Run previous now*/ - codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; - decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; - prev_full = 0; - last_regmask_modified = regmask_modified; - regmask_modified = prev_regmask; - } - else - { - int t1 = COUNT(prev_timings[prev_opcode], prev_op_32); - int t2 = COUNT(timings[opcode], op_32); - int t_pair = (t1 > t2) ? t1 : t2; - - if (!t_pair) - fatal("Pairable 0 cycles! %02x %02x\n", opcode, prev_opcode); - - agi_stall = check_agi(deps, opcode, fetchdat, op_32); - - codegen_block_cycles += t_pair + agi_stall; - decode_delay = (-t_pair) + 1 + agi_stall; - - last_regmask_modified = regmask_modified; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | prev_regmask; - prev_full = 0; - return; - } - } - - if (!prev_full) - { - /*First instruction in the pair*/ - if ((timings[opcode] & PAIR_MASK) == PAIR_NP || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) - { - /*Instruction not pairable*/ - int agi_stall = 0; - - agi_stall = check_agi(deps, opcode, fetchdat, op_32); - - codegen_block_cycles += COUNT(timings[opcode], op_32) + decode_delay + agi_stall; - decode_delay = (-COUNT(timings[opcode], op_32)) + 1 + agi_stall; - last_regmask_modified = regmask_modified; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); - } - else - { - /*Instruction might pair with next*/ - prev_full = 1; - prev_opcode = opcode; - prev_timings = timings; - prev_op_32 = op_32; - prev_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); - if (prev_regmask & IMPL_ESP) - prev_regmask |= SRCDEP_ESP | DSTDEP_ESP; - prev_deps = deps; - prev_fetchdat = fetchdat; - return; - } + codegen_block_cycles += COUNT(timings[opcode], op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(timings[opcode], op_32)) + 1 + agi_stall; + last_regmask_modified = regmask_modified; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + } else { + /*Instruction might pair with next*/ + prev_full = 1; + prev_opcode = opcode; + prev_timings = timings; + prev_op_32 = op_32; + prev_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); + if (prev_regmask & IMPL_ESP) + prev_regmask |= SRCDEP_ESP | DSTDEP_ESP; + prev_deps = deps; + prev_fetchdat = fetchdat; + return; } + } } -void codegen_timing_686_block_end(void) +void +codegen_timing_686_block_end(void) { - if (prev_full) - { - /*Run previous now*/ - codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay; - prev_full = 0; - } + if (prev_full) { + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay; + prev_full = 0; + } } -codegen_timing_t codegen_timing_686 = -{ - codegen_timing_686_start, - codegen_timing_686_prefix, - codegen_timing_686_opcode, - codegen_timing_686_block_start, - codegen_timing_686_block_end, - NULL +codegen_timing_t codegen_timing_686 = { + codegen_timing_686_start, + codegen_timing_686_prefix, + codegen_timing_686_opcode, + codegen_timing_686_block_start, + codegen_timing_686_block_end, + NULL }; diff --git a/src/cpu/codegen_timing_common.c b/src/cpu/codegen_timing_common.c index a1f1b6ce7..0c538fc4e 100644 --- a/src/cpu/codegen_timing_common.c +++ b/src/cpu/codegen_timing_common.c @@ -8,8 +8,8 @@ #include "codegen_timing_common.h" -uint64_t opcode_deps[256] = -{ +uint64_t opcode_deps[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, /* ADD ADD PUSH ES POP ES*/ @@ -140,10 +140,11 @@ uint64_t opcode_deps[256] = 0, 0, 0, 0, /* CLD STD INCDEC*/ 0, 0, MODRM, 0 + // clang-format on }; -uint64_t opcode_deps_mod3[256] = -{ +uint64_t opcode_deps_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, /* ADD ADD PUSH ES POP ES*/ @@ -274,10 +275,11 @@ uint64_t opcode_deps_mod3[256] = 0, 0, 0, 0, /* CLD STD INCDEC*/ 0, 0, SRCDEP_RM | DSTDEP_RM | MODRM, 0 + // clang-format on }; -uint64_t opcode_deps_0f[256] = -{ +uint64_t opcode_deps_0f[256] = { + // clang-format off /*00*/ MODRM, MODRM, MODRM, MODRM, 0, 0, 0, 0, 0, 0, 0, 0, @@ -357,9 +359,10 @@ uint64_t opcode_deps_0f[256] = 0, MODRM | MMX_MULTIPLY, 0, 0, MODRM, MODRM, MODRM, 0, MODRM, MODRM, MODRM, 0, + // clang-format on }; -uint64_t opcode_deps_0f_mod3[256] = -{ +uint64_t opcode_deps_0f_mod3[256] = { + // clang-format off /*00*/ MODRM, MODRM, MODRM, MODRM, 0, 0, 0, 0, 0, 0, 0, 0, @@ -439,10 +442,11 @@ uint64_t opcode_deps_0f_mod3[256] = 0, MODRM | MMX_MULTIPLY, 0, 0, MODRM, MODRM, MODRM, 0, MODRM, MODRM, MODRM, 0, + // clang-format on }; -uint64_t opcode_deps_0f0f[256] = -{ +uint64_t opcode_deps_0f0f[256] = { + // clang-format off /*00*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -522,9 +526,10 @@ uint64_t opcode_deps_0f0f[256] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // clang-format on }; -uint64_t opcode_deps_0f0f_mod3[256] = -{ +uint64_t opcode_deps_0f0f_mod3[256] = { + // clang-format off /*00*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -604,97 +609,111 @@ uint64_t opcode_deps_0f0f_mod3[256] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // clang-format on }; -uint64_t opcode_deps_shift[8] = -{ +uint64_t opcode_deps_shift[8] = { + // clang-format off MODRM, MODRM, MODRM, MODRM, MODRM, MODRM, MODRM, MODRM, + // clang-format on }; -uint64_t opcode_deps_shift_mod3[8] = -{ +uint64_t opcode_deps_shift_mod3[8] = { + // clang-format off SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, + // clang-format on }; -uint64_t opcode_deps_shift_cl[8] = -{ +uint64_t opcode_deps_shift_cl[8] = { + // clang-format off MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, + // clang-format on }; -uint64_t opcode_deps_shift_cl_mod3[8] = -{ +uint64_t opcode_deps_shift_cl_mod3[8] = { + // clang-format off SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, + // clang-format on }; -uint64_t opcode_deps_f6[8] = -{ +uint64_t opcode_deps_f6[8] = { + // clang-format off /* TST NOT NEG*/ MODRM, 0, MODRM, MODRM, /* MUL IMUL DIV IDIV*/ SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM + // clang-format on }; -uint64_t opcode_deps_f6_mod3[8] = -{ +uint64_t opcode_deps_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ SRCDEP_RM | MODRM, 0, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, /* MUL IMUL DIV IDIV*/ SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM + // clang-format on }; -uint64_t opcode_deps_f7[8] = -{ +uint64_t opcode_deps_f7[8] = { + // clang-format off /* TST NOT NEG*/ MODRM, 0, MODRM, MODRM, /* MUL IMUL DIV IDIV*/ SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM + // clang-format on }; -uint64_t opcode_deps_f7_mod3[8] = -{ +uint64_t opcode_deps_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ SRCDEP_RM | MODRM, 0, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, /* MUL IMUL DIV IDIV*/ SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM + // clang-format on }; -uint64_t opcode_deps_ff[8] = -{ +uint64_t opcode_deps_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ MODRM, MODRM, MODRM | IMPL_ESP, MODRM, /* JMP JMP far PUSH*/ MODRM, MODRM, MODRM | IMPL_ESP, 0 + // clang-format on }; -uint64_t opcode_deps_ff_mod3[8] = -{ +uint64_t opcode_deps_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | MODRM | IMPL_ESP, MODRM, /* JMP JMP far PUSH*/ SRCDEP_RM | MODRM, MODRM, SRCDEP_RM | MODRM | IMPL_ESP, 0 + // clang-format on }; -uint64_t opcode_deps_d8[8] = -{ +uint64_t opcode_deps_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_POP | FPU_READ_ST0 | MODRM, /* FSUBs FSUBRs FDIVs FDIVRs*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM + // clang-format on }; -uint64_t opcode_deps_d8_mod3[8] = -{ +uint64_t opcode_deps_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG, FPU_READ_ST0 | FPU_READ_STREG, FPU_POP | FPU_READ_ST0 | FPU_READ_STREG, /* FSUB FSUBR FDIV FDIVR*/ FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG + // clang-format on }; -uint64_t opcode_deps_d9[8] = -{ +uint64_t opcode_deps_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_POP | MODRM, /* FLDENV FLDCW FSTENV FSTCW*/ MODRM, MODRM, MODRM, MODRM + // clang-format on }; -uint64_t opcode_deps_d9_mod3[64] = -{ +uint64_t opcode_deps_d9_mod3[64] = { + // clang-format off /*FLD*/ FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, @@ -722,32 +741,35 @@ uint64_t opcode_deps_d9_mod3[64] = 0, 0, 0, 0, /* opFRNDINT opFSCALE opFSIN opFCOS*/ 0, 0, 0, 0 + // clang-format on }; -uint64_t opcode_deps_da[8] = -{ +uint64_t opcode_deps_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM + // clang-format on }; -uint64_t opcode_deps_da_mod3[8] = -{ +uint64_t opcode_deps_da_mod3[8] = { + // clang-format off 0, 0, 0, 0, /* FCOMPP*/ 0, FPU_POP2, 0, 0 + // clang-format on }; - -uint64_t opcode_deps_db[8] = -{ +uint64_t opcode_deps_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FLDe FSTPe*/ 0, FPU_PUSH | MODRM, 0, FPU_READ_ST0 | FPU_POP | MODRM + // clang-format on }; -uint64_t opcode_deps_db_mod3[64] = -{ +uint64_t opcode_deps_db_mod3[64] = { + // clang-format off 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -767,84 +789,97 @@ uint64_t opcode_deps_db_mod3[64] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // clang-format on }; -uint64_t opcode_deps_dc[8] = -{ +uint64_t opcode_deps_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FSUBd FSUBRd FDIVd FDIVRd*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM + // clang-format on }; -uint64_t opcode_deps_dc_mod3[8] = -{ +uint64_t opcode_deps_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG, 0, 0, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG + // clang-format on }; -uint64_t opcode_deps_dd[8] = -{ +uint64_t opcode_deps_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FRSTOR FSAVE FSTSW*/ MODRM, 0, MODRM, MODRM + // clang-format on }; -uint64_t opcode_deps_dd_mod3[8] = -{ +uint64_t opcode_deps_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ 0, 0, FPU_READ_ST0 | FPU_WRITE_STREG, FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, /* FUCOM FUCOMP*/ FPU_READ_ST0 | FPU_READ_STREG, FPU_READ_ST0 | FPU_READ_STREG | FPU_POP, 0, 0 + // clang-format on }; -uint64_t opcode_deps_de[8] = -{ +uint64_t opcode_deps_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM + // clang-format on }; -uint64_t opcode_deps_de_mod3[8] = -{ +uint64_t opcode_deps_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, 0, FPU_READ_ST0 | FPU_READ_ST1 | FPU_POP2, /* FSUBP FSUBRP FDIVP FDIVRP*/ FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP + // clang-format on }; -uint64_t opcode_deps_df[8] = -{ +uint64_t opcode_deps_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FILDiq FBSTP FISTPiq*/ 0, FPU_PUSH | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, FPU_READ_ST0 | FPU_POP | MODRM + // clang-format on }; -uint64_t opcode_deps_df_mod3[8] = -{ +uint64_t opcode_deps_df_mod3[8] = { + // clang-format off 0, 0, 0, 0, /* FSTSW AX*/ 0, 0, 0, 0 + // clang-format on }; -uint64_t opcode_deps_81[8] = -{ +uint64_t opcode_deps_81[8] = { + // clang-format off MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632 + // clang-format on }; -uint64_t opcode_deps_81_mod3[8] = -{ +uint64_t opcode_deps_81_mod3[8] = { + // clang-format off SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | MODRM | HAS_IMM1632 + // clang-format on }; -uint64_t opcode_deps_8x[8] = -{ +uint64_t opcode_deps_8x[8] = { + // clang-format off MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8 + // clang-format on }; -uint64_t opcode_deps_8x_mod3[8] = -{ +uint64_t opcode_deps_8x_mod3[8] = { + // clang-format off SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | MODRM | HAS_IMM8 + // clang-format on }; diff --git a/src/cpu/codegen_timing_common.h b/src/cpu/codegen_timing_common.h index 679997802..cc3ff5a6f 100644 --- a/src/cpu/codegen_timing_common.h +++ b/src/cpu/codegen_timing_common.h @@ -1,79 +1,79 @@ #include "codegen_ops.h" /*Instruction has input dependency on register in REG field*/ -#define SRCDEP_REG (1ull << 0) +#define SRCDEP_REG (1ULL << 0) /*Instruction has input dependency on register in R/M field*/ -#define SRCDEP_RM (1ull << 1) +#define SRCDEP_RM (1ULL << 1) /*Instruction modifies register in REG field*/ -#define DSTDEP_REG (1ull << 2) +#define DSTDEP_REG (1ULL<< 2) /*Instruction modifies register in R/M field*/ -#define DSTDEP_RM (1ull << 3) +#define DSTDEP_RM (1ULL << 3) #define SRCDEP_SHIFT 4 #define DSTDEP_SHIFT 12 /*Instruction has input dependency on given register*/ -#define SRCDEP_EAX (1ull << 4) -#define SRCDEP_ECX (1ull << 5) -#define SRCDEP_EDX (1ull << 6) -#define SRCDEP_EBX (1ull << 7) -#define SRCDEP_ESP (1ull << 8) -#define SRCDEP_EBP (1ull << 9) -#define SRCDEP_ESI (1ull << 10) -#define SRCDEP_EDI (1ull << 11) +#define SRCDEP_EAX (1ULL << 4) +#define SRCDEP_ECX (1ULL << 5) +#define SRCDEP_EDX (1ULL << 6) +#define SRCDEP_EBX (1ULL << 7) +#define SRCDEP_ESP (1ULL << 8) +#define SRCDEP_EBP (1ULL << 9) +#define SRCDEP_ESI (1ULL << 10) +#define SRCDEP_EDI (1ULL << 11) /*Instruction modifies given register*/ -#define DSTDEP_EAX (1ull << 12) -#define DSTDEP_ECX (1ull << 13) -#define DSTDEP_EDX (1ull << 14) -#define DSTDEP_EBX (1ull << 15) -#define DSTDEP_ESP (1ull << 16) -#define DSTDEP_EBP (1ull << 17) -#define DSTDEP_ESI (1ull << 18) -#define DSTDEP_EDI (1ull << 19) +#define DSTDEP_EAX (1ULL << 12) +#define DSTDEP_ECX (1ULL << 13) +#define DSTDEP_EDX (1ULL << 14) +#define DSTDEP_EBX (1ULL << 15) +#define DSTDEP_ESP (1ULL << 16) +#define DSTDEP_EBP (1ULL << 17) +#define DSTDEP_ESI (1ULL << 18) +#define DSTDEP_EDI (1ULL << 19) /*Instruction has ModR/M byte*/ -#define MODRM (1ull << 20) +#define MODRM (1ULL << 20) /*Instruction implicitly uses ESP*/ -#define IMPL_ESP (1ull << 21) +#define IMPL_ESP (1ULL << 21) /*Instruction is MMX shift or pack/unpack instruction*/ -#define MMX_SHIFTPACK (1ull << 22) +#define MMX_SHIFTPACK (1ULL << 22) /*Instruction is MMX multiply instruction*/ -#define MMX_MULTIPLY (1ull << 23) +#define MMX_MULTIPLY (1ULL << 23) /*Instruction pops the FPU stack*/ -#define FPU_POP (1ull << 24) +#define FPU_POP (1ULL << 24) /*Instruction pops the FPU stack twice*/ -#define FPU_POP2 (1ull << 25) +#define FPU_POP2 (1ULL << 25) /*Instruction pushes onto the FPU stack*/ -#define FPU_PUSH (1ull << 26) +#define FPU_PUSH (1ULL << 26) /*Instruction writes to ST(0)*/ -#define FPU_WRITE_ST0 (1ull << 27) +#define FPU_WRITE_ST0 (1ULL << 27) /*Instruction reads from ST(0)*/ -#define FPU_READ_ST0 (1ull << 28) +#define FPU_READ_ST0 (1ULL << 28) /*Instruction reads from and writes to ST(0)*/ -#define FPU_RW_ST0 (3ull << 27) +#define FPU_RW_ST0 (3ULL << 27) /*Instruction reads from ST(1)*/ -#define FPU_READ_ST1 (1ull << 29) +#define FPU_READ_ST1 (1ULL << 29) /*Instruction writes to ST(1)*/ -#define FPU_WRITE_ST1 (1ull << 30) +#define FPU_WRITE_ST1 (1ULL << 30) /*Instruction reads from and writes to ST(1)*/ -#define FPU_RW_ST1 (3ull << 29) +#define FPU_RW_ST1 (3ULL << 29) /*Instruction reads from ST(reg)*/ -#define FPU_READ_STREG (1ull << 31) +#define FPU_READ_STREG (1ULL << 31) /*Instruction writes to ST(reg)*/ -#define FPU_WRITE_STREG (1ull << 32) +#define FPU_WRITE_STREG (1ULL << 32) /*Instruction reads from and writes to ST(reg)*/ -#define FPU_RW_STREG (3ull << 31) +#define FPU_RW_STREG (3ULL << 31) -#define FPU_FXCH (1ull << 33) +#define FPU_FXCH (1ULL << 33) -#define HAS_IMM8 (1ull << 34) -#define HAS_IMM1632 (1ull << 35) +#define HAS_IMM8 (1ULL << 34) +#define HAS_IMM1632 (1ULL << 35) #define REGMASK_IMPL_ESP (1 << 8) #define REGMASK_SHIFTPACK (1 << 9) diff --git a/src/cpu/codegen_timing_k6.c b/src/cpu/codegen_timing_k6.c index 6a59fc157..88215bb17 100644 --- a/src/cpu/codegen_timing_k6.c +++ b/src/cpu/codegen_timing_k6.c @@ -8,6 +8,7 @@ #include <86box/mem.h> #include "cpu.h" #include <86box/machine.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" @@ -16,865 +17,748 @@ #include "codegen_ops.h" #include "codegen_timing_common.h" -typedef enum uop_type_t -{ - UOP_ALU = 0, /*Executes in Integer X or Y units*/ - UOP_ALUX, /*Executes in Integer X unit*/ - UOP_LOAD, /*Executes in Load unit*/ - UOP_STORE, /*Executes in Store unit*/ - UOP_FLOAD, /*Executes in Load unit*/ - UOP_FSTORE, /*Executes in Store unit*/ - UOP_MLOAD, /*Executes in Load unit*/ - UOP_MSTORE, /*Executes in Store unit*/ - UOP_FLOAT, /*Executes in Floating Point unit*/ - UOP_MEU, /*Executes in Multimedia unit*/ - UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ - UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX/3DNow multiplier*/ - UOP_MEU_3DN, /*Executes in Multimedia unit or ALU X/Y. Uses 3DNow ALU*/ - UOP_BRANCH, /*Executes in Branch unit*/ - UOP_LIMM /*Does not require an execution unit*/ +typedef enum uop_type_t { + UOP_ALU = 0, /*Executes in Integer X or Y units*/ + UOP_ALUX, /*Executes in Integer X unit*/ + UOP_LOAD, /*Executes in Load unit*/ + UOP_STORE, /*Executes in Store unit*/ + UOP_FLOAD, /*Executes in Load unit*/ + UOP_FSTORE, /*Executes in Store unit*/ + UOP_MLOAD, /*Executes in Load unit*/ + UOP_MSTORE, /*Executes in Store unit*/ + UOP_FLOAT, /*Executes in Floating Point unit*/ + UOP_MEU, /*Executes in Multimedia unit*/ + UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ + UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX/3DNow multiplier*/ + UOP_MEU_3DN, /*Executes in Multimedia unit or ALU X/Y. Uses 3DNow ALU*/ + UOP_BRANCH, /*Executes in Branch unit*/ + UOP_LIMM /*Does not require an execution unit*/ } uop_type_t; -typedef enum decode_type_t -{ - DECODE_SHORT, - DECODE_LONG, - DECODE_VECTOR +typedef enum decode_type_t { + DECODE_SHORT, + DECODE_LONG, + DECODE_VECTOR } decode_type_t; #define MAX_UOPS 10 -typedef struct risc86_uop_t -{ - uop_type_t type; - int throughput; - int latency; +typedef struct risc86_uop_t { + uop_type_t type; + int throughput; + int latency; } risc86_uop_t; -typedef struct risc86_instruction_t -{ - int nr_uops; - decode_type_t decode_type; - risc86_uop_t uop[MAX_UOPS]; +typedef struct risc86_instruction_t { + int nr_uops; + decode_type_t decode_type; + risc86_uop_t uop[MAX_UOPS]; } risc86_instruction_t; -static const risc86_instruction_t alu_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t alu_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t alux_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t alux_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_alux_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_alux_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t alu_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t alu_store_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t alux_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t alux_store_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t branch_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t branch_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t limm_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} +static const risc86_instruction_t limm_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} +static const risc86_instruction_t load_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t store_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t store_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} }; - -static const risc86_instruction_t bswap_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t bswap_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t leave_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t leave_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t lods_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t lods_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t loop_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t loop_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t mov_reg_seg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, +static const risc86_instruction_t mov_reg_seg_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, }; -static const risc86_instruction_t movs_op = -{ - .nr_uops = 4, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t movs_op = { + .nr_uops = 4, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t pop_reg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t pop_reg_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t pop_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t pop_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t push_imm_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, +static const risc86_instruction_t push_imm_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, }; -static const risc86_instruction_t push_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t push_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t push_seg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t push_seg_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t stos_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t stos_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t test_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t test_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t test_reg_b_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t test_reg_b_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t test_mem_imm_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t test_mem_imm_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t test_mem_imm_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t test_mem_imm_b_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t xchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t xchg_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t m3dn_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +static const risc86_instruction_t m3dn_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t mmx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU, .throughput = 1, .latency = 1} +static const risc86_instruction_t mmx_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t mmx_mul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +static const risc86_instruction_t mmx_mul_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t mmx_shift_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +static const risc86_instruction_t mmx_shift_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_3dn_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_3dn_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_3DN, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_mmx_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_mmx_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_mmx_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +static const risc86_instruction_t load_mmx_mul_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t load_mmx_shift_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_mmx_shift_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t mload_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} +static const risc86_instruction_t mload_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t mstore_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t mstore_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t pmul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +static const risc86_instruction_t pmul_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t pmul_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +static const risc86_instruction_t pmul_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t float_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +static const risc86_instruction_t float_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} }; -static const risc86_instruction_t load_float_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +static const risc86_instruction_t load_float_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2} }; -static const risc86_instruction_t fstore_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t fstore_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t fdiv_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} +static const risc86_instruction_t fdiv_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} }; -static const risc86_instruction_t fdiv_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} +static const risc86_instruction_t fdiv_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_FLOAT, .throughput = 40, .latency = 40} }; -static const risc86_instruction_t fsin_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} +static const risc86_instruction_t fsin_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} }; -static const risc86_instruction_t fsqrt_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 41, .latency = 41} +static const risc86_instruction_t fsqrt_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 41, .latency = 41} }; -static const risc86_instruction_t vector_fldcw_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} +static const risc86_instruction_t vector_fldcw_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} }; -static const risc86_instruction_t vector_float_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +static const risc86_instruction_t vector_float_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} }; -static const risc86_instruction_t vector_float_l_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} +static const risc86_instruction_t vector_float_l_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} }; -static const risc86_instruction_t vector_flde_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +static const risc86_instruction_t vector_flde_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2} }; -static const risc86_instruction_t vector_fste_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, - .uop[1] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_fste_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, + .uop[1] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alu1_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu1_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alu2_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu2_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alu3_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu3_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alu6_op = -{ - .nr_uops = 6, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu6_op = { + .nr_uops = 6, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alux1_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alux1_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alux3_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alux3_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alux6_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alux6_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alu_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu_store_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alux_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alux_store_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_arpl_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +static const risc86_instruction_t vector_arpl_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3} }; -static const risc86_instruction_t vector_bound_op = -{ - .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_bound_op = { + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_bsx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_bsx_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_call_far_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_call_far_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_cli_sti_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} +static const risc86_instruction_t vector_cli_sti_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} }; -static const risc86_instruction_t vector_cmps_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_cmps_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_cmpsb_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_cmpsb_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_cmpxchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, +static const risc86_instruction_t vector_cmpxchg_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, }; -static const risc86_instruction_t vector_cmpxchg_b_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, +static const risc86_instruction_t vector_cmpxchg_b_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, }; -static const risc86_instruction_t vector_cpuid_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} +static const risc86_instruction_t vector_cpuid_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} }; -static const risc86_instruction_t vector_div16_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_div16_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_div16_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_div16_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_div32_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} +static const risc86_instruction_t vector_div32_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} }; -static const risc86_instruction_t vector_div32_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} +static const risc86_instruction_t vector_div32_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 18, .latency = 18} }; -static const risc86_instruction_t vector_emms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} +static const risc86_instruction_t vector_emms_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} }; -static const risc86_instruction_t vector_enter_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_enter_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALU, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_femms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} +static const risc86_instruction_t vector_femms_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} }; -static const risc86_instruction_t vector_in_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} +static const risc86_instruction_t vector_in_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} }; -static const risc86_instruction_t vector_ins_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_ins_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 } }; -static const risc86_instruction_t vector_int_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_int_op = { + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 } }; -static const risc86_instruction_t vector_iret_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[3] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_iret_op = { + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[3] = { .type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 } }; -static const risc86_instruction_t vector_invd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} +static const risc86_instruction_t vector_invd_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} }; -static const risc86_instruction_t vector_jmp_far_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_jmp_far_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_load_alux_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_load_alux_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_loop_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_loop_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_lss_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +static const risc86_instruction_t vector_lss_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_ALU, .throughput = 3, .latency = 3} }; -static const risc86_instruction_t vector_mov_mem_seg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mov_mem_seg_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_mov_seg_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +static const risc86_instruction_t vector_mov_seg_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3} }; -static const risc86_instruction_t vector_mov_seg_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +static const risc86_instruction_t vector_mov_seg_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} }; -static const risc86_instruction_t vector_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mul_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_mul_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mul_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_mul64_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mul64_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_mul64_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mul64_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_out_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_out_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_outs_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 10, .latency = 10}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_outs_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1 }, + .uop[1] = { .type = UOP_STORE, .throughput = 10, .latency = 10}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 } }; -static const risc86_instruction_t vector_pusha_op = -{ - .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_pusha_op = { + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[6] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[7] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_popa_op = -{ - .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_LOAD, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_popa_op = { + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[6] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[7] = { .type = UOP_LOAD, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_popf_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 17, .latency = 17} +static const risc86_instruction_t vector_popf_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 17, .latency = 17} }; -static const risc86_instruction_t vector_push_mem_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_push_mem_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_pushf_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_pushf_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_ret_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_ret_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_retf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_retf_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_scas_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_scas_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_scasb_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_scasb_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_setcc_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_setcc_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_setcc_reg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_setcc_reg_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_test_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_test_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_test_mem_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_test_mem_b_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_xchg_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_xchg_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_xlat_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} +static const risc86_instruction_t vector_xlat_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t vector_wbinvd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} +static const risc86_instruction_t vector_wbinvd_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} }; #define INVALID NULL -static const risc86_instruction_t *opcode_timings[256] = -{ +static const risc86_instruction_t *opcode_timings[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* ADD ADD PUSH ES POP ES*/ @@ -1007,10 +891,11 @@ static const risc86_instruction_t *opcode_timings[256] = &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, /* CLD STD INCDEC*/ &vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_mod3[256] = -{ +static const risc86_instruction_t *opcode_timings_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alux_op, &alu_op, &alux_op, &alu_op, /* ADD ADD PUSH ES POP ES*/ @@ -1143,10 +1028,11 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, /* CLD STD INCDEC*/ &vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f[256] = -{ +static const risc86_instruction_t *opcode_timings_0f[256] = { + // clang-format off /*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, @@ -1226,9 +1112,10 @@ static const risc86_instruction_t *opcode_timings_0f[256] = INVALID, &pmul_mem_op, INVALID, INVALID, &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f_mod3[256] = -{ +static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, @@ -1308,10 +1195,11 @@ static const risc86_instruction_t *opcode_timings_0f_mod3[256] = INVALID, &pmul_op, INVALID, INVALID, &mmx_op, &mmx_op, &mmx_op, INVALID, &mmx_op, &mmx_op, &mmx_op, INVALID, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f0f[256] = -{ +static const risc86_instruction_t *opcode_timings_0f0f[256] = { + // clang-format off /*00*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1391,10 +1279,10 @@ static const risc86_instruction_t *opcode_timings_0f0f[256] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, - + // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = -{ +static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = { + // clang-format off /*00*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1474,118 +1362,135 @@ static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, - + // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift[8] = -{ +static const risc86_instruction_t *opcode_timings_shift[8] = { + // clang-format off &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_b[8] = -{ +static const risc86_instruction_t *opcode_timings_shift_b[8] = { + // clang-format off &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_shift_mod3[8] = { + // clang-format off &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &alu_op, &alu_op, &alu_op, &alu_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = { + // clang-format off &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &alux_op, &alux_op, &alux_op, &alux_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_80[8] = -{ +static const risc86_instruction_t *opcode_timings_80[8] = { + // clang-format off &alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_80_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_80_mod3[8] = { + // clang-format off &alux_op, &alux_op, &alux_store_op, &alux_store_op, &alux_op, &alux_op, &alux_op, &alux_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_8x[8] = -{ +static const risc86_instruction_t *opcode_timings_8x[8] = { + // clang-format off &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_8x_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_8x_mod3[8] = { + // clang-format off &alu_op, &alu_op, &alu_store_op, &alu_store_op, &alu_op, &alu_op, &alu_op, &alu_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_f6[8] = -{ +static const risc86_instruction_t *opcode_timings_f6[8] = { + // clang-format off /* TST NOT NEG*/ &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, /* MUL IMUL DIV IDIV*/ &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_f6_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alux_op, &alux_op, /* MUL IMUL DIV IDIV*/ &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_f7[8] = -{ +static const risc86_instruction_t *opcode_timings_f7[8] = { + // clang-format off /* TST NOT NEG*/ &test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op, /* MUL IMUL DIV IDIV*/ &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_f7_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, /* MUL IMUL DIV IDIV*/ &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_ff[8] = -{ +static const risc86_instruction_t *opcode_timings_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, /* JMP JMP far PUSH*/ &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_ff_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, /* JMP JMP far PUSH*/ &branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_d8[8] = -{ +static const risc86_instruction_t *opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, /* FSUBs FSUBRs FDIVs FDIVRs*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_d8_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ &float_op, &float_op, &float_op, &float_op, /* FSUB FSUBR FDIV FDIVR*/ &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_d9[8] = -{ +static const risc86_instruction_t *opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDENV FLDCW FSTENV FSTCW*/ &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_d9_mod3[64] = -{ +static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, @@ -1614,31 +1519,35 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = &fdiv_op, INVALID, &fsqrt_op, &fsin_op, /* opFRNDINT opFSCALE opFSIN opFCOS*/ &float_op, &fdiv_op, &fsin_op, &fsin_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_da[8] = -{ +static const risc86_instruction_t *opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_da_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_da_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ INVALID, &float_op, INVALID, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_db[8] = -{ +static const risc86_instruction_t *opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDe FSTPe*/ INVALID, &vector_flde_op, INVALID, &vector_fste_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_db_mod3[64] = -{ +static const risc86_instruction_t *opcode_timings_db_mod3[64] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1664,108 +1573,113 @@ static const risc86_instruction_t *opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_dc[8] = -{ +static const risc86_instruction_t *opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, /* FSUBd FSUBRd FDIVd FDIVRd*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_dc_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ &float_op, &float_op, INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ &float_op, &float_op, &fdiv_op, &fdiv_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_dd[8] = -{ +static const risc86_instruction_t *opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FRSTOR FSAVE FSTSW*/ &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_dd_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ &float_op, INVALID, &float_op, &float_op, /* FUCOM FUCOMP*/ &float_op, &float_op, INVALID, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_de[8] = -{ +static const risc86_instruction_t *opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_de_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ &float_op, &float_op, INVALID, &float_op, /* FSUBP FSUBRP FDIVP FDIVRP*/ &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_df[8] = -{ +static const risc86_instruction_t *opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FILDiq FBSTP FISTPiq*/ INVALID, &load_float_op, &vector_float_l_op, &fstore_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_df_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ &float_op, INVALID, INVALID, INVALID + // clang-format on }; - static uint8_t last_prefix; -static int prefixes; +static int prefixes; static int decode_timestamp; static int last_complete_timestamp; -typedef struct k6_unit_t -{ - uint32_t uop_mask; - int first_available_cycle; +typedef struct k6_unit_t { + uint32_t uop_mask; + int first_available_cycle; } k6_unit_t; -static int nr_units; +static int nr_units; static k6_unit_t *units; /*K6 has dedicated MMX unit*/ -static k6_unit_t k6_units[] = -{ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX)}, /*Integer X*/ - {.uop_mask = (1 << UOP_ALU)}, /*Integer Y*/ - {.uop_mask = (1 << UOP_MEU) | (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, /*Multimedia*/ - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ +static k6_unit_t k6_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) }, /*Integer X*/ + { .uop_mask = (1 << UOP_ALU) }, /*Integer Y*/ + { .uop_mask = (1 << UOP_MEU) | (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) }, /*Multimedia*/ + { .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/ + { .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/ + { .uop_mask = (1 << UOP_BRANCH) } /*Branch*/ }; #define NR_K6_UNITS (sizeof(k6_units) / sizeof(k6_unit_t)) /*K6-2 and later integrate MMX into ALU X & Y, sharing multiplier, shifter and 3DNow ALU between two execution units*/ -static k6_unit_t k6_2_units[] = -{ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN)}, - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN)}, - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ +static k6_unit_t k6_2_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) }, + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) }, + { .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/ + { .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/ + { .uop_mask = (1 << UOP_BRANCH) } /*Branch*/ }; #define NR_K6_2_UNITS (sizeof(k6_2_units) / sizeof(k6_unit_t)) @@ -1775,57 +1689,52 @@ static int mul_first_available_cycle; static int shift_first_available_cycle; static int m3dnow_first_available_cycle; -static int uop_run(const risc86_uop_t *uop, int decode_time) +static int +uop_run(const risc86_uop_t *uop, int decode_time) { - int c; - k6_unit_t *best_unit = NULL; - int best_start_cycle = 99999; + k6_unit_t *best_unit = NULL; + int best_start_cycle = 99999; - /*UOP_LIMM does not require execution*/ - if (uop->type == UOP_LIMM) - return decode_time; + /*UOP_LIMM does not require execution*/ + if (uop->type == UOP_LIMM) + return decode_time; - /*Handle shared units on K6-2 and later*/ - if (units == k6_2_units) - { - if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) - decode_time = mul_first_available_cycle; - else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) - decode_time = shift_first_available_cycle; - else if (uop->type == UOP_MEU_3DN && decode_time < mul_first_available_cycle) - decode_time = m3dnow_first_available_cycle; + /*Handle shared units on K6-2 and later*/ + if (units == k6_2_units) { + if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) + decode_time = mul_first_available_cycle; + else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) + decode_time = shift_first_available_cycle; + else if (uop->type == UOP_MEU_3DN && decode_time < mul_first_available_cycle) + decode_time = m3dnow_first_available_cycle; + } + + /*Find execution unit for this uOP*/ + for (int c = 0; c < nr_units; c++) { + if (units[c].uop_mask & (1 << uop->type)) { + if (units[c].first_available_cycle < best_start_cycle) { + best_unit = &units[c]; + best_start_cycle = units[c].first_available_cycle; + } } + } + if (!best_unit) + fatal("uop_run: can not find execution unit\n"); - /*Find execution unit for this uOP*/ - for (c = 0; c < nr_units; c++) - { - if (units[c].uop_mask & (1 << uop->type)) - { - if (units[c].first_available_cycle < best_start_cycle) - { - best_unit = &units[c]; - best_start_cycle = units[c].first_available_cycle; - } - } - } - if (!best_unit) - fatal("uop_run: can not find execution unit\n"); + if (best_start_cycle < decode_time) + best_start_cycle = decode_time; + best_unit->first_available_cycle = best_start_cycle + uop->throughput; - if (best_start_cycle < decode_time) - best_start_cycle = decode_time; - best_unit->first_available_cycle = best_start_cycle + uop->throughput; + if (units == k6_2_units) { + if (uop->type == UOP_MEU_MUL) + mul_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_SHIFT) + shift_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_3DN) + m3dnow_first_available_cycle = best_start_cycle + uop->throughput; + } - if (units == k6_2_units) - { - if (uop->type == UOP_MEU_MUL) - mul_first_available_cycle = best_start_cycle + uop->throughput; - else if (uop->type == UOP_MEU_SHIFT) - shift_first_available_cycle = best_start_cycle + uop->throughput; - else if (uop->type == UOP_MEU_3DN) - m3dnow_first_available_cycle = best_start_cycle + uop->throughput; - } - - return best_start_cycle + uop->throughput; + return best_start_cycle + uop->throughput; } /*The K6 decoder can decode, per clock : @@ -1833,14 +1742,13 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) - 1 'long' instruction, up to 4 uOPs - 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay) */ -static struct -{ - int nr_uops; - const risc86_uop_t *uops[4]; - /*Earliest time a uop can start. If the timestamp is -1, then the uop is - part of a dependency chain and the start time is the completion time of - the previous uop*/ - int earliest_start[4]; +static struct { + int nr_uops; + const risc86_uop_t *uops[4]; + /*Earliest time a uop can start. If the timestamp is -1, then the uop is + part of a dependency chain and the start time is the completion time of + the previous uop*/ + int earliest_start[4]; } decode_buffer; #define NR_OPQUADS 6 @@ -1858,495 +1766,465 @@ static int fpu_st_timestamp[8]; dependent uop chains*/ static int last_uop_timestamp = 0; -void decode_flush(void) +void +decode_flush(void) { - int c; - int uop_timestamp = 0; + int uop_timestamp = 0; - /*Decoded opquad can not be submitted if there are no free spaces in the - opquad buffer*/ - if (decode_timestamp < opquad_completion_timestamp[next_opquad]) - decode_timestamp = opquad_completion_timestamp[next_opquad]; + /*Decoded opquad can not be submitted if there are no free spaces in the + opquad buffer*/ + if (decode_timestamp < opquad_completion_timestamp[next_opquad]) + decode_timestamp = opquad_completion_timestamp[next_opquad]; - /*Ensure that uops can not be submitted before they have been decoded*/ - if (decode_timestamp > last_uop_timestamp) - last_uop_timestamp = decode_timestamp; + /*Ensure that uops can not be submitted before they have been decoded*/ + if (decode_timestamp > last_uop_timestamp) + last_uop_timestamp = decode_timestamp; - /*Submit uops to execution units, and determine the latest completion time*/ - for (c = 0; c < decode_buffer.nr_uops; c++) - { - int start_timestamp; + /*Submit uops to execution units, and determine the latest completion time*/ + for (int c = 0; c < decode_buffer.nr_uops; c++) { + int start_timestamp; - if (decode_buffer.earliest_start[c] == -1) - start_timestamp = last_uop_timestamp; - else - start_timestamp = decode_buffer.earliest_start[c]; - - last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); - if (last_uop_timestamp > uop_timestamp) - uop_timestamp = last_uop_timestamp; - } - - /*Calculate opquad completion time. Since opquads complete in order, it - must be after the last completion.*/ - if (uop_timestamp <= last_complete_timestamp) - last_complete_timestamp = last_complete_timestamp + 1; + if (decode_buffer.earliest_start[c] == -1) + start_timestamp = last_uop_timestamp; else - last_complete_timestamp = uop_timestamp; + start_timestamp = decode_buffer.earliest_start[c]; - /*Advance to next opquad in buffer*/ - opquad_completion_timestamp[next_opquad] = last_complete_timestamp; - next_opquad++; - if (next_opquad == NR_OPQUADS) - next_opquad = 0; + last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); + if (last_uop_timestamp > uop_timestamp) + uop_timestamp = last_uop_timestamp; + } - decode_timestamp++; - decode_buffer.nr_uops = 0; + /*Calculate opquad completion time. Since opquads complete in order, it + must be after the last completion.*/ + if (uop_timestamp <= last_complete_timestamp) + last_complete_timestamp = last_complete_timestamp + 1; + else + last_complete_timestamp = uop_timestamp; + + /*Advance to next opquad in buffer*/ + opquad_completion_timestamp[next_opquad] = last_complete_timestamp; + next_opquad++; + if (next_opquad == NR_OPQUADS) + next_opquad = 0; + + decode_timestamp++; + decode_buffer.nr_uops = 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the limit on K6 short decoding*/ -static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) +static int +codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) { - int len = prefixes + 1; /*Opcode*/ - if (deps & MODRM) - { - len++; /*ModR/M*/ - if (deps & HAS_IMM8) - len++; - if (deps & HAS_IMM1632) - len += (op_32 & 0x100) ? 4 : 2; + int len = prefixes + 1; /*Opcode*/ + if (deps & MODRM) { + len++; /*ModR/M*/ + if (deps & HAS_IMM8) + len++; + if (deps & HAS_IMM1632) + len += (op_32 & 0x100) ? 4 : 2; - if (op_32 & 0x200) - { - if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) - { - /* Has SIB*/ - len++; - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0x700) == 0x500) - len += 4; - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0xc7) == 0x05) - len += 4; - } - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 2; - else if ((fetchdat & 0xc7) == 0x06) - len += 2; - } + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; } + } - return len; + return len; } -static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) +static void +decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) { - uint32_t regmask_required; - uint32_t regmask_modified; - int c, d; - int earliest_start = 0; - decode_type_t decode_type = ins->decode_type; - int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); + uint32_t regmask_required; + uint32_t regmask_modified; + int c; + int d; + int earliest_start = 0; + decode_type_t decode_type = ins->decode_type; + int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); - /*Generate input register mask, and determine the earliest time this - instruction can start. This is not accurate, as this is calculated per - x86 instruction when it should be handled per uop*/ - regmask_required = get_dstdep_mask(deps, fetchdat, bit8); - regmask_required |= get_addr_regmask(deps, fetchdat, op_32); - for (c = 0; c < 8; c++) - { - if (regmask_required & (1 << c)) - { - if (reg_available_timestamp[c] > decode_timestamp) - earliest_start = reg_available_timestamp[c]; - } + /*Generate input register mask, and determine the earliest time this + instruction can start. This is not accurate, as this is calculated per + x86 instruction when it should be handled per uop*/ + regmask_required = get_dstdep_mask(deps, fetchdat, bit8); + regmask_required |= get_addr_regmask(deps, fetchdat, op_32); + for (c = 0; c < 8; c++) { + if (regmask_required & (1 << c)) { + if (reg_available_timestamp[c] > decode_timestamp) + earliest_start = reg_available_timestamp[c]; } - if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) - earliest_start = fpu_st_timestamp[0]; - if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) - earliest_start = fpu_st_timestamp[1]; - if ((deps & FPU_RW_STREG)) - { - int reg = fetchdat & 7; + } + if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) + earliest_start = fpu_st_timestamp[0]; + if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) + earliest_start = fpu_st_timestamp[1]; + if (deps & FPU_RW_STREG) { + int reg = fetchdat & 7; - if (fpu_st_timestamp[reg] > decode_timestamp) - earliest_start = fpu_st_timestamp[reg]; - } + if (fpu_st_timestamp[reg] > decode_timestamp) + earliest_start = fpu_st_timestamp[reg]; + } - /*Short decoders are limited to 7 bytes*/ - if (decode_type == DECODE_SHORT && instr_length > 7) - decode_type = DECODE_LONG; - /*Long decoder is limited to 11 bytes*/ - else if (instr_length > 11) - decode_type = DECODE_VECTOR; + /*Short decoders are limited to 7 bytes*/ + if (decode_type == DECODE_SHORT && instr_length > 7) + decode_type = DECODE_LONG; + /*Long decoder is limited to 11 bytes*/ + else if (instr_length > 11) + decode_type = DECODE_VECTOR; - switch (decode_type) - { - case DECODE_SHORT: - if (decode_buffer.nr_uops) - { - decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; - decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[decode_buffer.nr_uops+1] = &ins->uop[1]; - decode_buffer.earliest_start[decode_buffer.nr_uops+1] = -1; - } - decode_buffer.nr_uops += ins->nr_uops; - - decode_flush(); + switch (decode_type) { + case DECODE_SHORT: + if (decode_buffer.nr_uops) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + if (ins->nr_uops > 1) { + decode_buffer.uops[decode_buffer.nr_uops + 1] = &ins->uop[1]; + decode_buffer.earliest_start[decode_buffer.nr_uops + 1] = -1; } - else - { - decode_buffer.nr_uops = ins->nr_uops; - decode_buffer.uops[0] = &ins->uop[0]; - decode_buffer.earliest_start[0] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[1] = &ins->uop[1]; - decode_buffer.earliest_start[1] = -1; - } - } - break; + decode_buffer.nr_uops += ins->nr_uops; - case DECODE_LONG: - if (decode_buffer.nr_uops) - decode_flush(); - - decode_buffer.nr_uops = ins->nr_uops; - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[c] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[c] = earliest_start; - else - decode_buffer.earliest_start[c] = -1; - } decode_flush(); - break; - - case DECODE_VECTOR: - if (decode_buffer.nr_uops) - decode_flush(); - - decode_timestamp++; - d = 0; - - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[d] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[d] = earliest_start; - else - decode_buffer.earliest_start[d] = -1; - d++; - - if (d == 4) - { - d = 0; - decode_buffer.nr_uops = 4; - decode_flush(); - } + } else { + decode_buffer.nr_uops = ins->nr_uops; + decode_buffer.uops[0] = &ins->uop[0]; + decode_buffer.earliest_start[0] = earliest_start; + if (ins->nr_uops > 1) { + decode_buffer.uops[1] = &ins->uop[1]; + decode_buffer.earliest_start[1] = -1; } - if (d) - { - decode_buffer.nr_uops = d; - decode_flush(); - } - break; - } + } + break; - /*Update write timestamps for any output registers*/ - regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); - for (c = 0; c < 8; c++) - { - if (regmask_modified & (1 << c)) - reg_available_timestamp[c] = last_complete_timestamp; - } - if (deps & FPU_POP) - { - for (c = 0; c < 7; c++) - fpu_st_timestamp[c] = fpu_st_timestamp[c+1]; - fpu_st_timestamp[7] = 0; - } - if (deps & FPU_POP2) - { - for (c = 0; c < 6; c++) - fpu_st_timestamp[c] = fpu_st_timestamp[c+2]; - fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; - } - if (deps & FPU_PUSH) - { - for (c = 0; c < 7; c++) - fpu_st_timestamp[c+1] = fpu_st_timestamp[c]; - fpu_st_timestamp[0] = 0; - } - if (deps & FPU_WRITE_ST0) - fpu_st_timestamp[0] = last_complete_timestamp; - if (deps & FPU_WRITE_ST1) - fpu_st_timestamp[1] = last_complete_timestamp; - if (deps & FPU_WRITE_STREG) - { - int reg = fetchdat & 7; - if (deps & FPU_POP) - reg--; - if (reg >= 0 && - !(reg == 0 && (deps & FPU_WRITE_ST0)) && - !(reg == 1 && (deps & FPU_WRITE_ST1))) - fpu_st_timestamp[reg] = last_complete_timestamp; - } -} + case DECODE_LONG: + if (decode_buffer.nr_uops) + decode_flush(); -void codegen_timing_k6_block_start(void) -{ - int c; - - for (c = 0; c < nr_units; c++) - units[c].first_available_cycle = 0; - - mul_first_available_cycle = 0; - shift_first_available_cycle = 0; - m3dnow_first_available_cycle = 0; - - decode_timestamp = 0; - last_complete_timestamp = 0; - - for (c = 0; c < NR_OPQUADS; c++) - opquad_completion_timestamp[c] = 0; - next_opquad = 0; - - for (c = 0; c < NR_REGS; c++) - reg_available_timestamp[c] = 0; - for (c = 0; c < 8; c++) - fpu_st_timestamp[c] = 0; -} - -void codegen_timing_k6_start(void) -{ - if (cpu_s->cpu_type == CPU_K6) - { - units = k6_units; - nr_units = NR_K6_UNITS; - } - else - { - units = k6_2_units; - nr_units = NR_K6_2_UNITS; - } - last_prefix = 0; - prefixes = 0; -} - -void codegen_timing_k6_prefix(uint8_t prefix, uint32_t fetchdat) -{ - if (prefix != 0x0f) - decode_timestamp++; - - last_prefix = prefix; - prefixes++; -} - -void codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) -{ - const risc86_instruction_t **ins_table; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int old_last_complete_timestamp = last_complete_timestamp; - int bit8 = !(opcode & 1); - - switch (last_prefix) - { - case 0x0f: - if (opcode == 0x0f) - { - /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ - uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ - uint8_t modrm = fetchdat & 0xff; - uint8_t sib = (fetchdat >> 8) & 0xff; - - if ((modrm & 0xc0) != 0xc0) - { - if (op_32 & 0x200) - { - if ((modrm & 7) == 4) - { - /* Has SIB*/ - opcode_pc++; - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((sib & 0x07) == 0x05) - opcode_pc += 4; - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((modrm & 0xc7) == 0x05) - opcode_pc += 4; - } - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 2; - else if ((modrm & 0xc7) == 0x06) - opcode_pc += 2; - } - } - - opcode = fastreadb(cs + opcode_pc); - - ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; - deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; - } + decode_buffer.nr_uops = ins->nr_uops; + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[c] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[c] = earliest_start; else - { - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - } - break; + decode_buffer.earliest_start[c] = -1; + } + decode_flush(); + break; - case 0xd8: - ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case DECODE_VECTOR: + if (decode_buffer.nr_uops) + decode_flush(); + + decode_timestamp++; + d = 0; + + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[d] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[d] = earliest_start; + else + decode_buffer.earliest_start[d] = -1; + d++; + + if (d == 4) { + d = 0; + decode_buffer.nr_uops = 4; + decode_flush(); + } + } + if (d) { + decode_buffer.nr_uops = d; + decode_flush(); + } + break; + } + + /*Update write timestamps for any output registers*/ + regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); + for (c = 0; c < 8; c++) { + if (regmask_modified & (1 << c)) + reg_available_timestamp[c] = last_complete_timestamp; + } + if (deps & FPU_POP) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 1]; + fpu_st_timestamp[7] = 0; + } + if (deps & FPU_POP2) { + for (c = 0; c < 6; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 2]; + fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; + } + if (deps & FPU_PUSH) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c + 1] = fpu_st_timestamp[c]; + fpu_st_timestamp[0] = 0; + } + if (deps & FPU_WRITE_ST0) + fpu_st_timestamp[0] = last_complete_timestamp; + if (deps & FPU_WRITE_ST1) + fpu_st_timestamp[1] = last_complete_timestamp; + if (deps & FPU_WRITE_STREG) { + int reg = fetchdat & 7; + if (deps & FPU_POP) + reg--; + if (reg >= 0 && !(reg == 0 && (deps & FPU_WRITE_ST0)) && !(reg == 1 && (deps & FPU_WRITE_ST1))) + fpu_st_timestamp[reg] = last_complete_timestamp; + } +} + +void +codegen_timing_k6_block_start(void) +{ + int c; + + for (c = 0; c < nr_units; c++) + units[c].first_available_cycle = 0; + + mul_first_available_cycle = 0; + shift_first_available_cycle = 0; + m3dnow_first_available_cycle = 0; + + decode_timestamp = 0; + last_complete_timestamp = 0; + + for (c = 0; c < NR_OPQUADS; c++) + opquad_completion_timestamp[c] = 0; + next_opquad = 0; + + for (c = 0; c < NR_REGS; c++) + reg_available_timestamp[c] = 0; + for (c = 0; c < 8; c++) + fpu_st_timestamp[c] = 0; +} + +void +codegen_timing_k6_start(void) +{ + if (cpu_s->cpu_type == CPU_K6) { + units = k6_units; + nr_units = NR_K6_UNITS; + } else { + units = k6_2_units; + nr_units = NR_K6_2_UNITS; + } + last_prefix = 0; + prefixes = 0; +} + +void +codegen_timing_k6_prefix(uint8_t prefix, uint32_t fetchdat) +{ + if (prefix != 0x0f) + decode_timestamp++; + + last_prefix = prefix; + prefixes++; +} + +void +codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + const risc86_instruction_t **ins_table; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int old_last_complete_timestamp = last_complete_timestamp; + int bit8 = !(opcode & 1); + + switch (last_prefix) { + case 0x0f: + if (opcode == 0x0f) { + /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ + uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ + uint8_t modrm = fetchdat & 0xff; + uint8_t sib = (fetchdat >> 8) & 0xff; + + if ((modrm & 0xc0) != 0xc0) { + if (op_32 & 0x200) { + if ((modrm & 7) == 4) { + /* Has SIB*/ + opcode_pc++; + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((sib & 0x07) == 0x05) + opcode_pc += 4; + } else { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((modrm & 0xc7) == 0x05) + opcode_pc += 4; + } + } else { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 2; + else if ((modrm & 0xc7) == 0x06) + opcode_pc += 2; + } + } + + opcode = fastreadb(cs + opcode_pc); + + ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; + deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; + } else { + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + } + break; + + case 0xd8: + ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + case 0x83: + ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xd0: + case 0xd2: + ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc1: + case 0xd1: + case 0xd3: + ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: - ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: case 0x83: - ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; + ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xd0: case 0xd2: - ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (ins_table[opcode]) + decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); + else + decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); +} - case 0xc1: case 0xd1: case 0xd3: - ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; - - case 0xf6: - ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; - - default: - ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } - } - - if (ins_table[opcode]) - decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); - else - decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); +void +codegen_timing_k6_block_end(void) +{ + if (decode_buffer.nr_uops) { + int old_last_complete_timestamp = last_complete_timestamp; + decode_flush(); codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); + } } -void codegen_timing_k6_block_end(void) +int +codegen_timing_k6_jump_cycles(void) { - if (decode_buffer.nr_uops) - { - int old_last_complete_timestamp = last_complete_timestamp; - decode_flush(); - codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); - } + if (decode_buffer.nr_uops) + return 1; + return 0; } -int codegen_timing_k6_jump_cycles(void) -{ - if (decode_buffer.nr_uops) - return 1; - return 0; -} - -codegen_timing_t codegen_timing_k6 = -{ - codegen_timing_k6_start, - codegen_timing_k6_prefix, - codegen_timing_k6_opcode, - codegen_timing_k6_block_start, - codegen_timing_k6_block_end, - codegen_timing_k6_jump_cycles +codegen_timing_t codegen_timing_k6 = { + codegen_timing_k6_start, + codegen_timing_k6_prefix, + codegen_timing_k6_opcode, + codegen_timing_k6_block_start, + codegen_timing_k6_block_end, + codegen_timing_k6_jump_cycles }; diff --git a/src/cpu/codegen_timing_p6.c b/src/cpu/codegen_timing_p6.c index cf40e084e..008e36594 100644 --- a/src/cpu/codegen_timing_p6.c +++ b/src/cpu/codegen_timing_p6.c @@ -8,6 +8,7 @@ #include "cpu.h" #include <86box/mem.h> #include <86box/machine.h> +#include <86box/plat_unused.h> #include "x86.h" #include "x86_ops.h" @@ -17,893 +18,775 @@ #include "codegen_ops.h" #include "codegen_timing_common.h" -typedef enum uop_type_t -{ - UOP_ALU = 0, /*Executes in Port 0 or 1 ALU units*/ - UOP_ALUP0, /*Executes in Port 0 ALU unit*/ - UOP_LOAD, /*Executes in Load unit*/ - UOP_STORED, /*Executes in Data Store unit*/ - UOP_STOREA, /*Executes in Address Store unit*/ - UOP_FLOAD, /*Executes in Load unit*/ - UOP_FSTORED, /*Executes in Data Store unit*/ - UOP_FSTOREA, /*Executes in Address Store unit*/ - UOP_MLOAD, /*Executes in Load unit*/ - UOP_MSTORED, /*Executes in Data Store unit*/ - UOP_MSTOREA, /*Executes in Address Store unit*/ - UOP_FLOAT, /*Executes in Floating Point unit*/ - UOP_MMX, /*Executes in Port 0 or 1 ALU units as MMX*/ - UOP_MMX_SHIFT, /*Executes in Port 1 ALU unit. Uses MMX shifter*/ - UOP_MMX_MUL, /*Executes in Port 0 ALU unit. Uses MMX multiplier*/ - UOP_BRANCH, /*Executes in Branch unit*/ - UOP_FXCH /*Does not require an execution unit*/ +typedef enum uop_type_t { + UOP_ALU = 0, /*Executes in Port 0 or 1 ALU units*/ + UOP_ALUP0, /*Executes in Port 0 ALU unit*/ + UOP_LOAD, /*Executes in Load unit*/ + UOP_STORED, /*Executes in Data Store unit*/ + UOP_STOREA, /*Executes in Address Store unit*/ + UOP_FLOAD, /*Executes in Load unit*/ + UOP_FSTORED, /*Executes in Data Store unit*/ + UOP_FSTOREA, /*Executes in Address Store unit*/ + UOP_MLOAD, /*Executes in Load unit*/ + UOP_MSTORED, /*Executes in Data Store unit*/ + UOP_MSTOREA, /*Executes in Address Store unit*/ + UOP_FLOAT, /*Executes in Floating Point unit*/ + UOP_MMX, /*Executes in Port 0 or 1 ALU units as MMX*/ + UOP_MMX_SHIFT, /*Executes in Port 1 ALU unit. Uses MMX shifter*/ + UOP_MMX_MUL, /*Executes in Port 0 ALU unit. Uses MMX multiplier*/ + UOP_BRANCH, /*Executes in Branch unit*/ + UOP_FXCH /*Does not require an execution unit*/ } uop_type_t; -typedef enum decode_type_t -{ - DECODE_SIMPLE, - DECODE_COMPLEX, +typedef enum decode_type_t { + DECODE_SIMPLE, + DECODE_COMPLEX, } decode_type_t; #define MAX_UOPS 10 -typedef struct p6_uop_t -{ - uop_type_t type; - int latency; +typedef struct p6_uop_t { + uop_type_t type; + int latency; } p6_uop_t; -typedef struct macro_op_t -{ - int nr_uops; - decode_type_t decode_type; - p6_uop_t uop[MAX_UOPS]; +typedef struct macro_op_t { + int nr_uops; + decode_type_t decode_type; + p6_uop_t uop[MAX_UOPS]; } macro_op_t; -static const macro_op_t alu_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t alu_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const macro_op_t alup0_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t alup0_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t load_alup0_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t load_alup0_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t alu_store_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_STORED, .latency = 1}, - .uop[3] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t alu_store_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_STORED, .latency = 1}, + .uop[3] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t alup0_store_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_STORED, .latency = 1}, - .uop[3] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t alup0_store_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_STORED, .latency = 1}, + .uop[3] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t branch_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_BRANCH, .latency = 2} +static const macro_op_t branch_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_BRANCH, .latency = 2} }; -static const macro_op_t fxch_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FXCH, .latency = 1} +static const macro_op_t fxch_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FXCH, .latency = 1} }; -static const macro_op_t load_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_LOAD, .latency = 1} +static const macro_op_t load_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_LOAD, .latency = 1} }; -static const macro_op_t store_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 1}, - .uop[1] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t store_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = { .type = UOP_STOREA, .latency = 1} }; - -static const macro_op_t bswap_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, +static const macro_op_t bswap_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, }; -static const macro_op_t leave_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t leave_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t lods_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t lods_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t loop_op = -{ - .nr_uops = 5, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1}, - .uop[4] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t loop_op = { + .nr_uops = 5, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1}, + .uop[4] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t mov_reg_seg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, +static const macro_op_t mov_reg_seg_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, }; -static const macro_op_t movs_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t movs_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t pop_reg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t pop_reg_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t pop_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t pop_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t push_imm_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 1}, - .uop[1] = {.type = UOP_STOREA, .latency = 1}, +static const macro_op_t push_imm_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = { .type = UOP_STOREA, .latency = 1}, }; -static const macro_op_t push_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t push_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t push_seg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t push_seg_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t stos_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t stos_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t test_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_reg_b_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t test_reg_b_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t test_mem_imm_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t test_mem_imm_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_mem_imm_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t test_mem_imm_b_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t xchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t xchg_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; - -static const macro_op_t mmx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_MMX, .latency = 1} +static const macro_op_t mmx_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX, .latency = 1} }; -static const macro_op_t mmx_mul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} +static const macro_op_t mmx_mul_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} }; -static const macro_op_t mmx_shift_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1} +static const macro_op_t mmx_shift_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1} }; -static const macro_op_t load_mmx_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 2}, - .uop[1] = {.type = UOP_MMX, .latency = 2} +static const macro_op_t load_mmx_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = { .type = UOP_MMX, .latency = 2} }; -static const macro_op_t load_mmx_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 2}, - .uop[1] = {.type = UOP_MMX_MUL, .latency = 2} +static const macro_op_t load_mmx_mul_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = { .type = UOP_MMX_MUL, .latency = 2} }; -static const macro_op_t load_mmx_shift_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 2}, - .uop[1] = {.type = UOP_MMX_SHIFT, .latency = 2} +static const macro_op_t load_mmx_shift_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = { .type = UOP_MMX_SHIFT, .latency = 2} }; -static const macro_op_t mload_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_MLOAD, .latency = 1}, +static const macro_op_t mload_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MLOAD, .latency = 1}, }; -static const macro_op_t mstore_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_MSTORED, .latency = 1}, - .uop[1] = {.type = UOP_MSTOREA, .latency = 1} +static const macro_op_t mstore_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MSTORED, .latency = 1}, + .uop[1] = { .type = UOP_MSTOREA, .latency = 1} }; -static const macro_op_t pmul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} +static const macro_op_t pmul_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} }; -static const macro_op_t pmul_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 2}, - .uop[1] = {.type = UOP_MMX_MUL, .latency = 2} +static const macro_op_t pmul_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = { .type = UOP_MMX_MUL, .latency = 2} }; -static const macro_op_t float_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t float_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t fadd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 2} +static const macro_op_t fadd_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 2} }; -static const macro_op_t fmul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_ALUP0, .latency = 3} +static const macro_op_t fmul_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALUP0, .latency = 3} }; -static const macro_op_t float2_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t float2_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1}, + .uop[1] = { .type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t fchs_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .latency = 2}, - .uop[2] = {.type = UOP_FLOAT, .latency = 2} +static const macro_op_t fchs_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = { .type = UOP_FLOAT, .latency = 2}, + .uop[2] = { .type = UOP_FLOAT, .latency = 2} }; -static const macro_op_t load_float_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t load_float_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = { .type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t load_fadd_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 2} +static const macro_op_t load_fadd_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = { .type = UOP_FLOAT, .latency = 2} }; -static const macro_op_t load_fmul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 4} +static const macro_op_t load_fmul_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 4} }; -static const macro_op_t fstore_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FSTORED, .latency = 1}, - .uop[1] = {.type = UOP_FSTOREA, .latency = 1}, +static const macro_op_t fstore_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FSTORED, .latency = 1}, + .uop[1] = { .type = UOP_FSTOREA, .latency = 1}, }; -static const macro_op_t load_fiadd_op = -{ - .nr_uops = 7, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 1}, - .uop[2] = {.type = UOP_FLOAT, .latency = 1}, - .uop[3] = {.type = UOP_FLOAT, .latency = 1}, - .uop[4] = {.type = UOP_FLOAT, .latency = 1}, - .uop[5] = {.type = UOP_FLOAT, .latency = 1}, - .uop[6] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t load_fiadd_op = { + .nr_uops = 7, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = { .type = UOP_FLOAT, .latency = 1}, + .uop[2] = { .type = UOP_FLOAT, .latency = 1}, + .uop[3] = { .type = UOP_FLOAT, .latency = 1}, + .uop[4] = { .type = UOP_FLOAT, .latency = 1}, + .uop[5] = { .type = UOP_FLOAT, .latency = 1}, + .uop[6] = { .type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t fdiv_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 37} +static const macro_op_t fdiv_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 37} }; -static const macro_op_t fdiv_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 37} +static const macro_op_t fdiv_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1 }, + .uop[1] = { .type = UOP_FLOAT, .latency = 37} }; -static const macro_op_t fsin_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 62} +static const macro_op_t fsin_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 62} }; -static const macro_op_t fsqrt_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 69} +static const macro_op_t fsqrt_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 69} }; -static const macro_op_t fldcw_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 10} +static const macro_op_t fldcw_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 10} }; -static const macro_op_t complex_float_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t complex_float_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t complex_float_l_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 50} +static const macro_op_t complex_float_l_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 50} }; -static const macro_op_t flde_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAD, .latency = 1}, - .uop[2] = {.type = UOP_FLOAT, .latency = 2} +static const macro_op_t flde_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = { .type = UOP_FLOAD, .latency = 1}, + .uop[2] = { .type = UOP_FLOAT, .latency = 2} }; -static const macro_op_t fste_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 2}, - .uop[1] = {.type = UOP_FSTORED, .latency = 1}, - .uop[2] = {.type = UOP_FSTOREA, .latency = 1} +static const macro_op_t fste_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = { .type = UOP_FSTORED, .latency = 1}, + .uop[2] = { .type = UOP_FSTOREA, .latency = 1} }; -static const macro_op_t complex_alu1_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t complex_alu1_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const macro_op_t alu2_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t alu2_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t alu3_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t alu3_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t alu6_op = -{ - .nr_uops = 6, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1}, - .uop[4] = {.type = UOP_ALU, .latency = 1}, - .uop[5] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t alu6_op = { + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1}, + .uop[4] = { .type = UOP_ALU, .latency = 1}, + .uop[5] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t complex_alup0_1_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t complex_alup0_1_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t alup0_3_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t alup0_3_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t alup0_6_op = -{ - .nr_uops = 6, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALUP0, .latency = 1}, - .uop[3] = {.type = UOP_ALUP0, .latency = 1}, - .uop[4] = {.type = UOP_ALUP0, .latency = 1}, - .uop[5] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t alup0_6_op = { + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALUP0, .latency = 1}, + .uop[3] = { .type = UOP_ALUP0, .latency = 1}, + .uop[4] = { .type = UOP_ALUP0, .latency = 1}, + .uop[5] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t arpl_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 3}, - .uop[1] = {.type = UOP_ALU, .latency = 3} +static const macro_op_t arpl_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = { .type = UOP_ALU, .latency = 3} }; -static const macro_op_t bound_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t bound_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t bsx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 10} +static const macro_op_t bsx_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10} }; -static const macro_op_t call_far_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 3}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t call_far_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t cli_sti_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 7} +static const macro_op_t cli_sti_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 7} }; -static const macro_op_t cmps_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t cmps_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t cmpsb_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t cmpsb_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t cmpxchg_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_STORED, .latency = 1}, - .uop[3] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t cmpxchg_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_STORED, .latency = 1}, + .uop[3] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t cmpxchg_b_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_STORED, .latency = 1}, - .uop[3] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t cmpxchg_b_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_STORED, .latency = 1}, + .uop[3] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t complex_push_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 1}, - .uop[1] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t complex_push_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t cpuid_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 23} +static const macro_op_t cpuid_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 23} }; -static const macro_op_t div16_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 21} +static const macro_op_t div16_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 21} }; -static const macro_op_t div16_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 21} +static const macro_op_t div16_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1 }, + .uop[1] = { .type = UOP_ALUP0, .latency = 21} }; -static const macro_op_t div32_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 37} +static const macro_op_t div32_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 37} }; -static const macro_op_t div32_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 37} +static const macro_op_t div32_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1 }, + .uop[1] = { .type = UOP_ALUP0, .latency = 37} }; -static const macro_op_t emms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 50} +static const macro_op_t emms_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 50} }; -static const macro_op_t enter_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 1}, - .uop[1] = {.type = UOP_STOREA, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 10} +static const macro_op_t enter_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1 }, + .uop[1] = { .type = UOP_STOREA, .latency = 1 }, + .uop[2] = { .type = UOP_ALU, .latency = 10} }; -static const macro_op_t femms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 6} +static const macro_op_t femms_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 6} }; -static const macro_op_t in_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 18} +static const macro_op_t in_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 18} }; -static const macro_op_t ins_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 18}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t ins_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 18}, + .uop[1] = { .type = UOP_STORED, .latency = 1 }, + .uop[2] = { .type = UOP_STOREA, .latency = 1 }, + .uop[3] = { .type = UOP_ALU, .latency = 1 } }; -static const macro_op_t int_op = -{ - .nr_uops = 8, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 20}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_STORED, .latency = 1}, - .uop[4] = {.type = UOP_STOREA, .latency = 1}, - .uop[5] = {.type = UOP_STORED, .latency = 1}, - .uop[6] = {.type = UOP_STOREA, .latency = 1}, - .uop[7] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t int_op = { + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 20}, + .uop[1] = { .type = UOP_STORED, .latency = 1 }, + .uop[2] = { .type = UOP_STOREA, .latency = 1 }, + .uop[3] = { .type = UOP_STORED, .latency = 1 }, + .uop[4] = { .type = UOP_STOREA, .latency = 1 }, + .uop[5] = { .type = UOP_STORED, .latency = 1 }, + .uop[6] = { .type = UOP_STOREA, .latency = 1 }, + .uop[7] = { .type = UOP_BRANCH, .latency = 1 } }; -static const macro_op_t iret_op = -{ - .nr_uops = 5, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 3}, - .uop[1] = {.type = UOP_LOAD, .latency = 3}, - .uop[2] = {.type = UOP_LOAD, .latency = 3}, - .uop[3] = {.type = UOP_ALU, .latency = 20}, - .uop[4] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t iret_op = { + .nr_uops = 5, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3 }, + .uop[1] = { .type = UOP_LOAD, .latency = 3 }, + .uop[2] = { .type = UOP_LOAD, .latency = 3 }, + .uop[3] = { .type = UOP_ALU, .latency = 20}, + .uop[4] = { .type = UOP_BRANCH, .latency = 1 } }; -static const macro_op_t invd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 500} +static const macro_op_t invd_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 500} }; -static const macro_op_t jmp_far_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 3}, - .uop[1] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t jmp_far_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t lss_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 3} +static const macro_op_t lss_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 3} }; -static const macro_op_t mov_mem_seg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, +static const macro_op_t mov_mem_seg_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, }; -static const macro_op_t mov_seg_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 3} +static const macro_op_t mov_seg_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 3} }; -static const macro_op_t mov_seg_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 3} +static const macro_op_t mov_seg_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3} }; -static const macro_op_t mul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t mul_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t mul_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t mul_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t mul64_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t mul64_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t mul64_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALUP0, .latency = 1}, - .uop[3] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t mul64_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALUP0, .latency = 1}, + .uop[3] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t out_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 18} +static const macro_op_t out_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 18} }; -static const macro_op_t outs_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 18} +static const macro_op_t outs_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1 }, + .uop[1] = { .type = UOP_ALU, .latency = 18} }; -static const macro_op_t pusha_op = -{ - .nr_uops = 8, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 2}, - .uop[1] = {.type = UOP_STOREA, .latency = 2}, - .uop[2] = {.type = UOP_STORED, .latency = 2}, - .uop[3] = {.type = UOP_STOREA, .latency = 2}, - .uop[4] = {.type = UOP_STORED, .latency = 2}, - .uop[5] = {.type = UOP_STOREA, .latency = 2}, - .uop[6] = {.type = UOP_STORED, .latency = 2}, - .uop[7] = {.type = UOP_STOREA, .latency = 2} +static const macro_op_t pusha_op = { + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 2}, + .uop[1] = { .type = UOP_STOREA, .latency = 2}, + .uop[2] = { .type = UOP_STORED, .latency = 2}, + .uop[3] = { .type = UOP_STOREA, .latency = 2}, + .uop[4] = { .type = UOP_STORED, .latency = 2}, + .uop[5] = { .type = UOP_STOREA, .latency = 2}, + .uop[6] = { .type = UOP_STORED, .latency = 2}, + .uop[7] = { .type = UOP_STOREA, .latency = 2} }; -static const macro_op_t popa_op = -{ - .nr_uops = 8, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .latency = 1}, - .uop[2] = {.type = UOP_LOAD, .latency = 1}, - .uop[3] = {.type = UOP_LOAD, .latency = 1}, - .uop[4] = {.type = UOP_LOAD, .latency = 1}, - .uop[5] = {.type = UOP_LOAD, .latency = 1}, - .uop[6] = {.type = UOP_LOAD, .latency = 1}, - .uop[7] = {.type = UOP_LOAD, .latency = 1} +static const macro_op_t popa_op = { + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .latency = 1}, + .uop[2] = { .type = UOP_LOAD, .latency = 1}, + .uop[3] = { .type = UOP_LOAD, .latency = 1}, + .uop[4] = { .type = UOP_LOAD, .latency = 1}, + .uop[5] = { .type = UOP_LOAD, .latency = 1}, + .uop[6] = { .type = UOP_LOAD, .latency = 1}, + .uop[7] = { .type = UOP_LOAD, .latency = 1} }; -static const macro_op_t popf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 6}, - .uop[2] = {.type = UOP_ALUP0, .latency = 10} +static const macro_op_t popf_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1 }, + .uop[1] = { .type = UOP_ALU, .latency = 6 }, + .uop[2] = { .type = UOP_ALUP0, .latency = 10} }; -static const macro_op_t pushf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t pushf_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t ret_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t ret_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t retf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 3}, - .uop[2] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t retf_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 3}, + .uop[2] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t scas_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t scas_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t scasb_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t scasb_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t setcc_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_FSTORED, .latency = 1}, - .uop[3] = {.type = UOP_FSTOREA, .latency = 1} +static const macro_op_t setcc_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_FSTORED, .latency = 1}, + .uop[3] = { .type = UOP_FSTOREA, .latency = 1} }; -static const macro_op_t setcc_reg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t setcc_reg_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t test_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_mem_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t test_mem_b_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t xchg_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t xchg_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t xlat_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .latency = 1} +static const macro_op_t xlat_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .latency = 1} }; -static const macro_op_t wbinvd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 10000} +static const macro_op_t wbinvd_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10000} }; #define INVALID NULL -static const macro_op_t *opcode_timings[256] = -{ +static const macro_op_t *opcode_timings[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, /* ADD ADD PUSH ES POP ES*/ @@ -1036,10 +919,11 @@ static const macro_op_t *opcode_timings[256] = &complex_alu1_op, &complex_alu1_op, &cli_sti_op, &cli_sti_op, /* CLD STD INCDEC*/ &complex_alu1_op, &complex_alu1_op, &alup0_store_op, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_mod3[256] = -{ +static const macro_op_t *opcode_timings_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alup0_op, &alu_op, &alup0_op, &alu_op, /* ADD ADD PUSH ES POP ES*/ @@ -1173,10 +1057,11 @@ static const macro_op_t *opcode_timings_mod3[256] = &complex_alu1_op, &complex_alu1_op, &cli_sti_op, &cli_sti_op, /* CLD STD INCDEC*/ &complex_alu1_op, &complex_alu1_op, &complex_alup0_1_op, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_0f[256] = -{ +static const macro_op_t *opcode_timings_0f[256] = { + // clang-format off /*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, INVALID, &alu6_op, &alu6_op, INVALID, &invd_op, &wbinvd_op, INVALID, INVALID, @@ -1256,9 +1141,10 @@ static const macro_op_t *opcode_timings_0f[256] = INVALID, &pmul_mem_op, INVALID, INVALID, &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + // clang-format on }; -static const macro_op_t *opcode_timings_0f_mod3[256] = -{ +static const macro_op_t *opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, INVALID, &alu6_op, &alu6_op, INVALID, &invd_op, &wbinvd_op, INVALID, INVALID, @@ -1342,113 +1228,131 @@ static const macro_op_t *opcode_timings_0f_mod3[256] = static const macro_op_t *opcode_timings_shift[8] = { + // clang-format off &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op + // clang-format on }; -static const macro_op_t *opcode_timings_shift_b[8] = -{ +static const macro_op_t *opcode_timings_shift_b[8] = { + // clang-format off &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op + // clang-format on }; -static const macro_op_t *opcode_timings_shift_mod3[8] = -{ +static const macro_op_t *opcode_timings_shift_mod3[8] = { + // clang-format off &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &alu_op, &alu_op, &alu_op, &alu_op + // clang-format on }; -static const macro_op_t *opcode_timings_shift_b_mod3[8] = -{ +static const macro_op_t *opcode_timings_shift_b_mod3[8] = { + // clang-format off &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, &alup0_op, &alup0_op, &alup0_op, &alup0_op + // clang-format on }; -static const macro_op_t *opcode_timings_80[8] = -{ +static const macro_op_t *opcode_timings_80[8] = { + // clang-format off &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, + // clang-format on }; -static const macro_op_t *opcode_timings_80_mod3[8] = -{ +static const macro_op_t *opcode_timings_80_mod3[8] = { + // clang-format off &alup0_op, &alup0_op, &alup0_store_op, &alup0_store_op, &alup0_op, &alup0_op, &alup0_op, &alup0_op, + // clang-format on }; -static const macro_op_t *opcode_timings_8x[8] = -{ +static const macro_op_t *opcode_timings_8x[8] = { + // clang-format off &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + // clang-format on }; -static const macro_op_t *opcode_timings_8x_mod3[8] = -{ +static const macro_op_t *opcode_timings_8x_mod3[8] = { + // clang-format off &alu_op, &alu_op, &alu_store_op, &alu_store_op, &alu_op, &alu_op, &alu_op, &alu_op, + // clang-format on }; -static const macro_op_t *opcode_timings_f6[8] = -{ +static const macro_op_t *opcode_timings_f6[8] = { + // clang-format off /* TST NOT NEG*/ &test_mem_imm_b_op, INVALID, &alup0_store_op, &alup0_store_op, /* MUL IMUL DIV IDIV*/ &mul_mem_op, &mul_mem_op, &div16_mem_op, &div16_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_f6_mod3[8] = -{ +static const macro_op_t *opcode_timings_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alup0_op, &alup0_op, /* MUL IMUL DIV IDIV*/ &mul_op, &mul_op, &div16_op, &div16_op, + // clang-format on }; -static const macro_op_t *opcode_timings_f7[8] = -{ +static const macro_op_t *opcode_timings_f7[8] = { + // clang-format off /* TST NOT NEG*/ &test_mem_imm_op, INVALID, &alu_store_op, &alu_store_op, /* MUL IMUL DIV IDIV*/ &mul64_mem_op, &mul64_mem_op, &div32_mem_op, &div32_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_f7_mod3[8] = -{ +static const macro_op_t *opcode_timings_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, /* MUL IMUL DIV IDIV*/ &mul64_op, &mul64_op, &div32_op, &div32_op, + // clang-format on }; -static const macro_op_t *opcode_timings_ff[8] = -{ +static const macro_op_t *opcode_timings_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ &alu_store_op, &alu_store_op, &store_op, &call_far_op, /* JMP JMP far PUSH*/ &branch_op, &jmp_far_op, &push_mem_op, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_ff_mod3[8] = -{ +static const macro_op_t *opcode_timings_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ &complex_alu1_op, &complex_alu1_op, &store_op, &call_far_op, /* JMP JMP far PUSH*/ &branch_op, &jmp_far_op, &complex_push_mem_op, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_d8[8] = -{ +static const macro_op_t *opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBs FSUBRs FDIVs FDIVRs*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_d8_mod3[8] = -{ +static const macro_op_t *opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ &fadd_op, &fmul_op, &float_op, &float_op, /* FSUB FSUBR FDIV FDIVR*/ &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on }; -static const macro_op_t *opcode_timings_d9[8] = -{ +static const macro_op_t *opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDENV FLDCW FSTENV FSTCW*/ &complex_float_l_op, &fldcw_op, &complex_float_l_op, &complex_float_op + // clang-format on }; -static const macro_op_t *opcode_timings_d9_mod3[64] = -{ +static const macro_op_t *opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, @@ -1477,31 +1381,35 @@ static const macro_op_t *opcode_timings_d9_mod3[64] = &fdiv_op, INVALID, &fsqrt_op, &fsin_op, /* opFRNDINT opFSCALE opFSIN opFCOS*/ &float_op, &fdiv_op, &fsin_op, &fsin_op + // clang-format on }; -static const macro_op_t *opcode_timings_da[8] = -{ +static const macro_op_t *opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_da_mod3[8] = -{ +static const macro_op_t *opcode_timings_da_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ INVALID, &float_op, INVALID, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_db[8] = -{ +static const macro_op_t *opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDe FSTPe*/ INVALID, &flde_op, INVALID, &fste_op + // clang-format on }; -static const macro_op_t *opcode_timings_db_mod3[64] = -{ +static const macro_op_t *opcode_timings_db_mod3[64] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1527,153 +1435,152 @@ static const macro_op_t *opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // clang-format on }; -static const macro_op_t *opcode_timings_dc[8] = -{ +static const macro_op_t *opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBd FSUBRd FDIVd FDIVRd*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_dc_mod3[8] = -{ +static const macro_op_t *opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ &fadd_op, &fmul_op, INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ &float_op, &float_op, &fdiv_op, &fdiv_op + // clang-format on }; -static const macro_op_t *opcode_timings_dd[8] = -{ +static const macro_op_t *opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FRSTOR FSAVE FSTSW*/ &complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op + // clang-format on }; -static const macro_op_t *opcode_timings_dd_mod3[8] = -{ +static const macro_op_t *opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ &float_op, INVALID, &float_op, &float_op, /* FUCOM FUCOMP*/ &float_op, &float_op, INVALID, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_de[8] = -{ +static const macro_op_t *opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, + // clang-format on }; -static const macro_op_t *opcode_timings_de_mod3[8] = -{ +static const macro_op_t *opcode_timings_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ &fadd_op, &fmul_op, INVALID, &float_op, /* FSUBP FSUBRP FDIVP FDIVRP*/ &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on }; -static const macro_op_t *opcode_timings_df[8] = -{ +static const macro_op_t *opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FILDiq FBSTP FISTPiq*/ INVALID, &load_float_op, &complex_float_l_op, &fstore_op, + // clang-format on }; -static const macro_op_t *opcode_timings_df_mod3[8] = -{ +static const macro_op_t *opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ &float_op, INVALID, INVALID, INVALID + // clang-format on }; - static uint8_t last_prefix; -static int prefixes; +static int prefixes; static int decode_timestamp; static int last_complete_timestamp; -typedef struct p6_unit_t -{ - uint32_t uop_mask; - double first_available_cycle; +typedef struct p6_unit_t { + uint32_t uop_mask; + double first_available_cycle; } p6_unit_t; -static int nr_units; +static int nr_units; static p6_unit_t *units; /*Pentium Pro has no MMX*/ -static p6_unit_t ppro_units[] = -{ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT)}, /*Port 0*/ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH)}, /*Port 1*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD)}, /*Port 2*/ - {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED)}, /*Port 3*/ - {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA)}, /*Port 4*/ +static p6_unit_t ppro_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT) }, /*Port 0*/ + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) }, /*Port 1*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) }, /*Port 2*/ + { .uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) }, /*Port 3*/ + { .uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) }, /*Port 4*/ }; #define NR_PPRO_UNITS (sizeof(ppro_units) / sizeof(p6_unit_t)) /*Pentium II/Celeron assigns the multiplier to port 0, the shifter to port 1, and shares the MMX ALU*/ -static p6_unit_t p2_units[] = -{ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT) | /*Port 0*/ - (1 << UOP_MMX) | (1 << UOP_MMX_MUL)}, - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Port 1*/ - (1 << UOP_MMX) | (1 << UOP_MMX_SHIFT)}, - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Port 2*/ - {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED)}, /*Port 3*/ - {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA)}, /*Port 4*/ +static p6_unit_t p2_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT) | /*Port 0*/ + (1 << UOP_MMX) | (1 << UOP_MMX_MUL) }, + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Port 1*/ + (1 << UOP_MMX) | (1 << UOP_MMX_SHIFT) }, + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Port 2*/ + { .uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED) }, /*Port 3*/ + { .uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA) }, /*Port 4*/ }; #define NR_P2_UNITS (sizeof(p2_units) / sizeof(p6_unit_t)) -static int uop_run(const p6_uop_t *uop, int decode_time) +static int +uop_run(const p6_uop_t *uop, int decode_time) { - int c; - p6_unit_t *best_unit = NULL; - int best_start_cycle = 99999; + p6_unit_t *best_unit = NULL; + int best_start_cycle = 99999; - /*UOP_FXCH does not require execution*/ - if (uop->type == UOP_FXCH) - return decode_time; + /*UOP_FXCH does not require execution*/ + if (uop->type == UOP_FXCH) + return decode_time; - /*Find execution unit for this uOP*/ - for (c = 0; c < nr_units; c++) - { - if (units[c].uop_mask & (1 << uop->type)) - { - if (units[c].first_available_cycle < best_start_cycle) - { - best_unit = &units[c]; - best_start_cycle = units[c].first_available_cycle; - } - } + /*Find execution unit for this uOP*/ + for (int c = 0; c < nr_units; c++) { + if (units[c].uop_mask & (1 << uop->type)) { + if (units[c].first_available_cycle < best_start_cycle) { + best_unit = &units[c]; + best_start_cycle = units[c].first_available_cycle; + } } - if (!best_unit) - fatal("uop_run: can not find execution unit\n"); + } + if (!best_unit) + fatal("uop_run: can not find execution unit\n"); - if (best_start_cycle < decode_time) - best_start_cycle = decode_time; - best_unit->first_available_cycle = best_start_cycle + uop->latency; + if (best_start_cycle < decode_time) + best_start_cycle = decode_time; + best_unit->first_available_cycle = best_start_cycle + uop->latency; - - - return best_start_cycle + uop->latency; + return best_start_cycle + uop->latency; } /*The P6 decoders can decode, per clock : - 1 to 3 'simple' instructions, each up to 1 uOP and 7 bytes long - 1 'complex' instruction, up to 4 uOPs or 3 per cycle for instructions longer than 4 uOPs */ -static struct -{ - int nr_uops; - const p6_uop_t *uops[6]; - /*Earliest time a uop can start. If the timestamp is -1, then the uop is - part of a dependency chain and the start time is the completion time of - the previous uop*/ - int earliest_start[6]; +static struct { + int nr_uops; + const p6_uop_t *uops[6]; + /*Earliest time a uop can start. If the timestamp is -1, then the uop is + part of a dependency chain and the start time is the completion time of + the previous uop*/ + int earliest_start[6]; } decode_buffer; #define NR_OPSEQS 3 @@ -1691,421 +1598,400 @@ static int fpu_st_timestamp[8]; dependent uop chains*/ static int last_uop_timestamp = 0; -void decode_flush_p6(void) +void +decode_flush_p6(void) { - int c; - int start_timestamp, uop_timestamp = 0; + int start_timestamp; + int uop_timestamp = 0; - /*Decoded opseq can not be submitted if there are no free spaces in the - opseq buffer*/ - if (decode_timestamp < opseq_completion_timestamp[next_opseq]) - decode_timestamp = opseq_completion_timestamp[next_opseq]; + /*Decoded opseq can not be submitted if there are no free spaces in the + opseq buffer*/ + if (decode_timestamp < opseq_completion_timestamp[next_opseq]) + decode_timestamp = opseq_completion_timestamp[next_opseq]; - /*Ensure that uops can not be submitted before they have been decoded*/ - if (decode_timestamp > last_uop_timestamp) - last_uop_timestamp = decode_timestamp; + /*Ensure that uops can not be submitted before they have been decoded*/ + if (decode_timestamp > last_uop_timestamp) + last_uop_timestamp = decode_timestamp; - /*Submit uops to execution units, and determine the latest completion time*/ - for (c = 0; c < (decode_buffer.nr_uops); c++) - { - if (decode_buffer.earliest_start[c] == -1) - start_timestamp = last_uop_timestamp; - else - start_timestamp = decode_buffer.earliest_start[c]; - - last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); - if (last_uop_timestamp > uop_timestamp) - uop_timestamp = last_uop_timestamp; - } - - /*Calculate opseq completion time. Since opseqs complete in order, it - must be after the last completion.*/ - if (uop_timestamp <= last_complete_timestamp) - last_complete_timestamp = last_complete_timestamp + 1; + /*Submit uops to execution units, and determine the latest completion time*/ + for (int c = 0; c < (decode_buffer.nr_uops); c++) { + if (decode_buffer.earliest_start[c] == -1) + start_timestamp = last_uop_timestamp; else - last_complete_timestamp = uop_timestamp; + start_timestamp = decode_buffer.earliest_start[c]; - /*Advance to next opseq in buffer*/ - opseq_completion_timestamp[next_opseq] = last_complete_timestamp; - next_opseq++; - if (next_opseq == NR_OPSEQS) - next_opseq = 0; + last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); + if (last_uop_timestamp > uop_timestamp) + uop_timestamp = last_uop_timestamp; + } - decode_timestamp++; - decode_buffer.nr_uops = 0; + /*Calculate opseq completion time. Since opseqs complete in order, it + must be after the last completion.*/ + if (uop_timestamp <= last_complete_timestamp) + last_complete_timestamp = last_complete_timestamp + 1; + else + last_complete_timestamp = uop_timestamp; + + /*Advance to next opseq in buffer*/ + opseq_completion_timestamp[next_opseq] = last_complete_timestamp; + next_opseq++; + if (next_opseq == NR_OPSEQS) + next_opseq = 0; + + decode_timestamp++; + decode_buffer.nr_uops = 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the limit on P6 simple decoding*/ -static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) +static int +codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) { - int len = prefixes + 1; /*Opcode*/ - if (deps & MODRM) - { - len++; /*ModR/M*/ - if (deps & HAS_IMM8) - len++; - if (deps & HAS_IMM1632) - len += (op_32 & 0x100) ? 4 : 2; + int len = prefixes + 1; /*Opcode*/ + if (deps & MODRM) { + len++; /*ModR/M*/ + if (deps & HAS_IMM8) + len++; + if (deps & HAS_IMM1632) + len += (op_32 & 0x100) ? 4 : 2; - if (op_32 & 0x200) - { - if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) - { - /* Has SIB*/ - len++; - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0x700) == 0x500) - len += 4; - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0xc7) == 0x05) - len += 4; - } - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 2; - else if ((fetchdat & 0xc7) == 0x06) - len += 2; - } + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; } + } - return len; + return len; } -static void decode_instruction(const macro_op_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) +static void +decode_instruction(const macro_op_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) { - uint32_t regmask_required; - uint32_t regmask_modified; - int c; - int d = 0; /*Complex decoder uOPs*/ - int earliest_start = 0; - decode_type_t decode_type = ins->decode_type; - int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); + uint32_t regmask_required; + uint32_t regmask_modified; + int c; + int d = 0; /*Complex decoder uOPs*/ + int earliest_start = 0; + decode_type_t decode_type = ins->decode_type; + int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); - /*Generate input register mask, and determine the earliest time this - instruction can start. This is not accurate, as this is calculated per - x86 instruction when it should be handled per uop*/ - regmask_required = get_dstdep_mask(deps, fetchdat, bit8); - regmask_required |= get_addr_regmask(deps, fetchdat, op_32); - for (c = 0; c < 8; c++) - { - if (regmask_required & (1 << c)) - { - if (reg_available_timestamp[c] > decode_timestamp) - earliest_start = reg_available_timestamp[c]; - } + /*Generate input register mask, and determine the earliest time this + instruction can start. This is not accurate, as this is calculated per + x86 instruction when it should be handled per uop*/ + regmask_required = get_dstdep_mask(deps, fetchdat, bit8); + regmask_required |= get_addr_regmask(deps, fetchdat, op_32); + for (c = 0; c < 8; c++) { + if (regmask_required & (1 << c)) { + if (reg_available_timestamp[c] > decode_timestamp) + earliest_start = reg_available_timestamp[c]; } - if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) - earliest_start = fpu_st_timestamp[0]; - if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) - earliest_start = fpu_st_timestamp[1]; - if ((deps & FPU_RW_STREG)) - { - int reg = fetchdat & 7; + } + if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) + earliest_start = fpu_st_timestamp[0]; + if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) + earliest_start = fpu_st_timestamp[1]; + if (deps & FPU_RW_STREG) { + int reg = fetchdat & 7; - if (fpu_st_timestamp[reg] > decode_timestamp) - earliest_start = fpu_st_timestamp[reg]; - } + if (fpu_st_timestamp[reg] > decode_timestamp) + earliest_start = fpu_st_timestamp[reg]; + } - /*Simple decoders are limited to 7 bytes & 1 uOP*/ - if ((decode_type == DECODE_SIMPLE && instr_length > 7) || (decode_type == DECODE_SIMPLE && ins->nr_uops > 1)) - decode_type = DECODE_COMPLEX; + /*Simple decoders are limited to 7 bytes & 1 uOP*/ + if ((decode_type == DECODE_SIMPLE && instr_length > 7) || (decode_type == DECODE_SIMPLE && ins->nr_uops > 1)) + decode_type = DECODE_COMPLEX; - switch (decode_type) - { - case DECODE_SIMPLE: - if (decode_buffer.nr_uops - d == 2) - { - decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; - decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - decode_buffer.nr_uops = 3; - decode_flush_p6(); - } - else if (decode_buffer.nr_uops - d == 1) - { - decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; - decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - decode_buffer.nr_uops = 2+d; - if (d) - decode_flush_p6(); - } - else if (decode_buffer.nr_uops) - { - decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; - decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - decode_buffer.nr_uops = 1+d; - } - else - { - decode_buffer.nr_uops = 1; - decode_buffer.uops[0] = &ins->uop[0]; - decode_buffer.earliest_start[0] = earliest_start; - } - break; - - case DECODE_COMPLEX: - if (decode_buffer.nr_uops) - decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. can't be decoded after a simple one*/ - - d = 0; - - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[d] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[d] = earliest_start; - else - decode_buffer.earliest_start[d] = -1; - d++; - - if ((d == 3) && (ins->nr_uops > 4)) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ - { - d = 0; - decode_buffer.nr_uops = 3; - decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ - } - } + switch (decode_type) { + case DECODE_SIMPLE: + if (decode_buffer.nr_uops - d == 2) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 3; + decode_flush_p6(); + } else if (decode_buffer.nr_uops - d == 1) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 2 + d; if (d) - { - decode_buffer.nr_uops = d; + decode_flush_p6(); + } else if (decode_buffer.nr_uops) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 1 + d; + } else { + decode_buffer.nr_uops = 1; + decode_buffer.uops[0] = &ins->uop[0]; + decode_buffer.earliest_start[0] = earliest_start; + } + break; + + case DECODE_COMPLEX: + if (decode_buffer.nr_uops) + decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. can't be decoded after a simple one*/ + + d = 0; + + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[d] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[d] = earliest_start; + else + decode_buffer.earliest_start[d] = -1; + d++; + + if ((d == 3) && (ins->nr_uops > 4)) { /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/ + d = 0; + decode_buffer.nr_uops = 3; + decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ } - break; - } + } + if (d) { + decode_buffer.nr_uops = d; + } + break; + } - /*Update write timestamps for any output registers*/ - regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); - for (c = 0; c < 8; c++) - { - if (regmask_modified & (1 << c)) - reg_available_timestamp[c] = last_complete_timestamp; - } + /*Update write timestamps for any output registers*/ + regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); + for (c = 0; c < 8; c++) { + if (regmask_modified & (1 << c)) + reg_available_timestamp[c] = last_complete_timestamp; + } + if (deps & FPU_POP) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 1]; + fpu_st_timestamp[7] = 0; + } + if (deps & FPU_POP2) { + for (c = 0; c < 6; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 2]; + fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; + } + if (deps & FPU_PUSH) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c + 1] = fpu_st_timestamp[c]; + fpu_st_timestamp[0] = 0; + } + if (deps & FPU_WRITE_ST0) + fpu_st_timestamp[0] = last_complete_timestamp; + if (deps & FPU_WRITE_ST1) + fpu_st_timestamp[1] = last_complete_timestamp; + if (deps & FPU_WRITE_STREG) { + int reg = fetchdat & 7; if (deps & FPU_POP) - { - for (c = 0; c < 7; c++) - fpu_st_timestamp[c] = fpu_st_timestamp[c+1]; - fpu_st_timestamp[7] = 0; - } - if (deps & FPU_POP2) - { - for (c = 0; c < 6; c++) - fpu_st_timestamp[c] = fpu_st_timestamp[c+2]; - fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; - } - if (deps & FPU_PUSH) - { - for (c = 0; c < 7; c++) - fpu_st_timestamp[c+1] = fpu_st_timestamp[c]; - fpu_st_timestamp[0] = 0; - } - if (deps & FPU_WRITE_ST0) - fpu_st_timestamp[0] = last_complete_timestamp; - if (deps & FPU_WRITE_ST1) - fpu_st_timestamp[1] = last_complete_timestamp; - if (deps & FPU_WRITE_STREG) - { - int reg = fetchdat & 7; - if (deps & FPU_POP) - reg--; - if (reg >= 0 && - !(reg == 0 && (deps & FPU_WRITE_ST0)) && - !(reg == 1 && (deps & FPU_WRITE_ST1))) - fpu_st_timestamp[reg] = last_complete_timestamp; - } + reg--; + if (reg >= 0 && !(reg == 0 && (deps & FPU_WRITE_ST0)) && !(reg == 1 && (deps & FPU_WRITE_ST1))) + fpu_st_timestamp[reg] = last_complete_timestamp; + } } -void codegen_timing_p6_block_start(void) +void +codegen_timing_p6_block_start(void) { - int c; + int c; - for (c = 0; c < nr_units; c++) - units[c].first_available_cycle = 0; + for (c = 0; c < nr_units; c++) + units[c].first_available_cycle = 0; - decode_timestamp = 0; - last_complete_timestamp = 0; + decode_timestamp = 0; + last_complete_timestamp = 0; - for (c = 0; c < NR_OPSEQS; c++) - opseq_completion_timestamp[c] = 0; - next_opseq = 0; + for (c = 0; c < NR_OPSEQS; c++) + opseq_completion_timestamp[c] = 0; + next_opseq = 0; - for (c = 0; c < NR_REGS; c++) - reg_available_timestamp[c] = 0; - for (c = 0; c < 8; c++) - fpu_st_timestamp[c] = 0; + for (c = 0; c < NR_REGS; c++) + reg_available_timestamp[c] = 0; + for (c = 0; c < 8; c++) + fpu_st_timestamp[c] = 0; } -void codegen_timing_p6_start(void) +void +codegen_timing_p6_start(void) { - if (cpu_s->cpu_type == CPU_PENTIUMPRO) - { - units = ppro_units; - nr_units = NR_PPRO_UNITS; - } - else - { - units = p2_units; - nr_units = NR_P2_UNITS; - } - last_prefix = 0; - prefixes = 0; + if (cpu_s->cpu_type == CPU_PENTIUMPRO) { + units = ppro_units; + nr_units = NR_PPRO_UNITS; + } else { + units = p2_units; + nr_units = NR_P2_UNITS; + } + last_prefix = 0; + prefixes = 0; } -void codegen_timing_p6_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_p6_prefix(uint8_t prefix, uint32_t fetchdat) { - if (prefix != 0x0f) - decode_timestamp++; + if (prefix != 0x0f) + decode_timestamp++; - last_prefix = prefix; - prefixes++; + last_prefix = prefix; + prefixes++; } -void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +void +codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - const macro_op_t **ins_table; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int old_last_complete_timestamp = last_complete_timestamp; - int bit8 = !(opcode & 1); + const macro_op_t **ins_table; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int old_last_complete_timestamp = last_complete_timestamp; + int bit8 = !(opcode & 1); - switch (last_prefix) - { - case 0x0f: - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + case 0x83: + ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xd0: + case 0xd2: + ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc1: + case 0xd1: + case 0xd3: + ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: - ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: case 0x83: - ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; + ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xd0: case 0xd2: - ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (ins_table[opcode]) + decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); + else + decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); +} - case 0xc1: case 0xd1: case 0xd3: - ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; - - case 0xf6: - ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; - - default: - ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } - } - - if (ins_table[opcode]) - decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); - else - decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8); +void +codegen_timing_p6_block_end(void) +{ + if (decode_buffer.nr_uops) { + int old_last_complete_timestamp = last_complete_timestamp; + decode_flush_p6(); codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); + } } -void codegen_timing_p6_block_end(void) +int +codegen_timing_p6_jump_cycles(void) { - if (decode_buffer.nr_uops) - { - int old_last_complete_timestamp = last_complete_timestamp; - decode_flush_p6(); - codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); - } + if (decode_buffer.nr_uops) + return 1; + return 0; } -int codegen_timing_p6_jump_cycles(void) -{ - if (decode_buffer.nr_uops) - return 1; - return 0; -} - -codegen_timing_t codegen_timing_p6 = -{ - codegen_timing_p6_start, - codegen_timing_p6_prefix, - codegen_timing_p6_opcode, - codegen_timing_p6_block_start, - codegen_timing_p6_block_end, - codegen_timing_p6_jump_cycles +codegen_timing_t codegen_timing_p6 = { + codegen_timing_p6_start, + codegen_timing_p6_prefix, + codegen_timing_p6_opcode, + codegen_timing_p6_block_start, + codegen_timing_p6_block_end, + codegen_timing_p6_jump_cycles }; diff --git a/src/cpu/codegen_timing_pentium.c b/src/cpu/codegen_timing_pentium.c index 232455f6d..58ec5b454 100644 --- a/src/cpu/codegen_timing_pentium.c +++ b/src/cpu/codegen_timing_pentium.c @@ -14,8 +14,10 @@ #include #include #include <86box/86box.h> -#include <86box/mem.h> #include "cpu.h" +#include <86box/mem.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" @@ -23,46 +25,45 @@ #include "codegen_ops.h" #include "codegen_timing_common.h" - /*Instruction has different execution time for 16 and 32 bit data. Does not pair */ -#define CYCLES_HAS_MULTI (1 << 28) +#define CYCLES_HAS_MULTI (1 << 28) #define CYCLES_MULTI(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) /*Instruction lasts given number of cycles. Does not pair*/ #define CYCLES(c) (c | PAIR_NP) - -static int pair_timings[4][4] = -{ -/* Reg RM RMW Branch*/ -/*Reg*/ {1, 2, 3, 2}, -/*RM*/ {2, 2, 3, 3}, -/*RMW*/ {3, 4, 5, 4}, -/*Branch*/ {-1, -1, -1, -1} +static int pair_timings[4][4] = { + /* Reg RM RMW Branch*/ + /*Reg*/ {1, 2, 3, 2 }, + /*RM*/ + { 2, 2, 3, 3 }, + /*RMW*/ + { 3, 4, 5, 4 }, + /*Branch*/ + { -1, -1, -1, -1} }; /*Instruction follows either register timing, read-modify, or read-modify-write. May be pairable*/ -#define CYCLES_REG (0ull << 0) -#define CYCLES_RM (1ull << 0) -#define CYCLES_RMW (2ull << 0) +#define CYCLES_REG (0ull << 0) +#define CYCLES_RM (1ull << 0) +#define CYCLES_RMW (2ull << 0) #define CYCLES_BRANCH (3ull << 0) /*Instruction has immediate data. Can only be used with PAIR_U/PAIR_V/PAIR_UV*/ -#define CYCLES_HASIMM (3ull << 2) +#define CYCLES_HASIMM (3ull << 2) #define CYCLES_IMM8 (1ull << 2) #define CYCLES_IMM1632 (2ull << 2) -#define CYCLES_MASK ((1ull << 7) - 1) - +#define CYCLES_MASK ((1ull << 7) - 1) /*Instruction does not pair*/ #define PAIR_NP (0ull << 29) /*Instruction pairs in U pipe only*/ -#define PAIR_U (1ull << 29) +#define PAIR_U (1ull << 29) /*Instruction pairs in V pipe only*/ -#define PAIR_V (2ull << 29) +#define PAIR_V (2ull << 29) /*Instruction pairs in both U and V pipes*/ #define PAIR_UV (3ull << 29) /*Instruction pairs in U pipe only and only with FXCH*/ @@ -70,36 +71,34 @@ static int pair_timings[4][4] = /*Instruction is FXCH and only pairs in V pipe with FX pairable instruction*/ #define PAIR_FXCH (6ull << 29) -#define PAIR_FPU (4ull << 29) +#define PAIR_FPU (4ull << 29) #define PAIR_MASK (7ull << 29) - /*comp_time = cycles until instruction complete i_overlap = cycles that overlap with integer f_overlap = cycles that overlap with subsequent FPU*/ -#define FPU_CYCLES(comp_time, i_overlap, f_overlap) ((uint64_t)comp_time) | ((uint64_t)i_overlap << 41) | ((uint64_t)f_overlap << 49) | PAIR_FPU +#define FPU_CYCLES(comp_time, i_overlap, f_overlap) ((uint64_t) comp_time) | ((uint64_t) i_overlap << 41) | ((uint64_t) f_overlap << 49) | PAIR_FPU -#define FPU_COMP_TIME(timing) (timing & 0xff) -#define FPU_I_OVERLAP(timing) ((timing >> 41) & 0xff) -#define FPU_F_OVERLAP(timing) ((timing >> 49) & 0xff) +#define FPU_COMP_TIME(timing) (timing & 0xff) +#define FPU_I_OVERLAP(timing) ((timing >> 41) & 0xff) +#define FPU_F_OVERLAP(timing) ((timing >> 49) & 0xff) -#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) +#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) -#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) +#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) -#define FPU_RESULT_LATENCY(timing) ((timing >> 41) & 0xff) +#define FPU_RESULT_LATENCY(timing) ((timing >> 41) & 0xff) +#define INVALID 0 -#define INVALID 0 - -static int u_pipe_full; -static uint32_t u_pipe_opcode; +static int u_pipe_full; +static uint32_t u_pipe_opcode; static uint64_t *u_pipe_timings; -static uint32_t u_pipe_op_32; -static uint32_t u_pipe_regmask; -static uint32_t u_pipe_fetchdat; -static int u_pipe_decode_delay_offset; +static uint32_t u_pipe_op_32; +static uint32_t u_pipe_regmask; +static uint32_t u_pipe_fetchdat; +static int u_pipe_decode_delay_offset; static uint64_t *u_pipe_deps; static uint32_t regmask_modified; @@ -109,8 +108,8 @@ static uint32_t addr_regmask; static int fpu_latency; static int fpu_st_latency[8]; -static uint64_t opcode_timings[256] = -{ +static uint64_t opcode_timings[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, /* ADD ADD PUSH ES POP ES*/ @@ -243,10 +242,11 @@ static uint64_t opcode_timings[256] = PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), /* CLD STD INCDEC*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_RMW, INVALID + // clang-format on }; -static uint64_t opcode_timings_mod3[256] = -{ +static uint64_t opcode_timings_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* ADD ADD PUSH ES POP ES*/ @@ -380,10 +380,11 @@ static uint64_t opcode_timings_mod3[256] = PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), /* CLD STD INCDEC*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_REG, INVALID + // clang-format on }; -static uint64_t opcode_timings_0f[256] = -{ +static uint64_t opcode_timings_0f[256] = { + // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, @@ -463,9 +464,10 @@ static uint64_t opcode_timings_0f[256] = INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, + // clang-format on }; -static uint64_t opcode_timings_0f_mod3[256] = -{ +static uint64_t opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, @@ -545,86 +547,98 @@ static uint64_t opcode_timings_0f_mod3[256] = INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, + // clang-format on }; -static uint64_t opcode_timings_shift[8] = -{ +static uint64_t opcode_timings_shift[8] = { + // clang-format off PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, + // clang-format on }; -static uint64_t opcode_timings_shift_mod3[8] = -{ +static uint64_t opcode_timings_shift_mod3[8] = { + // clang-format off PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, + // clang-format on }; -static uint64_t opcode_timings_f6[8] = -{ +static uint64_t opcode_timings_f6[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) + // clang-format on }; -static uint64_t opcode_timings_f6_mod3[8] = -{ +static uint64_t opcode_timings_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) + // clang-format on }; -static uint64_t opcode_timings_f7[8] = -{ +static uint64_t opcode_timings_f7[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) + // clang-format on }; -static uint64_t opcode_timings_f7_mod3[8] = -{ +static uint64_t opcode_timings_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) + // clang-format on }; -static uint64_t opcode_timings_ff[8] = -{ +static uint64_t opcode_timings_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), /* JMP JMP far PUSH*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID + // clang-format on }; -static uint64_t opcode_timings_ff_mod3[8] = -{ +static uint64_t opcode_timings_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), /* JMP JMP far PUSH*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID + // clang-format on }; -static uint64_t opcode_timings_d8[8] = -{ +static uint64_t opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /* FSUBs FSUBRs FDIVs FDIVRs*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_d8_mod3[8] = -{ +static uint64_t opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /* FSUB FSUBR FDIV FDIVR*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_d9[8] = -{ +static uint64_t opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), /* FLDENV FLDCW FSTENV FSTCW*/ PAIR_NP | FPU_CYCLES(32,0,0), PAIR_NP | FPU_CYCLES(8,0,0), PAIR_NP | FPU_CYCLES(48,0,0), PAIR_NP | FPU_CYCLES(2,0,0) + // clang-format on }; -static uint64_t opcode_timings_d9_mod3[64] = -{ +static uint64_t opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -653,32 +667,35 @@ static uint64_t opcode_timings_d9_mod3[64] = PAIR_NP | FPU_CYCLES(64,2,2), INVALID, PAIR_NP | FPU_CYCLES(70,69,2), PAIR_NP | FPU_CYCLES(89,2,2), /* opFRNDINT opFSCALE opFSIN opFCOS*/ PAIR_NP | FPU_CYCLES(9,0,0), PAIR_NP | FPU_CYCLES(20,5,0), PAIR_NP | FPU_CYCLES(65,2,2), PAIR_NP | FPU_CYCLES(65,2,2) + // clang-format on }; -static uint64_t opcode_timings_da[8] = -{ +static uint64_t opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), /* FISUBl FISUBRl FIDIVl FIDIVRl*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) + // clang-format on }; -static uint64_t opcode_timings_da_mod3[8] = -{ +static uint64_t opcode_timings_da_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ INVALID, PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID + // clang-format on }; - -static uint64_t opcode_timings_db[8] = -{ +static uint64_t opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), /* FLDe FSTPe*/ INVALID, PAIR_NP | FPU_CYCLES(3,0,0), INVALID, PAIR_NP | FPU_CYCLES(3,0,0) + // clang-format on }; -static uint64_t opcode_timings_db_mod3[64] = -{ +static uint64_t opcode_timings_db_mod3[64] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -704,616 +721,606 @@ static uint64_t opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // clang-format on }; -static uint64_t opcode_timings_dc[8] = -{ +static uint64_t opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /* FSUBd FSUBRd FDIVd FDIVRd*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_dc_mod3[8] = -{ +static uint64_t opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_dd[8] = -{ +static uint64_t opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), /* FRSTOR FSAVE FSTSW*/ PAIR_NP | FPU_CYCLES(70,0,0), INVALID, PAIR_NP | FPU_CYCLES(127,0,0), PAIR_NP | FPU_CYCLES(6,0,0) + // clang-format on }; -static uint64_t opcode_timings_dd_mod3[8] = -{ +static uint64_t opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ PAIR_NP | FPU_CYCLES(2,0,0), INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), /* FUCOM FUCOMP*/ PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID + // clang-format on }; -static uint64_t opcode_timings_de[8] = -{ +static uint64_t opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), /* FISUBw FISUBRw FIDIVw FIDIVRw*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) + // clang-format on }; -static uint64_t opcode_timings_de_mod3[8] = -{ +static uint64_t opcode_timings_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, PAIR_FX | FPU_CYCLES(1,0,0), /* FSUBP FSUBRP FDIVP FDIVRP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_df[8] = -{ +static uint64_t opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), /* FILDiq FBSTP FISTPiq*/ INVALID, PAIR_NP | FPU_CYCLES(3,2,2), PAIR_NP | FPU_CYCLES(148,0,0), PAIR_NP | FPU_CYCLES(6,0,0) + // clang-format on }; -static uint64_t opcode_timings_df_mod3[8] = -{ +static uint64_t opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ PAIR_NP | FPU_CYCLES(6,0,0), INVALID, INVALID, INVALID + // clang-format on }; -static uint64_t opcode_timings_81[8] = -{ +static uint64_t opcode_timings_81[8] = { + // clang-format off PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RM | CYCLES_IMM1632 + // clang-format on }; -static uint64_t opcode_timings_81_mod3[8] = -{ +static uint64_t opcode_timings_81_mod3[8] = { + // clang-format off PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG + // clang-format on }; -static uint64_t opcode_timings_8x[8] = -{ +static uint64_t opcode_timings_8x[8] = { + // clang-format off PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RM | CYCLES_IMM8 + // clang-format on }; -static uint64_t opcode_timings_8x_mod3[8] = -{ +static uint64_t opcode_timings_8x_mod3[8] = { + // clang-format off PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG + // clang-format on }; -static int decode_delay, decode_delay_offset; +static int decode_delay; +static int decode_delay_offset; static uint8_t last_prefix; -static int prefixes; +static int prefixes; -static inline int COUNT(uint64_t timings, uint64_t deps, int op_32) +static inline int +COUNT(uint64_t timings, uint64_t deps, int op_32) { - if ((timings & PAIR_FPU) && !(deps & FPU_FXCH)) - return FPU_I_LATENCY(timings); - if (timings & CYCLES_HAS_MULTI) - { - if (op_32 & 0x100) - return ((uintptr_t)timings >> 8) & 0xff; - return (uintptr_t)timings & 0xff; - } - if (!(timings & PAIR_MASK)) - return timings & 0xffff; - if ((timings & PAIR_MASK) == PAIR_FX) - return timings & 0xffff; - if ((timings & PAIR_MASK) == PAIR_FXCH) - return timings & 0xffff; - if ((timings & PAIR_UV) && !(timings & PAIR_FPU)) - timings &= 3; - switch (timings & CYCLES_MASK) - { - case CYCLES_REG: + if ((timings & PAIR_FPU) && !(deps & FPU_FXCH)) + return FPU_I_LATENCY(timings); + if (timings & CYCLES_HAS_MULTI) { + if (op_32 & 0x100) + return ((uintptr_t) timings >> 8) & 0xff; + return (uintptr_t) timings & 0xff; + } + if (!(timings & PAIR_MASK)) + return timings & 0xffff; + if ((timings & PAIR_MASK) == PAIR_FX) + return timings & 0xffff; + if ((timings & PAIR_MASK) == PAIR_FXCH) + return timings & 0xffff; + if ((timings & PAIR_UV) && !(timings & PAIR_FPU)) + timings &= 3; + switch (timings & CYCLES_MASK) { + case CYCLES_REG: + return 1; + case CYCLES_RM: + return 2; + case CYCLES_RMW: + return 3; + case CYCLES_BRANCH: + return cpu_has_feature(CPU_FEATURE_MMX) ? 1 : 2; + } + + fatal("Illegal COUNT %016llx\n", timings); + + return timings; +} + +static int +codegen_fpu_latencies(uint64_t deps, int reg) +{ + int latency = fpu_latency; + + if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) + latency = fpu_st_latency[0]; + if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) + latency = fpu_st_latency[1]; + if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) + latency = fpu_st_latency[reg]; + + return latency; +} + +#define SUB_AND_CLAMP(latency, count) \ + latency -= count; \ + if (latency < 0) \ + latency = 0 + +static void +codegen_fpu_latency_clock(int count) +{ + SUB_AND_CLAMP(fpu_latency, count); + SUB_AND_CLAMP(fpu_st_latency[0], count); + SUB_AND_CLAMP(fpu_st_latency[1], count); + SUB_AND_CLAMP(fpu_st_latency[2], count); + SUB_AND_CLAMP(fpu_st_latency[3], count); + SUB_AND_CLAMP(fpu_st_latency[4], count); + SUB_AND_CLAMP(fpu_st_latency[5], count); + SUB_AND_CLAMP(fpu_st_latency[6], count); + SUB_AND_CLAMP(fpu_st_latency[7], count); +} + +static inline int +codegen_timing_has_displacement(uint32_t fetchdat, int op_32) +{ + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /*Has SIB*/ + if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0x700) == 0x500) + return 1; + } else { + if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x05) return 1; - case CYCLES_RM: - return 2; - case CYCLES_RMW: - return 3; - case CYCLES_BRANCH: - return cpu_has_feature(CPU_FEATURE_MMX) ? 1 : 2; } - - fatal("Illegal COUNT %016llx\n", timings); - - return timings; -} - -static int codegen_fpu_latencies(uint64_t deps, int reg) -{ - int latency = fpu_latency; - - if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) - latency = fpu_st_latency[0]; - if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) - latency = fpu_st_latency[1]; - if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) - latency = fpu_st_latency[reg]; - - return latency; -} - -#define SUB_AND_CLAMP(latency, count) \ - latency -= count; \ - if (latency < 0) \ - latency = 0 - -static void codegen_fpu_latency_clock(int count) -{ - SUB_AND_CLAMP(fpu_latency, count); - SUB_AND_CLAMP(fpu_st_latency[0], count); - SUB_AND_CLAMP(fpu_st_latency[1], count); - SUB_AND_CLAMP(fpu_st_latency[2], count); - SUB_AND_CLAMP(fpu_st_latency[3], count); - SUB_AND_CLAMP(fpu_st_latency[4], count); - SUB_AND_CLAMP(fpu_st_latency[5], count); - SUB_AND_CLAMP(fpu_st_latency[6], count); - SUB_AND_CLAMP(fpu_st_latency[7], count); -} - -static inline int codegen_timing_has_displacement(uint32_t fetchdat, int op_32) -{ - if (op_32 & 0x200) - { - if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) - { - /*Has SIB*/ - if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0x700) == 0x500) - return 1; - } - else - { - if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x05) - return 1; - } - } - else - { - if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x06) - return 1; - } - return 0; + } else { + if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x06) + return 1; + } + return 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the limit on Pentium MMX parallel decoding*/ -static inline int codegen_timing_instr_length(uint64_t timing, uint32_t fetchdat, int op_32) +static inline int +codegen_timing_instr_length(uint64_t timing, uint32_t fetchdat, int op_32) { - int len = prefixes; - if ((timing & CYCLES_MASK) == CYCLES_RM || (timing & CYCLES_MASK) == CYCLES_RMW) - { - len += 2; /*Opcode + ModR/M*/ - if ((timing & CYCLES_HASIMM) == CYCLES_IMM8) - len++; - if ((timing & CYCLES_HASIMM) == CYCLES_IMM1632) - len += (op_32 & 0x100) ? 4 : 2; + int len = prefixes; + if ((timing & CYCLES_MASK) == CYCLES_RM || (timing & CYCLES_MASK) == CYCLES_RMW) { + len += 2; /*Opcode + ModR/M*/ + if ((timing & CYCLES_HASIMM) == CYCLES_IMM8) + len++; + if ((timing & CYCLES_HASIMM) == CYCLES_IMM1632) + len += (op_32 & 0x100) ? 4 : 2; - if (op_32 & 0x200) - { - if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) - { - /* Has SIB*/ - len++; - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0x700) == 0x500) - len += 4; - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0xc7) == 0x05) - len += 4; - } - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 2; - else if ((fetchdat & 0xc7) == 0x06) - len += 2; - } + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; } + } - return len; + return len; } -void codegen_timing_pentium_block_start(void) +void +codegen_timing_pentium_block_start(void) { - u_pipe_full = decode_delay = decode_delay_offset = 0; + u_pipe_full = decode_delay = decode_delay_offset = 0; } -void codegen_timing_pentium_start(void) +void +codegen_timing_pentium_start(void) { - last_prefix = 0; - prefixes = 0; + last_prefix = 0; + prefixes = 0; } -void codegen_timing_pentium_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_pentium_prefix(uint8_t prefix, uint32_t fetchdat) { - prefixes++; - if ((prefix & 0xf8) == 0xd8) - { - last_prefix = prefix; - return; - } - if (cpu_has_feature(CPU_FEATURE_MMX) && prefix == 0x0f) - { - /*On Pentium MMX 0fh prefix is 'free'*/ - last_prefix = prefix; - return; - } - if (cpu_has_feature(CPU_FEATURE_MMX) && (prefix == 0x66 || prefix == 0x67)) - { - /*On Pentium MMX 66h and 67h prefixes take 2 clocks*/ - decode_delay_offset += 2; - last_prefix = prefix; - return; - } - if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) - { - /*On Pentium 0fh prefix is 'free' when used on conditional jumps*/ - last_prefix = prefix; - return; - } - /*On Pentium all prefixes take 1 cycle to decode. Decode may be shadowed - by execution of previous instructions*/ - decode_delay_offset++; + prefixes++; + if ((prefix & 0xf8) == 0xd8) { last_prefix = prefix; + return; + } + if (cpu_has_feature(CPU_FEATURE_MMX) && prefix == 0x0f) { + /*On Pentium MMX 0fh prefix is 'free'*/ + last_prefix = prefix; + return; + } + if (cpu_has_feature(CPU_FEATURE_MMX) && (prefix == 0x66 || prefix == 0x67)) { + /*On Pentium MMX 66h and 67h prefixes take 2 clocks*/ + decode_delay_offset += 2; + last_prefix = prefix; + return; + } + if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) { + /*On Pentium 0fh prefix is 'free' when used on conditional jumps*/ + last_prefix = prefix; + return; + } + /*On Pentium all prefixes take 1 cycle to decode. Decode may be shadowed + by execution of previous instructions*/ + decode_delay_offset++; + last_prefix = prefix; } -static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) +static int +check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) { - uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); + uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); - /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not - cause AGIs with each other, but do with instructions that use it explicitly*/ - if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) - addr_regmask |= (1 << REG_ESP); + /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not + cause AGIs with each other, but do with instructions that use it explicitly*/ + if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) + addr_regmask |= (1 << REG_ESP); - return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; + return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; } -static void codegen_instruction(uint64_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) +static void +codegen_instruction(uint64_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) { - int instr_cycles, latency = 0; + int instr_cycles; + int latency = 0; - if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) - instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); - else - { -/* if (timings[opcode] & FPU_WRITE_ST0) - fatal("FPU_WRITE_ST0\n"); - if (timings[opcode] & FPU_WRITE_ST1) - fatal("FPU_WRITE_ST1\n"); - if (timings[opcode] & FPU_WRITE_STREG) - fatal("FPU_WRITE_STREG\n");*/ - instr_cycles = 0; - } + if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) + instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); + else { +#if 0 + if (timings[opcode] & FPU_WRITE_ST0) + fatal("FPU_WRITE_ST0\n"); + if (timings[opcode] & FPU_WRITE_ST1) + fatal("FPU_WRITE_ST1\n"); + if (timings[opcode] & FPU_WRITE_STREG) + fatal("FPU_WRITE_STREG\n");*/ +#endif + instr_cycles = 0; + } - if ((decode_delay + decode_delay_offset) > 0) - codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); - else - codegen_fpu_latency_clock(instr_cycles); - instr_cycles += COUNT(timings[opcode], deps[opcode], op_32); - instr_cycles += exec_delay; - if ((decode_delay + decode_delay_offset) > 0) - codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; - else - codegen_block_cycles += instr_cycles; + if ((decode_delay + decode_delay_offset) > 0) + codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); + else + codegen_fpu_latency_clock(instr_cycles); + instr_cycles += COUNT(timings[opcode], deps[opcode], op_32); + instr_cycles += exec_delay; + if ((decode_delay + decode_delay_offset) > 0) + codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; + else + codegen_block_cycles += instr_cycles; - decode_delay = (-instr_cycles) + 1; + decode_delay = (-instr_cycles) + 1; + if (deps[opcode] & FPU_POP) { + for (uint8_t c = 0; c < 7; c++) + fpu_st_latency[c] = fpu_st_latency[c + 1]; + fpu_st_latency[7] = 0; + } + if (deps[opcode] & FPU_POP2) { + for (uint8_t c = 0; c < 6; c++) + fpu_st_latency[c] = fpu_st_latency[c + 2]; + fpu_st_latency[6] = fpu_st_latency[7] = 0; + } + if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) { + fpu_latency = FPU_F_LATENCY(timings[opcode]); + } + + if (deps[opcode] & FPU_PUSH) { + for (uint8_t c = 0; c < 7; c++) + fpu_st_latency[c + 1] = fpu_st_latency[c]; + fpu_st_latency[0] = 0; + } + if (deps[opcode] & FPU_WRITE_ST0) { +#if 0 + if (fpu_st_latency[0]) + fatal("Bad latency ST0\n");*/ +#endif + fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_ST1) { +#if 0 + if (fpu_st_latency[1]) + fatal("Bad latency ST1\n");*/ +#endif + fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_STREG) { + int reg = fetchdat & 7; if (deps[opcode] & FPU_POP) - { - int c; - - for (c = 0; c < 7; c++) - fpu_st_latency[c] = fpu_st_latency[c+1]; - fpu_st_latency[7] = 0; - } - if (deps[opcode] & FPU_POP2) - { - int c; - - for (c = 0; c < 6; c++) - fpu_st_latency[c] = fpu_st_latency[c+2]; - fpu_st_latency[6] = fpu_st_latency[7] = 0; - } - if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) - { - fpu_latency = FPU_F_LATENCY(timings[opcode]); - } - - if (deps[opcode] & FPU_PUSH) - { - int c; - - for (c = 0; c < 7; c++) - fpu_st_latency[c+1] = fpu_st_latency[c]; - fpu_st_latency[0] = 0; - } - if (deps[opcode] & FPU_WRITE_ST0) - { -/* if (fpu_st_latency[0]) - fatal("Bad latency ST0\n");*/ - fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); - } - if (deps[opcode] & FPU_WRITE_ST1) - { -/* if (fpu_st_latency[1]) - fatal("Bad latency ST1\n");*/ - fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); - } - if (deps[opcode] & FPU_WRITE_STREG) - { - int reg = fetchdat & 7; - if (deps[opcode] & FPU_POP) - reg--; - if (reg >= 0 && - !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && - !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) - { - fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); - } + reg--; + if (reg >= 0 && !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) { + fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); } + } } -void codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +void +codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - uint64_t *timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 1); - int agi_stall = 0; + uint64_t *timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); + int agi_stall = 0; - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + case 0xd0: + case 0xd1: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xd2: + case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: case 0xd0: case 0xd1: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (u_pipe_full) { + uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); - case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; - opcode = (fetchdat >> 3) & 7; - break; + if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && (timings[opcode] & PAIR_MASK) != PAIR_FXCH) + goto nopair; - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; + if ((timings[opcode] & PAIR_MASK) == PAIR_FXCH && (u_pipe_timings[u_pipe_opcode] & PAIR_MASK) != PAIR_FX) + goto nopair; - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } - } + if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && (timings[opcode] & PAIR_MASK) == PAIR_FXCH) { + int temp; - if (u_pipe_full) - { - uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); - - if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && - (timings[opcode] & PAIR_MASK) != PAIR_FXCH) - goto nopair; - - if ((timings[opcode] & PAIR_MASK) == PAIR_FXCH && - (u_pipe_timings[u_pipe_opcode] & PAIR_MASK) != PAIR_FX) - goto nopair; - - if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && - (timings[opcode] & PAIR_MASK) == PAIR_FXCH) - { - int temp; - - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - - codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); - - temp = fpu_st_latency[fetchdat & 7]; - fpu_st_latency[fetchdat & 7] = fpu_st_latency[0]; - fpu_st_latency[0] = temp; - - u_pipe_full = 0; - decode_delay_offset = 0; - regmask_modified = u_pipe_regmask; - addr_regmask = 0; - return; - } - - if ((timings[opcode] & PAIR_V) && !(u_pipe_regmask & regmask) && (decode_delay+decode_delay_offset+u_pipe_decode_delay_offset) <= 0) - { - int has_displacement; - - if (timings[opcode] & CYCLES_HASIMM) - has_displacement = codegen_timing_has_displacement(fetchdat, op_32); - else - has_displacement = 0; - - if (!has_displacement && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) - { - int t1 = u_pipe_timings[u_pipe_opcode] & CYCLES_MASK; - int t2 = timings[opcode] & CYCLES_MASK; - int t_pair; - uint64_t temp_timing; - uint64_t temp_deps = 0; - - if (!(u_pipe_timings[u_pipe_opcode] & PAIR_FPU)) - t1 &= 3; - if (!(timings[opcode] & PAIR_FPU)) - t2 &= 3; - - if (t1 < 0 || t2 < 0 || t1 > CYCLES_BRANCH || t2 > CYCLES_BRANCH) - fatal("Pair out of range\n"); - - t_pair = pair_timings[t1][t2]; - if (t_pair < 1) - fatal("Illegal pair timings : t1=%i t2=%i u_opcode=%02x v_opcode=%02x\n", t1, t2, u_pipe_opcode, opcode); - - /*Instruction can pair with previous*/ - temp_timing = t_pair; - if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - codegen_instruction(&temp_timing, &temp_deps, 0, 0, 0, 0, agi_stall); - u_pipe_full = 0; - decode_delay_offset = 0; - - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; - addr_regmask = 0; - return; - } - } -nopair: - /*Instruction can not pair with previous*/ - /*Run previous now*/ - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); - u_pipe_full = 0; - regmask_modified = u_pipe_regmask; - addr_regmask = 0; - } - - if ((timings[opcode] & PAIR_U) && (decode_delay + decode_delay_offset) <= 0) - { - int has_displacement; - - if (timings[opcode] & CYCLES_HASIMM) - has_displacement = codegen_timing_has_displacement(fetchdat, op_32); - else - has_displacement = 0; - - if ((!has_displacement || cpu_has_feature(CPU_FEATURE_MMX)) && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) - { - /*Instruction might pair with next*/ - u_pipe_full = 1; - u_pipe_opcode = opcode; - u_pipe_timings = timings; - u_pipe_op_32 = op_32; - u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); - u_pipe_fetchdat = fetchdat; - u_pipe_decode_delay_offset = decode_delay_offset; - u_pipe_deps = deps; - decode_delay_offset = 0; - return; - } - } - /*Instruction can not pair and must run now*/ - if (check_agi(deps, opcode, fetchdat, op_32)) + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) agi_stall = 1; - codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); - decode_delay_offset = 0; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); - addr_regmask = 0; -} -void codegen_timing_pentium_block_end(void) -{ - if (u_pipe_full) - { - /*Run previous now*/ - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - codegen_block_cycles++; - codegen_block_cycles += COUNT(u_pipe_timings[u_pipe_opcode], u_pipe_deps[u_pipe_opcode], u_pipe_op_32) + decode_delay + decode_delay_offset; - u_pipe_full = 0; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + + temp = fpu_st_latency[fetchdat & 7]; + fpu_st_latency[fetchdat & 7] = fpu_st_latency[0]; + fpu_st_latency[0] = temp; + + u_pipe_full = 0; + decode_delay_offset = 0; + regmask_modified = u_pipe_regmask; + addr_regmask = 0; + return; } + + if ((timings[opcode] & PAIR_V) && !(u_pipe_regmask & regmask) && (decode_delay + decode_delay_offset + u_pipe_decode_delay_offset) <= 0) { + int has_displacement; + + if (timings[opcode] & CYCLES_HASIMM) + has_displacement = codegen_timing_has_displacement(fetchdat, op_32); + else + has_displacement = 0; + + if (!has_displacement && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) { + int t1 = u_pipe_timings[u_pipe_opcode] & CYCLES_MASK; + int t2 = timings[opcode] & CYCLES_MASK; + int t_pair; + uint64_t temp_timing; + uint64_t temp_deps = 0; + + if (!(u_pipe_timings[u_pipe_opcode] & PAIR_FPU)) + t1 &= 3; + if (!(timings[opcode] & PAIR_FPU)) + t2 &= 3; + + if (t1 < 0 || t2 < 0 || t1 > CYCLES_BRANCH || t2 > CYCLES_BRANCH) + fatal("Pair out of range\n"); + + t_pair = pair_timings[t1][t2]; + if (t_pair < 1) + fatal("Illegal pair timings : t1=%i t2=%i u_opcode=%02x v_opcode=%02x\n", t1, t2, u_pipe_opcode, opcode); + + /*Instruction can pair with previous*/ + temp_timing = t_pair; + if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(&temp_timing, &temp_deps, 0, 0, 0, 0, agi_stall); + u_pipe_full = 0; + decode_delay_offset = 0; + + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; + addr_regmask = 0; + return; + } + } +nopair: + /*Instruction can not pair with previous*/ + /*Run previous now*/ + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + u_pipe_full = 0; + regmask_modified = u_pipe_regmask; + addr_regmask = 0; + } + + if ((timings[opcode] & PAIR_U) && (decode_delay + decode_delay_offset) <= 0) { + int has_displacement; + + if (timings[opcode] & CYCLES_HASIMM) + has_displacement = codegen_timing_has_displacement(fetchdat, op_32); + else + has_displacement = 0; + + if ((!has_displacement || cpu_has_feature(CPU_FEATURE_MMX)) && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) { + /*Instruction might pair with next*/ + u_pipe_full = 1; + u_pipe_opcode = opcode; + u_pipe_timings = timings; + u_pipe_op_32 = op_32; + u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); + u_pipe_fetchdat = fetchdat; + u_pipe_decode_delay_offset = decode_delay_offset; + u_pipe_deps = deps; + decode_delay_offset = 0; + return; + } + } + /*Instruction can not pair and must run now*/ + if (check_agi(deps, opcode, fetchdat, op_32)) + agi_stall = 1; + codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); + decode_delay_offset = 0; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + addr_regmask = 0; } -codegen_timing_t codegen_timing_pentium = +void +codegen_timing_pentium_block_end(void) { - codegen_timing_pentium_start, - codegen_timing_pentium_prefix, - codegen_timing_pentium_opcode, - codegen_timing_pentium_block_start, - codegen_timing_pentium_block_end, - NULL + if (u_pipe_full) { + /*Run previous now*/ + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + codegen_block_cycles++; + codegen_block_cycles += COUNT(u_pipe_timings[u_pipe_opcode], u_pipe_deps[u_pipe_opcode], u_pipe_op_32) + decode_delay + decode_delay_offset; + u_pipe_full = 0; + } +} + +codegen_timing_t codegen_timing_pentium = { + codegen_timing_pentium_start, + codegen_timing_pentium_prefix, + codegen_timing_pentium_opcode, + codegen_timing_pentium_block_start, + codegen_timing_pentium_block_end, + NULL }; diff --git a/src/cpu/codegen_timing_winchip.c b/src/cpu/codegen_timing_winchip.c index a1ee02b63..11dd912b4 100644 --- a/src/cpu/codegen_timing_winchip.c +++ b/src/cpu/codegen_timing_winchip.c @@ -4,19 +4,21 @@ #include #include <86box/86box.h> #include "cpu.h" +#include <86box/mem.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" -#include <86box/mem.h> #include "codegen.h" #include "codegen_ops.h" #include "codegen_timing_common.h" -#define CYCLES(c) (int *)c -#define CYCLES2(c16, c32) (int *)((-1 & ~0xffff) | c16 | (c32 << 8)) +#define CYCLES(c) (int *) c +#define CYCLES2(c16, c32) (int *) ((-1 & ~0xffff) | c16 | (c32 << 8)) -static int *opcode_timings[256] = -{ +static int *opcode_timings[256] = { + // clang-format off /*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), /*20*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), @@ -36,10 +38,11 @@ static int *opcode_timings[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL + // clang-format on }; -static int *opcode_timings_mod3[256] = -{ +static int *opcode_timings_mod3[256] = { + // clang-format off /*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), /*20*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), @@ -59,10 +62,11 @@ static int *opcode_timings_mod3[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL + // clang-format on }; -static int *opcode_timings_0f[256] = -{ +static int *opcode_timings_0f[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -82,9 +86,10 @@ static int *opcode_timings_0f[256] = /*d0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, /*e0*/ NULL, &timing_rm, &timing_rm, NULL, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, /*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, + // clang-format on }; -static int *opcode_timings_0f_mod3[256] = -{ +static int *opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -104,60 +109,72 @@ static int *opcode_timings_0f_mod3[256] = /*d0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, /*e0*/ NULL, &timing_rr, &timing_rr, NULL, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, /*f0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, + // clang-format on }; -static int *opcode_timings_shift[8] = -{ +static int *opcode_timings_shift[8] = { + // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) + // clang-format on }; -static int *opcode_timings_shift_mod3[8] = -{ +static int *opcode_timings_shift_mod3[8] = { + // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) + // clang-format on }; -static int *opcode_timings_f6[8] = -{ +static int *opcode_timings_f6[8] = { + // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static int *opcode_timings_f6_mod3[8] = -{ +static int *opcode_timings_f6_mod3[8] = { + // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static int *opcode_timings_f7[8] = -{ +static int *opcode_timings_f7[8] = { + // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static int *opcode_timings_f7_mod3[8] = -{ +static int *opcode_timings_f7_mod3[8] = { + // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static int *opcode_timings_ff[8] = -{ +static int *opcode_timings_ff[8] = { + // clang-format off &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL + // clang-format on }; -static int *opcode_timings_ff_mod3[8] = -{ +static int *opcode_timings_ff_mod3[8] = { + // clang-format off &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL + // clang-format on }; -static int *opcode_timings_d8[8] = -{ +static int *opcode_timings_d8[8] = { + // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) + // clang-format on }; -static int *opcode_timings_d8_mod3[8] = -{ +static int *opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ CYCLES(4), CYCLES(6), CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) + // clang-format on }; -static int *opcode_timings_d9[8] = -{ +static int *opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ CYCLES(2), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) + // clang-format on }; -static int *opcode_timings_d9_mod3[64] = -{ +static int *opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), /*FXCH*/ @@ -174,26 +191,29 @@ static int *opcode_timings_d9_mod3[64] = CYCLES(300), CYCLES(58), CYCLES(676), CYCLES(355), NULL, NULL, CYCLES(3), CYCLES(3), /* opFPREM opFSQRT opFSINCOS opFRNDINT opFSCALE opFSIN opFCOS*/ CYCLES(70), NULL, CYCLES(72), CYCLES(292), CYCLES(21), CYCLES(30), CYCLES(474), CYCLES(474) + // clang-format on }; -static int *opcode_timings_da[8] = -{ +static int *opcode_timings_da[8] = { + // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) + // clang-format on }; -static int *opcode_timings_da_mod3[8] = -{ +static int *opcode_timings_da_mod3[8] = { + // clang-format off NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL + // clang-format on }; - -static int *opcode_timings_db[8] = -{ +static int *opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil FLDe FSTPe*/ CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), NULL, CYCLES(8) + // clang-format on }; -static int *opcode_timings_db_mod3[64] = -{ +static int *opcode_timings_db_mod3[64] = { + // clang-format off NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -203,54 +223,63 @@ static int *opcode_timings_db_mod3[64] = NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + // clang-format on }; -static int *opcode_timings_dc[8] = -{ +static int *opcode_timings_dc[8] = { + // clang-format off /* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ CYCLES(6), CYCLES(8), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(74), CYCLES(74) + // clang-format on }; -static int *opcode_timings_dc_mod3[8] = -{ +static int *opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ CYCLES(4), CYCLES(6), NULL, NULL, CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) + // clang-format on }; -static int *opcode_timings_dd[8] = -{ +static int *opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ CYCLES(2), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(5) + // clang-format on }; -static int *opcode_timings_dd_mod3[8] = -{ +static int *opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL + // clang-format on }; -static int *opcode_timings_de[8] = -{ +static int *opcode_timings_de[8] = { + // clang-format off /* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) + // clang-format on }; -static int *opcode_timings_de_mod3[8] = -{ +static int *opcode_timings_de_mod3[8] = { + // clang-format off /* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ CYCLES(4), CYCLES(6), NULL, CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) + // clang-format on }; -static int *opcode_timings_df[8] = -{ +static int *opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), CYCLES(172), CYCLES(8) + // clang-format on }; -static int *opcode_timings_df_mod3[8] = -{ +static int *opcode_timings_df_mod3[8] = { + // clang-format off /* FFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL + // clang-format on }; -static int *opcode_timings_8x[8] = -{ +static int *opcode_timings_8x[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm }; static int *opcode_timings_8x_mod3[8] = @@ -264,158 +293,169 @@ static int *opcode_timings_81[8] = static int *opcode_timings_81_mod3[8] = { &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int timing_count; -static uint8_t last_prefix; +static int timing_count; +static uint8_t last_prefix; static uint32_t regmask_modified; -static inline int COUNT(int *c, int op_32) +static inline int +COUNT(int *c, int op_32) { - if ((uintptr_t)c <= 10000) - return (int)(uintptr_t)c; - if (((uintptr_t)c & ~0xffff) == (-1 & ~0xffff)) - { - if (op_32 & 0x100) - return ((uintptr_t)c >> 8) & 0xff; - return (uintptr_t)c & 0xff; - } - return *c; + if ((uintptr_t) c <= 10000) + return (int) (uintptr_t) c; + if (((uintptr_t) c & ~0xffff) == (-1 & ~0xffff)) { + if (op_32 & 0x100) + return ((uintptr_t) c >> 8) & 0xff; + return (uintptr_t) c & 0xff; + } + return *c; } -void codegen_timing_winchip_block_start(void) +void +codegen_timing_winchip_block_start(void) { - regmask_modified = 0; + regmask_modified = 0; } -void codegen_timing_winchip_start(void) +void +codegen_timing_winchip_start(void) { - timing_count = 0; - last_prefix = 0; + timing_count = 0; + last_prefix = 0; } -void codegen_timing_winchip_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_winchip_prefix(uint8_t prefix, uint32_t fetchdat) { - timing_count += COUNT(opcode_timings[prefix], 0); - last_prefix = prefix; + timing_count += COUNT(opcode_timings[prefix], 0); + last_prefix = prefix; } -void codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +void +codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - int **timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 1); + int **timings; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + case 0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + timing_count += COUNT(timings[opcode], op_32); + if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) + timing_count++; /*AGI stall*/ + codegen_block_cycles += timing_count; - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; - - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } - } - - timing_count += COUNT(timings[opcode], op_32); - if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) - timing_count++; /*AGI stall*/ - codegen_block_cycles += timing_count; - - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); } -void codegen_timing_winchip_block_end(void) +void +codegen_timing_winchip_block_end(void) { + // } -codegen_timing_t codegen_timing_winchip = -{ - codegen_timing_winchip_start, - codegen_timing_winchip_prefix, - codegen_timing_winchip_opcode, - codegen_timing_winchip_block_start, - codegen_timing_winchip_block_end, - NULL +codegen_timing_t codegen_timing_winchip = { + codegen_timing_winchip_start, + codegen_timing_winchip_prefix, + codegen_timing_winchip_opcode, + codegen_timing_winchip_block_start, + codegen_timing_winchip_block_end, + NULL }; diff --git a/src/cpu/codegen_timing_winchip2.c b/src/cpu/codegen_timing_winchip2.c index f96304072..d4e32611e 100644 --- a/src/cpu/codegen_timing_winchip2.c +++ b/src/cpu/codegen_timing_winchip2.c @@ -14,6 +14,7 @@ #include <86box/86box.h> #include "cpu.h" #include <86box/mem.h> +#include <86box/plat_unused.h> #include "x86.h" #include "x86_ops.h" @@ -23,46 +24,46 @@ #include "codegen_timing_common.h" /*Instruction has different execution time for 16 and 32 bit data. Does not pair */ -#define CYCLES_HAS_MULTI (1 << 31) +#define CYCLES_HAS_MULTI (1 << 31) -#define CYCLES_FPU (1 << 30) +#define CYCLES_FPU (1 << 30) #define CYCLES_IS_MMX_MUL (1 << 29) #define CYCLES_IS_MMX_SHIFT (1 << 28) #define CYCLES_IS_MMX_ANY (1 << 27) #define CYCLES_IS_3DNOW (1 << 26) -#define CYCLES_MMX_MUL(c) (CYCLES_IS_MMX_MUL | c) +#define CYCLES_MMX_MUL(c) (CYCLES_IS_MMX_MUL | c) #define CYCLES_MMX_SHIFT(c) (CYCLES_IS_MMX_SHIFT | c) -#define CYCLES_MMX_ANY(c) (CYCLES_IS_MMX_ANY | c) -#define CYCLES_3DNOW(c) (CYCLES_IS_3DNOW | c) +#define CYCLES_MMX_ANY(c) (CYCLES_IS_MMX_ANY | c) +#define CYCLES_3DNOW(c) (CYCLES_IS_3DNOW | c) -#define CYCLES_IS_MMX (CYCLES_IS_MMX_MUL | CYCLES_IS_MMX_SHIFT | CYCLES_IS_MMX_ANY | CYCLES_IS_3DNOW) +#define CYCLES_IS_MMX (CYCLES_IS_MMX_MUL | CYCLES_IS_MMX_SHIFT | CYCLES_IS_MMX_ANY | CYCLES_IS_3DNOW) -#define GET_CYCLES(c) (c & ~(CYCLES_HAS_MULTI | CYCLES_FPU | CYCLES_IS_MMX)) +#define GET_CYCLES(c) (c & ~(CYCLES_HAS_MULTI | CYCLES_FPU | CYCLES_IS_MMX)) -#define CYCLES(c) c -#define CYCLES2(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) +#define CYCLES(c) c +#define CYCLES2(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) /*comp_time = cycles until instruction complete i_overlap = cycles that overlap with integer f_overlap = cycles that overlap with subsequent FPU*/ #define FPU_CYCLES(comp_time, i_overlap, f_overlap) (comp_time) | (i_overlap << 8) | (f_overlap << 16) | CYCLES_FPU -#define FPU_COMP_TIME(timing) (timing & 0xff) -#define FPU_I_OVERLAP(timing) ((timing >> 8) & 0xff) -#define FPU_F_OVERLAP(timing) ((timing >> 16) & 0xff) +#define FPU_COMP_TIME(timing) (timing & 0xff) +#define FPU_I_OVERLAP(timing) ((timing >> 8) & 0xff) +#define FPU_F_OVERLAP(timing) ((timing >> 16) & 0xff) -#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) +#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) -#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) +#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) -#define FPU_RESULT_LATENCY(timing) ((timing >> 8) & 0xff) +#define FPU_RESULT_LATENCY(timing) ((timing >> 8) & 0xff) -#define INVALID 0 +#define INVALID 0 -static uint32_t opcode_timings[256] = -{ +static uint32_t opcode_timings[256] = { + // clang-format off /*00*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, /*10*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), /*20*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), @@ -82,10 +83,11 @@ static uint32_t opcode_timings[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), INVALID, INVALID, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), INVALID + // clang-format on }; -static uint32_t opcode_timings_mod3[256] = -{ +static uint32_t opcode_timings_mod3[256] = { + // clang-format off /*00*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, /*10*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), /*20*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), @@ -105,10 +107,11 @@ static uint32_t opcode_timings_mod3[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), INVALID, INVALID, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), INVALID, + // clang-format on }; -static uint32_t opcode_timings_0f[256] = -{ +static uint32_t opcode_timings_0f[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -128,9 +131,10 @@ static uint32_t opcode_timings_0f[256] = /*d0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), /*e0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), /*f0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, + // clang-format on }; -static uint32_t opcode_timings_0f_mod3[256] = -{ +static uint32_t opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -150,66 +154,78 @@ static uint32_t opcode_timings_0f_mod3[256] = /*d0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), /*e0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), /*f0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, + // clang-format on }; -static uint32_t opcode_timings_shift[8] = -{ +static uint32_t opcode_timings_shift[8] = { + // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) + // clang-format on }; -static uint32_t opcode_timings_shift_mod3[8] = -{ +static uint32_t opcode_timings_shift_mod3[8] = { + // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) + // clang-format on }; -static uint32_t opcode_timings_f6[8] = -{ +static uint32_t opcode_timings_f6[8] = { + // clang-format off CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static uint32_t opcode_timings_f6_mod3[8] = -{ +static uint32_t opcode_timings_f6_mod3[8] = { + // clang-format off CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static uint32_t opcode_timings_f7[8] = -{ +static uint32_t opcode_timings_f7[8] = { + // clang-format off CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static uint32_t opcode_timings_f7_mod3[8] = -{ +static uint32_t opcode_timings_f7_mod3[8] = { + // clang-format off CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static uint32_t opcode_timings_ff[8] = -{ +static uint32_t opcode_timings_ff[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID + // clang-format on }; -static uint32_t opcode_timings_ff_mod3[8] = -{ +static uint32_t opcode_timings_ff_mod3[8] = { + // clang-format off CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID + // clang-format on }; -static uint32_t opcode_timings_d8[8] = -{ +static uint32_t opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), /* FSUBs FSUBRs FDIVs FDIVRs*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_d8_mod3[8] = -{ +static uint32_t opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), /* FSUB FSUBR FDIV FDIVR*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_d9[8] = -{ +static uint32_t opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), /* FLDENV FLDCW FSTENV FSTCW*/ FPU_CYCLES(32,0,0), FPU_CYCLES(8,0,0), FPU_CYCLES(48,0,0), FPU_CYCLES(2,0,0) + // clang-format on }; -static uint32_t opcode_timings_d9_mod3[64] = -{ +static uint32_t opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -238,32 +254,35 @@ static uint32_t opcode_timings_d9_mod3[64] = FPU_CYCLES(64,2,2), INVALID, FPU_CYCLES(70,69,2),FPU_CYCLES(89,2,2), /* opFRNDINT opFSCALE opFSIN opFCOS*/ FPU_CYCLES(9,0,0), FPU_CYCLES(20,5,0), FPU_CYCLES(65,2,2), FPU_CYCLES(65,2,2) + // clang-format on }; -static uint32_t opcode_timings_da[8] = -{ +static uint32_t opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), /* FISUBl FISUBRl FIDIVl FIDIVRl*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) + // clang-format on }; -static uint32_t opcode_timings_da_mod3[8] = -{ +static uint32_t opcode_timings_da_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ INVALID, FPU_CYCLES(1,0,0), INVALID, INVALID + // clang-format on }; - -static uint32_t opcode_timings_db[8] = -{ +static uint32_t opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), /* FLDe FSTPe*/ INVALID, FPU_CYCLES(3,0,0), INVALID, FPU_CYCLES(3,0,0) + // clang-format on }; -static uint32_t opcode_timings_db_mod3[64] = -{ +static uint32_t opcode_timings_db_mod3[64] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -289,455 +308,469 @@ static uint32_t opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // clang-format on }; -static uint32_t opcode_timings_dc[8] = -{ +static uint32_t opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), /* FSUBd FSUBRd FDIVd FDIVRd*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_dc_mod3[8] = -{ +static uint32_t opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_dd[8] = -{ +static uint32_t opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), /* FRSTOR FSAVE FSTSW*/ FPU_CYCLES(70,0,0), INVALID, FPU_CYCLES(127,0,0), FPU_CYCLES(6,0,0) + // clang-format on }; -static uint32_t opcode_timings_dd_mod3[8] = -{ +static uint32_t opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ FPU_CYCLES(2,0,0), INVALID, FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), /* FUCOM FUCOMP*/ FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),INVALID, INVALID + // clang-format on }; -static uint32_t opcode_timings_de[8] = -{ +static uint32_t opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), /* FISUBw FISUBRw FIDIVw FIDIVRw*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) + // clang-format on }; -static uint32_t opcode_timings_de_mod3[8] = -{ +static uint32_t opcode_timings_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(1,0,0), /* FSUBP FSUBRP FDIVP FDIVRP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_df[8] = -{ +static uint32_t opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), /* FILDiq FBSTP FISTPiq*/ INVALID, FPU_CYCLES(3,2,2), FPU_CYCLES(148,0,0), FPU_CYCLES(6,0,0) + // clang-format on }; -static uint32_t opcode_timings_df_mod3[8] = -{ +static uint32_t opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ FPU_CYCLES(6,0,0), INVALID, INVALID, INVALID + // clang-format on }; -static uint32_t opcode_timings_8x[8] = -{ +static uint32_t opcode_timings_8x[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_8x_mod3[8] = -{ +static uint32_t opcode_timings_8x_mod3[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_81[8] = -{ +static uint32_t opcode_timings_81[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_81_mod3[8] = -{ +static uint32_t opcode_timings_81_mod3[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) + // clang-format on }; -static int timing_count; -static uint8_t last_prefix; +static int timing_count; +static uint8_t last_prefix; static uint32_t regmask_modified; -static int decode_delay, decode_delay_offset; -static int fpu_latency; -static int fpu_st_latency[8]; +static int decode_delay; +static int decode_delay_offset; +static int fpu_latency; +static int fpu_st_latency[8]; -static int u_pipe_full; -static uint32_t u_pipe_opcode; +static int u_pipe_full; +static uint32_t u_pipe_opcode; static uint32_t *u_pipe_timings; -static uint32_t u_pipe_op_32; -static uint32_t u_pipe_regmask; -static uint32_t u_pipe_fetchdat; -static int u_pipe_decode_delay_offset; +static uint32_t u_pipe_op_32; +static uint32_t u_pipe_regmask; +static uint32_t u_pipe_fetchdat; +static int u_pipe_decode_delay_offset; static uint64_t *u_pipe_deps; -int can_pair(uint32_t timing_a, uint32_t timing_b, uint8_t regmask_b) +int +can_pair(uint32_t timing_a, uint32_t timing_b, uint8_t regmask_b) { - /*Only MMX/3DNow instructions can pair*/ - if (!(timing_b & CYCLES_IS_MMX)) - return 0; - /*Only one MMX multiply per cycle*/ - if ((timing_a & CYCLES_IS_MMX_MUL) && (timing_b & CYCLES_IS_MMX_MUL)) - return 0; - /*Only one MMX shift/pack per cycle*/ - if ((timing_a & CYCLES_IS_MMX_SHIFT) && (timing_b & CYCLES_IS_MMX_SHIFT)) - return 0; - /*Second instruction can not access registers written by first*/ - if (u_pipe_regmask & regmask_b) - return 0; - /*Must have had enough time to decode prefixes*/ - if ((decode_delay+decode_delay_offset+u_pipe_decode_delay_offset) > 0) - return 0; + /*Only MMX/3DNow instructions can pair*/ + if (!(timing_b & CYCLES_IS_MMX)) + return 0; + /*Only one MMX multiply per cycle*/ + if ((timing_a & CYCLES_IS_MMX_MUL) && (timing_b & CYCLES_IS_MMX_MUL)) + return 0; + /*Only one MMX shift/pack per cycle*/ + if ((timing_a & CYCLES_IS_MMX_SHIFT) && (timing_b & CYCLES_IS_MMX_SHIFT)) + return 0; + /*Second instruction can not access registers written by first*/ + if (u_pipe_regmask & regmask_b) + return 0; + /*Must have had enough time to decode prefixes*/ + if ((decode_delay + decode_delay_offset + u_pipe_decode_delay_offset) > 0) + return 0; - return 1; + return 1; } -static inline int COUNT(uint32_t c, int op_32) +static inline int +COUNT(uint32_t c, int op_32) { - if (c & CYCLES_FPU) - return FPU_I_LATENCY(c); - if (c & CYCLES_HAS_MULTI) - { - if (op_32 & 0x100) - return (c >> 8) & 0xff; - return c & 0xff; - } - return GET_CYCLES(c); + if (c & CYCLES_FPU) + return FPU_I_LATENCY(c); + if (c & CYCLES_HAS_MULTI) { + if (op_32 & 0x100) + return (c >> 8) & 0xff; + return c & 0xff; + } + return GET_CYCLES(c); } -static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) +static int +check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) { - uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); + uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); - /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not - cause AGIs with each other, but do with instructions that use it explicitly*/ - if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) - addr_regmask |= (1 << REG_ESP); + /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not + cause AGIs with each other, but do with instructions that use it explicitly*/ + if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) + addr_regmask |= (1 << REG_ESP); - return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; + return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; } -static int codegen_fpu_latencies(uint64_t deps, int reg) +static int +codegen_fpu_latencies(uint64_t deps, int reg) { - int latency = fpu_latency; + int latency = fpu_latency; - if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) - latency = fpu_st_latency[0]; - if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) - latency = fpu_st_latency[1]; - if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) - latency = fpu_st_latency[reg]; + if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) + latency = fpu_st_latency[0]; + if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) + latency = fpu_st_latency[1]; + if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) + latency = fpu_st_latency[reg]; - return latency; + return latency; } -#define SUB_AND_CLAMP(latency, count) \ - latency -= count; \ - if (latency < 0) \ - latency = 0 +#define SUB_AND_CLAMP(latency, count) \ + latency -= count; \ + if (latency < 0) \ + latency = 0 -static void codegen_fpu_latency_clock(int count) +static void +codegen_fpu_latency_clock(int count) { - SUB_AND_CLAMP(fpu_latency, count); - SUB_AND_CLAMP(fpu_st_latency[0], count); - SUB_AND_CLAMP(fpu_st_latency[1], count); - SUB_AND_CLAMP(fpu_st_latency[2], count); - SUB_AND_CLAMP(fpu_st_latency[3], count); - SUB_AND_CLAMP(fpu_st_latency[4], count); - SUB_AND_CLAMP(fpu_st_latency[5], count); - SUB_AND_CLAMP(fpu_st_latency[6], count); - SUB_AND_CLAMP(fpu_st_latency[7], count); + SUB_AND_CLAMP(fpu_latency, count); + SUB_AND_CLAMP(fpu_st_latency[0], count); + SUB_AND_CLAMP(fpu_st_latency[1], count); + SUB_AND_CLAMP(fpu_st_latency[2], count); + SUB_AND_CLAMP(fpu_st_latency[3], count); + SUB_AND_CLAMP(fpu_st_latency[4], count); + SUB_AND_CLAMP(fpu_st_latency[5], count); + SUB_AND_CLAMP(fpu_st_latency[6], count); + SUB_AND_CLAMP(fpu_st_latency[7], count); } -static void codegen_instruction(uint32_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) +static void +codegen_instruction(uint32_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) { - int instr_cycles, latency = 0; + int instr_cycles; + int latency = 0; - if ((timings[opcode] & CYCLES_FPU) && !(deps[opcode] & FPU_FXCH)) - instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); - else - instr_cycles = 0; + if ((timings[opcode] & CYCLES_FPU) && !(deps[opcode] & FPU_FXCH)) + instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); + else + instr_cycles = 0; - if ((decode_delay + decode_delay_offset) > 0) - codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); - else - codegen_fpu_latency_clock(instr_cycles); - instr_cycles += COUNT(timings[opcode], op_32); - instr_cycles += exec_delay; - if ((decode_delay + decode_delay_offset) > 0) - codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; - else - codegen_block_cycles += instr_cycles; - decode_delay = (-instr_cycles) + 1; + if ((decode_delay + decode_delay_offset) > 0) + codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); + else + codegen_fpu_latency_clock(instr_cycles); + instr_cycles += COUNT(timings[opcode], op_32); + instr_cycles += exec_delay; + if ((decode_delay + decode_delay_offset) > 0) + codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; + else + codegen_block_cycles += instr_cycles; + decode_delay = (-instr_cycles) + 1; + if (deps[opcode] & FPU_POP) { + for (uint8_t c = 0; c < 7; c++) + fpu_st_latency[c] = fpu_st_latency[c + 1]; + fpu_st_latency[7] = 0; + } + if (deps[opcode] & FPU_POP2) { + for (uint8_t c = 0; c < 6; c++) + fpu_st_latency[c] = fpu_st_latency[c + 2]; + fpu_st_latency[6] = fpu_st_latency[7] = 0; + } + if (timings[opcode] & CYCLES_FPU) { +#if 0 + if (fpu_latency) + fatal("Bad latency FPU\n");*/ +#endif + fpu_latency = FPU_F_LATENCY(timings[opcode]); + } + + if (deps[opcode] & FPU_PUSH) { + for (uint8_t c = 0; c < 7; c++) + fpu_st_latency[c + 1] = fpu_st_latency[c]; + fpu_st_latency[0] = 0; + } + if (deps[opcode] & FPU_WRITE_ST0) { +#if 0 + if (fpu_st_latency[0]) + fatal("Bad latency ST0\n");*/ +#endif + fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_ST1) { +#if 0 + if (fpu_st_latency[1]) + fatal("Bad latency ST1\n");*/ +#endif + fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_STREG) { + int reg = fetchdat & 7; if (deps[opcode] & FPU_POP) - { - int c; - - for (c = 0; c < 7; c++) - fpu_st_latency[c] = fpu_st_latency[c+1]; - fpu_st_latency[7] = 0; - } - if (deps[opcode] & FPU_POP2) - { - int c; - - for (c = 0; c < 6; c++) - fpu_st_latency[c] = fpu_st_latency[c+2]; - fpu_st_latency[6] = fpu_st_latency[7] = 0; - } - if (timings[opcode] & CYCLES_FPU) - { - /* if (fpu_latency) - fatal("Bad latency FPU\n");*/ - fpu_latency = FPU_F_LATENCY(timings[opcode]); - } - - if (deps[opcode] & FPU_PUSH) - { - int c; - - for (c = 0; c < 7; c++) - fpu_st_latency[c+1] = fpu_st_latency[c]; - fpu_st_latency[0] = 0; - } - if (deps[opcode] & FPU_WRITE_ST0) - { -/* if (fpu_st_latency[0]) - fatal("Bad latency ST0\n");*/ - fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); - } - if (deps[opcode] & FPU_WRITE_ST1) - { -/* if (fpu_st_latency[1]) - fatal("Bad latency ST1\n");*/ - fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); - } - if (deps[opcode] & FPU_WRITE_STREG) - { - int reg = fetchdat & 7; - if (deps[opcode] & FPU_POP) - reg--; - if (reg >= 0 && - !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && - !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) - { -/* if (fpu_st_latency[reg]) - fatal("Bad latency STREG %i %08x %i %016llx %02x\n",fpu_st_latency[reg], fetchdat, reg, timings[opcode], opcode);*/ - fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); - } + reg--; + if (reg >= 0 && !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) { +#if 0 + if (fpu_st_latency[reg]) + fatal("Bad latency STREG %i %08x %i %016llx %02x\n",fpu_st_latency[reg], fetchdat, reg, timings[opcode], opcode);*/ +#endif + fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); } + } } -static void codegen_timing_winchip2_block_start(void) +static void +codegen_timing_winchip2_block_start(void) { - regmask_modified = 0; - decode_delay = decode_delay_offset = 0; - u_pipe_full = 0; + regmask_modified = 0; + decode_delay = decode_delay_offset = 0; + u_pipe_full = 0; } -static void codegen_timing_winchip2_start(void) +static void +codegen_timing_winchip2_start(void) { - timing_count = 0; - last_prefix = 0; + timing_count = 0; + last_prefix = 0; } -static void codegen_timing_winchip2_prefix(uint8_t prefix, uint32_t fetchdat) +static void +codegen_timing_winchip2_prefix(uint8_t prefix, uint32_t fetchdat) { - if (prefix == 0x0f) - { - /*0fh prefix is 'free'*/ - last_prefix = prefix; - return; - } - /*On WinChip all prefixes take 1 cycle to decode. Decode may be shadowed - by execution of previous instructions*/ - decode_delay_offset++; + if (prefix == 0x0f) { + /*0fh prefix is 'free'*/ last_prefix = prefix; + return; + } + /*On WinChip all prefixes take 1 cycle to decode. Decode may be shadowed + by execution of previous instructions*/ + decode_delay_offset++; + last_prefix = prefix; } -static void codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +static void +codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - uint32_t *timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 1); - int agi_stall = 0; + uint32_t *timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); + int agi_stall = 0; - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + case 0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (u_pipe_full) { + uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; + if (can_pair(u_pipe_timings[u_pipe_opcode], timings[opcode], regmask)) { + int cycles_a = u_pipe_timings[u_pipe_opcode] & 0xff; + int cycles_b = timings[opcode] & 0xff; + uint32_t timing = (cycles_a > cycles_b) ? u_pipe_timings[u_pipe_opcode] : timings[opcode]; + uint64_t temp_deps = 0; - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } - } - - if (u_pipe_full) - { - uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); - - if (can_pair(u_pipe_timings[u_pipe_opcode], timings[opcode], regmask)) - { - int cycles_a = u_pipe_timings[u_pipe_opcode] & 0xff; - int cycles_b = timings[opcode] & 0xff; - uint32_t timing = (cycles_a > cycles_b) ? u_pipe_timings[u_pipe_opcode] : timings[opcode]; - uint64_t temp_deps = 0; - - if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - - codegen_instruction(&timing, &temp_deps, 0, 0, 0, 0, agi_stall); - u_pipe_full = 0; - decode_delay_offset = 0; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; - return; - } - else - { - /*No pairing, run first instruction now*/ - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); - u_pipe_full = 0; - regmask_modified = u_pipe_regmask; - } - } - if (timings[opcode] & CYCLES_IS_MMX) - { - /*Might pair with next instruction*/ - u_pipe_full = 1; - u_pipe_opcode = opcode; - u_pipe_timings = timings; - u_pipe_op_32 = op_32; - u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); - u_pipe_fetchdat = fetchdat; - u_pipe_decode_delay_offset = decode_delay_offset; - u_pipe_deps = deps; - decode_delay_offset = 0; - return; - } - - if (check_agi(deps, opcode, fetchdat, op_32)) + if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) agi_stall = 1; - codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); - decode_delay_offset = 0; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); -} -static void codegen_timing_winchip2_block_end(void) -{ - if (u_pipe_full) - { - int agi_stall = 0; - - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); - u_pipe_full = 0; + codegen_instruction(&timing, &temp_deps, 0, 0, 0, 0, agi_stall); + u_pipe_full = 0; + decode_delay_offset = 0; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; + return; + } else { + /*No pairing, run first instruction now*/ + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + u_pipe_full = 0; + regmask_modified = u_pipe_regmask; } + } + if (timings[opcode] & CYCLES_IS_MMX) { + /*Might pair with next instruction*/ + u_pipe_full = 1; + u_pipe_opcode = opcode; + u_pipe_timings = timings; + u_pipe_op_32 = op_32; + u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); + u_pipe_fetchdat = fetchdat; + u_pipe_decode_delay_offset = decode_delay_offset; + u_pipe_deps = deps; + decode_delay_offset = 0; + return; + } + + if (check_agi(deps, opcode, fetchdat, op_32)) + agi_stall = 1; + codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); + decode_delay_offset = 0; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); } -codegen_timing_t codegen_timing_winchip2 = +static void +codegen_timing_winchip2_block_end(void) { - codegen_timing_winchip2_start, - codegen_timing_winchip2_prefix, - codegen_timing_winchip2_opcode, - codegen_timing_winchip2_block_start, - codegen_timing_winchip2_block_end, - NULL + if (u_pipe_full) { + int agi_stall = 0; + + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + u_pipe_full = 0; + } +} + +codegen_timing_t codegen_timing_winchip2 = { + codegen_timing_winchip2_start, + codegen_timing_winchip2_prefix, + codegen_timing_winchip2_opcode, + codegen_timing_winchip2_block_start, + codegen_timing_winchip2_block_end, + NULL }; diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c index d045d8aaa..c5ed8a310 100644 --- a/src/cpu/cpu.c +++ b/src/cpu/cpu.c @@ -38,6 +38,9 @@ #include <86box/pic.h> #include <86box/pci.h> #include <86box/gdbstub.h> +#include <86box/plat_fallthrough.h> +#include <86box/plat_unused.h> + #ifdef USE_DYNAREC # include "codegen.h" #endif @@ -903,7 +906,7 @@ cpu_set(void) #endif x86_setopcodes_2386(ops_2386_386, ops_2386_ibm486_0f); cpu_features = CPU_FEATURE_MSR; - /* FALLTHROUGH */ + fallthrough; case CPU_386SX: case CPU_386DX: /* In case we get Deskpro 386 emulation */ @@ -1126,7 +1129,7 @@ cpu_set(void) case CPU_i486DX_SLENH: cpu_features = CPU_FEATURE_CR4 | CPU_FEATURE_VME; cpu_CR4_mask = CR4_VME | CR4_PVI | CR4_VME; - /* FALLTHROUGH */ + fallthrough; case CPU_RAPIDCAD: case CPU_i486SX: case CPU_i486DX: @@ -1439,7 +1442,9 @@ cpu_set(void) x86_setopcodes(ops_386, ops_pentium_0f, dynarec_ops_386, dynarec_ops_pentium_0f); else x86_setopcodes(ops_386, ops_c6x86mx_0f, dynarec_ops_386, dynarec_ops_c6x86mx_0f); - // x86_setopcodes(ops_386, ops_c6x86_0f, dynarec_ops_386, dynarec_ops_c6x86_0f); +#if 0 + x86_setopcodes(ops_386, ops_c6x86_0f, dynarec_ops_386, dynarec_ops_c6x86_0f); +#endif # else if (cpu_s->cpu_type == CPU_Cx6x86MX) x86_setopcodes(ops_386, ops_c6x86mx_0f); @@ -1447,7 +1452,9 @@ cpu_set(void) x86_setopcodes(ops_386, ops_pentium_0f); else x86_setopcodes(ops_386, ops_c6x86mx_0f); - // x86_setopcodes(ops_386, ops_c6x86_0f); +#if 0 + x86_setopcodes(ops_386, ops_c6x86_0f); +#endif # endif timing_rr = 1; /* register dest - register src */ @@ -2465,7 +2472,7 @@ cpu_ven_reset(void) case CPU_K6_3: case CPU_K6_2C: msr.amd_psor = (cpu_s->cpu_type >= CPU_K6_3) ? 0x008cULL : 0x018cULL; - /* FALLTHROUGH */ + fallthrough; case CPU_K6_2: #if defined(DEV_BRANCH) && defined(USE_AMD_K5) case CPU_K5: @@ -2479,7 +2486,6 @@ cpu_ven_reset(void) case CPU_PENTIUM2: case CPU_PENTIUM2D: msr.mtrr_cap = 0x00000508ULL; - /* FALLTHROUGH */ break; } } @@ -3252,7 +3258,9 @@ amd_k_invalid_wrmsr: break; case 0x1b: cpu_log("APIC_BASE write: %08X%08X\n", EDX, EAX); - // msr.apic_base = EAX | ((uint64_t) EDX << 32); +#if 0 + msr.apic_base = EAX | ((uint64_t) EDX << 32); +#endif break; case 0x2a: break; @@ -3421,7 +3429,7 @@ i686_invalid_wrmsr: } static void -cpu_write(uint16_t addr, uint8_t val, void *priv) +cpu_write(uint16_t addr, uint8_t val, UNUSED(void *priv)) { if (addr == 0xf0) { /* Writes to F0 clear FPU error and deassert the interrupt. */ @@ -3503,7 +3511,7 @@ cpu_write(uint16_t addr, uint8_t val, void *priv) } static uint8_t -cpu_read(uint16_t addr, void *priv) +cpu_read(uint16_t addr, UNUSED(void *priv)) { if (addr == 0xf007) return 0x7f; diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h index 6c01f2469..cbd1c1129 100644 --- a/src/cpu/cpu.h +++ b/src/cpu/cpu.h @@ -432,15 +432,15 @@ typedef struct { } cpu_state_t; typedef struct { - uint16_t cwd; - uint16_t swd; - uint16_t tag; - uint16_t foo; - uint32_t fip; - uint32_t fdp; - uint16_t fcs; - uint16_t fds; - floatx80 st_space[8]; + uint16_t cwd; + uint16_t swd; + uint16_t tag; + uint16_t foo; + uint32_t fip; + uint32_t fdp; + uint16_t fcs; + uint16_t fds; + floatx80 st_space[8]; unsigned char tos; unsigned char align1; unsigned char align2; diff --git a/src/cpu/x86.c b/src/cpu/x86.c index 76101c344..182431773 100644 --- a/src/cpu/x86.c +++ b/src/cpu/x86.c @@ -59,10 +59,12 @@ uint32_t rmdat; uint64_t xt_cpu_multi; /* Variables for handling the non-maskable interrupts. */ -int nmi = 0, nmi_auto_clear = 0; +int nmi = 0; +int nmi_auto_clear = 0; /* Was the CPU ever reset? */ -int x86_was_reset = 0, soft_reset_pci = 0; +int x86_was_reset = 0; +int soft_reset_pci = 0; /* Is the TRAP flag on? */ int trap = 0; @@ -71,7 +73,8 @@ int trap = 0; uint32_t easeg; /* This is for the OPTI 283 special reset handling mode. */ -int reset_on_hlt, hlt_reset_pending; +int reset_on_hlt; +int hlt_reset_pending; #ifdef ENABLE_X86_LOG void dumpregs(int); @@ -171,7 +174,10 @@ makemod1table(void) static void makeznptable(void) { - int c, d, e; + int c; + int d; + int e; + for (c = 0; c < 256; c++) { d = 0; for (e = 0; e < 8; e++) { diff --git a/src/cpu/x86.h b/src/cpu/x86.h index 337619fa4..77d9329fe 100644 --- a/src/cpu/x86.h +++ b/src/cpu/x86.h @@ -9,30 +9,44 @@ that we don't end up with an unnecessarily short block*/ #define ABRT_EXPECTED 0x80 -extern uint8_t opcode, opcode2; +extern uint8_t opcode; +extern uint8_t opcode2; extern uint8_t flags_p; extern uint8_t znptable8[256]; -extern uint16_t zero, oldcs; -extern uint16_t lastcs, lastpc; +extern uint16_t zero; +extern uint16_t oldcs; +extern uint16_t lastcs; +extern uint16_t lastpc; extern uint16_t *mod1add[2][8]; extern uint16_t znptable16[65536]; -extern int x86_was_reset, trap; -extern int codegen_flat_ss, codegen_flat_ds; -extern int timetolive, keyboardtimer, trap; -extern int optype, stack32; -extern int oldcpl, cgate32, cpl_override; +extern int x86_was_reset; +extern int trap; +extern int codegen_flat_ss; +extern int codegen_flat_ds; +extern int timetolive; +extern int keyboardtimer; +extern int trap; +extern int optype; +extern int stack32; +extern int oldcpl; +extern int cgate32; +extern int cpl_override; extern int nmi_enable; -extern int oddeven, inttype; +extern int oddeven; +extern int inttype; extern uint32_t use32; -extern uint32_t rmdat, easeg; -extern uint32_t oxpc, flags_zn; +extern uint32_t rmdat; +extern uint32_t easeg; +extern uint32_t oxpc; +extern uint32_t flags_zn; extern uint32_t abrt_error; extern uint32_t backupregs[16]; extern uint32_t *mod1seg[8]; -extern uint32_t *eal_r, *eal_w; +extern uint32_t *eal_r; +extern uint32_t *eal_w; #define fetchdat rmdat @@ -68,13 +82,13 @@ extern uint32_t *eal_r, *eal_w; enum { ABRT_NONE = 0, - ABRT_GEN, - ABRT_TS = 0xA, - ABRT_NP = 0xB, - ABRT_SS = 0xC, - ABRT_GPF = 0xD, - ABRT_PF = 0xE, - ABRT_DE = 0x40 /* INT 0, but we have to distinguish it from ABRT_NONE. */ + ABRT_GEN = 1, + ABRT_TS = 0xA, + ABRT_NP = 0xB, + ABRT_SS = 0xC, + ABRT_GPF = 0xD, + ABRT_PF = 0xE, + ABRT_DE = 0x40 /* INT 0, but we have to distinguish it from ABRT_NONE. */ }; extern void x86_doabrt(int x86_abrt); diff --git a/src/cpu/x86_ops_3dnow.h b/src/cpu/x86_ops_3dnow.h index ff657d708..e9826a7e3 100644 --- a/src/cpu/x86_ops_3dnow.h +++ b/src/cpu/x86_ops_3dnow.h @@ -139,7 +139,8 @@ opPSWAPD(uint32_t fetchdat) { MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); - float tempf, tempf2; + float tempf; + float tempf2; MMX_GETSRC(); diff --git a/src/cpu/x86_ops_arith.h b/src/cpu/x86_ops_arith.h index 41c655d09..09d64bb87 100644 --- a/src/cpu/x86_ops_arith.h +++ b/src/cpu/x86_ops_arith.h @@ -340,6 +340,7 @@ static int opCMP_b_rmw_a16(uint32_t fetchdat) { uint8_t dst; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -359,6 +360,7 @@ static int opCMP_b_rmw_a32(uint32_t fetchdat) { uint8_t dst; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -379,6 +381,7 @@ static int opCMP_w_rmw_a16(uint32_t fetchdat) { uint16_t dst; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -398,6 +401,7 @@ static int opCMP_w_rmw_a32(uint32_t fetchdat) { uint16_t dst; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -418,6 +422,7 @@ static int opCMP_l_rmw_a16(uint32_t fetchdat) { uint32_t dst; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -437,6 +442,7 @@ static int opCMP_l_rmw_a32(uint32_t fetchdat) { uint32_t dst; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -457,6 +463,7 @@ static int opCMP_b_rm_a16(uint32_t fetchdat) { uint8_t src; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -472,6 +479,7 @@ static int opCMP_b_rm_a32(uint32_t fetchdat) { uint8_t src; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -488,6 +496,7 @@ static int opCMP_w_rm_a16(uint32_t fetchdat) { uint16_t src; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -503,6 +512,7 @@ static int opCMP_w_rm_a32(uint32_t fetchdat) { uint16_t src; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -519,6 +529,7 @@ static int opCMP_l_rm_a16(uint32_t fetchdat) { uint32_t src; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -534,6 +545,7 @@ static int opCMP_l_rm_a32(uint32_t fetchdat) { uint32_t src; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -550,6 +562,7 @@ static int opCMP_AL_imm(uint32_t fetchdat) { uint8_t src = getbytef(); + setsub8(AL, src); CLOCK_CYCLES(timing_rr); PREFETCH_RUN(timing_rr, 2, -1, 0, 0, 0, 0, 0); @@ -560,6 +573,7 @@ static int opCMP_AX_imm(uint32_t fetchdat) { uint16_t src = getwordf(); + setsub16(AX, src); CLOCK_CYCLES(timing_rr); PREFETCH_RUN(timing_rr, 3, -1, 0, 0, 0, 0, 0); @@ -570,6 +584,7 @@ static int opCMP_EAX_imm(uint32_t fetchdat) { uint32_t src = getlong(); + if (cpu_state.abrt) return 1; setsub32(EAX, src); @@ -581,7 +596,9 @@ opCMP_EAX_imm(uint32_t fetchdat) static int opTEST_b_a16(uint32_t fetchdat) { - uint8_t temp, temp2; + uint8_t temp; + uint8_t temp2; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -601,7 +618,9 @@ opTEST_b_a16(uint32_t fetchdat) static int opTEST_b_a32(uint32_t fetchdat) { - uint8_t temp, temp2; + uint8_t temp; + uint8_t temp2; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -622,7 +641,9 @@ opTEST_b_a32(uint32_t fetchdat) static int opTEST_w_a16(uint32_t fetchdat) { - uint16_t temp, temp2; + uint16_t temp; + uint16_t temp2; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -642,7 +663,9 @@ opTEST_w_a16(uint32_t fetchdat) static int opTEST_w_a32(uint32_t fetchdat) { - uint16_t temp, temp2; + uint16_t temp; + uint16_t temp2; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -663,7 +686,9 @@ opTEST_w_a32(uint32_t fetchdat) static int opTEST_l_a16(uint32_t fetchdat) { - uint32_t temp, temp2; + uint32_t temp; + uint16_t temp2; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -683,7 +708,9 @@ opTEST_l_a16(uint32_t fetchdat) static int opTEST_l_a32(uint32_t fetchdat) { - uint32_t temp, temp2; + uint32_t temp; + uint16_t temp2; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -803,7 +830,8 @@ opTEST_EAX(uint32_t fetchdat) static int op80_a16(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -823,7 +851,8 @@ op80_a16(uint32_t fetchdat) static int op80_a32(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -843,7 +872,8 @@ op80_a32(uint32_t fetchdat) static int op81_w_a16(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -863,7 +893,8 @@ op81_w_a16(uint32_t fetchdat) static int op81_w_a32(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -883,7 +914,8 @@ op81_w_a32(uint32_t fetchdat) static int op81_l_a16(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -903,7 +935,8 @@ op81_l_a16(uint32_t fetchdat) static int op81_l_a32(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -924,7 +957,8 @@ op81_l_a32(uint32_t fetchdat) static int op83_w_a16(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -946,7 +980,8 @@ op83_w_a16(uint32_t fetchdat) static int op83_w_a32(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -969,7 +1004,8 @@ op83_w_a32(uint32_t fetchdat) static int op83_l_a16(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint16_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -991,7 +1027,8 @@ op83_l_a16(uint32_t fetchdat) static int op83_l_a32(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) diff --git a/src/cpu/x86_ops_atomic.h b/src/cpu/x86_ops_atomic.h index 4f3439973..f0cab34e1 100644 --- a/src/cpu/x86_ops_atomic.h +++ b/src/cpu/x86_ops_atomic.h @@ -1,7 +1,9 @@ static int opCMPXCHG_b_a16(uint32_t fetchdat) { - uint8_t temp, temp2 = AL; + uint8_t temp; + uint8_t temp2 = AL; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteab(); @@ -20,7 +22,9 @@ opCMPXCHG_b_a16(uint32_t fetchdat) static int opCMPXCHG_b_a32(uint32_t fetchdat) { - uint8_t temp, temp2 = AL; + uint8_t temp; + uint8_t temp2 = AL; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteab(); @@ -40,7 +44,9 @@ opCMPXCHG_b_a32(uint32_t fetchdat) static int opCMPXCHG_w_a16(uint32_t fetchdat) { - uint16_t temp, temp2 = AX; + uint16_t temp; + uint16_t temp2 = AX; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteaw(); @@ -59,7 +65,9 @@ opCMPXCHG_w_a16(uint32_t fetchdat) static int opCMPXCHG_w_a32(uint32_t fetchdat) { - uint16_t temp, temp2 = AX; + uint16_t temp; + uint16_t temp2 = AX; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteaw(); @@ -79,7 +87,9 @@ opCMPXCHG_w_a32(uint32_t fetchdat) static int opCMPXCHG_l_a16(uint32_t fetchdat) { - uint32_t temp, temp2 = EAX; + uint32_t temp; + uint32_t temp2 = EAX; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteal(); @@ -98,7 +108,9 @@ opCMPXCHG_l_a16(uint32_t fetchdat) static int opCMPXCHG_l_a32(uint32_t fetchdat) { - uint32_t temp, temp2 = EAX; + uint32_t temp; + uint32_t temp2 = EAX; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteal(); @@ -119,7 +131,11 @@ opCMPXCHG_l_a32(uint32_t fetchdat) static int opCMPXCHG8B_a16(uint32_t fetchdat) { - uint32_t temp, temp_hi, temp2 = EAX, temp2_hi = EDX; + uint32_t temp; + uint32_t temp_hi; + uint32_t temp2 = EAX; + uint32_t temp2_hi = EDX; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteal(); @@ -146,7 +162,11 @@ opCMPXCHG8B_a16(uint32_t fetchdat) static int opCMPXCHG8B_a32(uint32_t fetchdat) { - uint32_t temp, temp_hi, temp2 = EAX, temp2_hi = EDX; + uint32_t temp; + uint32_t temp_hi; + uint32_t temp2 = EAX; + uint32_t temp2_hi = EDX; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteal(); @@ -177,7 +197,9 @@ static int opXADD_b_a16(uint32_t fetchdat) { uint8_t temp; - uint8_t src, dest; + uint8_t src; + uint8_t dest; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = getr8(cpu_reg); @@ -197,7 +219,9 @@ static int opXADD_b_a32(uint32_t fetchdat) { uint8_t temp; - uint8_t src, dest; + uint8_t src; + uint8_t dest; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = getr8(cpu_reg); @@ -218,7 +242,9 @@ static int opXADD_w_a16(uint32_t fetchdat) { uint16_t temp; - uint16_t src, dest; + uint16_t src; + uint16_t dest; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = cpu_state.regs[cpu_reg].w; @@ -238,7 +264,9 @@ static int opXADD_w_a32(uint32_t fetchdat) { uint16_t temp; - uint16_t src, dest; + uint16_t src; + uint16_t dest; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = cpu_state.regs[cpu_reg].w; @@ -259,7 +287,9 @@ static int opXADD_l_a16(uint32_t fetchdat) { uint32_t temp; - uint32_t src, dest; + uint32_t src; + uint32_t dest; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = cpu_state.regs[cpu_reg].l; @@ -279,7 +309,9 @@ static int opXADD_l_a32(uint32_t fetchdat) { uint32_t temp; - uint32_t src, dest; + uint32_t src; + uint32_t dest; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = cpu_state.regs[cpu_reg].l; diff --git a/src/cpu/x86_ops_bcd.h b/src/cpu/x86_ops_bcd.h index d3ff97ead..b4779ab3e 100644 --- a/src/cpu/x86_ops_bcd.h +++ b/src/cpu/x86_ops_bcd.h @@ -19,6 +19,7 @@ static int opAAD(uint32_t fetchdat) { int base = getbytef(); + if (!cpu_isintel) base = 10; AL = (AH * base) + AL; @@ -33,6 +34,7 @@ static int opAAM(uint32_t fetchdat) { int base = getbytef(); + if (!base || !cpu_isintel) base = 10; AH = AL / base; @@ -63,7 +65,9 @@ opAAS(uint32_t fetchdat) static int opDAA(uint32_t fetchdat) { - uint16_t tempw, old_AL, old_CF; + uint16_t tempw; + uint16_t old_AL; + uint16_t old_CF; flags_rebuild(); old_AL = AL; @@ -98,7 +102,9 @@ opDAA(uint32_t fetchdat) static int opDAS(uint32_t fetchdat) { - uint16_t tempw, old_AL, old_CF; + uint16_t tempw; + uint16_t old_AL; + uint16_t old_CF; flags_rebuild(); old_AL = AL; diff --git a/src/cpu/x86_ops_bit.h b/src/cpu/x86_ops_bit.h index 8514e8e1c..b72142b5a 100644 --- a/src/cpu/x86_ops_bit.h +++ b/src/cpu/x86_ops_bit.h @@ -201,13 +201,17 @@ opBT_l_r_a32(uint32_t fetchdat) return 0; \ } +// clang-format off opBT(C, ^=) - opBT(R, &= ~) - opBT(S, |=) +opBT(R, &= ~) +opBT(S, |=) + // clang-format on - static int opBA_w_a16(uint32_t fetchdat) +static int +opBA_w_a16(uint32_t fetchdat) { - int tempc, count; + int tempc; + int count; uint16_t temp; fetch_ea_16(fetchdat); @@ -258,7 +262,8 @@ opBT(C, ^=) static int opBA_w_a32(uint32_t fetchdat) { - int tempc, count; + int tempc; + int count; uint16_t temp; fetch_ea_32(fetchdat); @@ -310,7 +315,8 @@ opBA_w_a32(uint32_t fetchdat) static int opBA_l_a16(uint32_t fetchdat) { - int tempc, count; + int tempc; + int count; uint32_t temp; fetch_ea_16(fetchdat); @@ -361,7 +367,8 @@ opBA_l_a16(uint32_t fetchdat) static int opBA_l_a32(uint32_t fetchdat) { - int tempc, count; + int tempc; + int count; uint32_t temp; fetch_ea_32(fetchdat); diff --git a/src/cpu/x86_ops_call.h b/src/cpu/x86_ops_call.h index 88899cef8..731f58ec8 100644 --- a/src/cpu/x86_ops_call.h +++ b/src/cpu/x86_ops_call.h @@ -189,8 +189,10 @@ static int opCALL_far_w(uint32_t fetchdat) { - uint32_t old_cs, old_pc; - uint16_t new_cs, new_pc; + uint32_t old_cs; + uint32_t old_pc; + uint16_t new_cs; + uint16_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -209,8 +211,10 @@ opCALL_far_w(uint32_t fetchdat) static int opCALL_far_l(uint32_t fetchdat) { - uint32_t old_cs, old_pc; - uint32_t new_cs, new_pc; + uint32_t old_cs; + uint32_t old_pc; + uint32_t new_cs; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -230,8 +234,10 @@ opCALL_far_l(uint32_t fetchdat) static int opFF_w_a16(uint32_t fetchdat) { - uint16_t old_cs, new_cs; - uint32_t old_pc, new_pc; + uint16_t old_cs; + uint16_t new_cs; + uint32_t old_pc; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -392,8 +398,10 @@ opFF_w_a16(uint32_t fetchdat) static int opFF_w_a32(uint32_t fetchdat) { - uint16_t old_cs, new_cs; - uint32_t old_pc, new_pc; + uint16_t old_cs; + uint16_t new_cs; + uint32_t old_pc; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -555,8 +563,10 @@ opFF_w_a32(uint32_t fetchdat) static int opFF_l_a16(uint32_t fetchdat) { - uint16_t old_cs, new_cs; - uint32_t old_pc, new_pc; + uint16_t old_cs; + uint16_t new_cs; + uint32_t old_pc; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -717,8 +727,10 @@ opFF_l_a16(uint32_t fetchdat) static int opFF_l_a32(uint32_t fetchdat) { - uint16_t old_cs, new_cs; - uint32_t old_pc, new_pc; + uint16_t old_cs; + uint16_t new_cs; + uint32_t old_pc; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); diff --git a/src/cpu/x86_ops_fpu.h b/src/cpu/x86_ops_fpu.h index 29e999941..849e24e3d 100644 --- a/src/cpu/x86_ops_fpu.h +++ b/src/cpu/x86_ops_fpu.h @@ -97,7 +97,9 @@ opWAIT(uint32_t fetchdat) return 1; } - // if (!cpu_use_dynarec && fpu_softfloat) { +#if 0 + if (!cpu_use_dynarec && fpu_softfloat) { +#endif if (fpu_softfloat) { if (fpu_state.swd & FPU_SW_Summary) { if (cr0 & 0x20) { diff --git a/src/cpu/x86_ops_i686.h b/src/cpu/x86_ops_i686.h index f2b07a1c4..f11bca945 100644 --- a/src/cpu/x86_ops_i686.h +++ b/src/cpu/x86_ops_i686.h @@ -178,11 +178,13 @@ fx_save_stor_common(uint32_t fetchdat, int bits) uint8_t ftwb = 0; uint16_t rec_ftw = 0; uint16_t fpus = 0; - int i, mmx_tags = 0; + int i; + int mmx_tags = 0; uint16_t exp = 0x0000; uint64_t mant = 0x0000000000000000ULL; uint64_t fraction; - uint8_t jm, valid; + uint8_t jm; + uint8_t valid; /* Exp_all_1 Exp_all_0 Frac_all_0 J M FTW_Valid | Ent ----------------------------------------------+------ */ uint8_t ftw_table_idx; diff --git a/src/cpu/x86_ops_jump.h b/src/cpu/x86_ops_jump.h index a1503a75e..33e1ed4f0 100644 --- a/src/cpu/x86_ops_jump.h +++ b/src/cpu/x86_ops_jump.h @@ -88,7 +88,8 @@ opJ(LE) opJ(NLE) // clang-format on - static int opLOOPNE_w(uint32_t fetchdat) +static int +opLOOPNE_w(uint32_t fetchdat) { int8_t offset = (int8_t) getbytef(); CX--; @@ -271,8 +272,10 @@ opJMP_r32(uint32_t fetchdat) static int opJMP_far_a16(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; uint32_t old_pc; + addr = getwordf(); seg = getword(); if (cpu_state.abrt) @@ -289,7 +292,9 @@ static int opJMP_far_a32(uint32_t fetchdat) { uint16_t seg; - uint32_t addr, old_pc; + uint32_t addr; + uint32_t old_pc; + addr = getlong(); seg = getword(); if (cpu_state.abrt) @@ -307,6 +312,7 @@ static int opCALL_r16(uint32_t fetchdat) { int16_t addr = (int16_t) getwordf(); + PUSH_W(cpu_state.pc); cpu_state.pc += addr; cpu_state.pc &= 0xffff; @@ -320,6 +326,7 @@ static int opCALL_r32(uint32_t fetchdat) { int32_t addr = getlong(); + if (cpu_state.abrt) return 1; PUSH_L(cpu_state.pc); diff --git a/src/cpu/x86_ops_misc.h b/src/cpu/x86_ops_misc.h index 60ed873e4..170457caa 100644 --- a/src/cpu/x86_ops_misc.h +++ b/src/cpu/x86_ops_misc.h @@ -55,7 +55,8 @@ opF6_a16(uint32_t fetchdat) int tempws2 = 0; uint16_t tempw = 0; uint16_t src16; - uint8_t src, dst; + uint8_t src; + uint8_t dst; int8_t temps; fetch_ea_16(fetchdat); @@ -173,7 +174,8 @@ opF6_a32(uint32_t fetchdat) int tempws2 = 0; uint16_t tempw = 0; uint16_t src16; - uint8_t src, dst; + uint8_t src; + uint8_t dst; int8_t temps; fetch_ea_32(fetchdat); @@ -404,7 +406,8 @@ opF7_w_a32(uint32_t fetchdat) int tempws; int tempws2 = 1; int16_t temps16; - uint16_t src, dst; + uint16_t src; + uint16_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -514,7 +517,8 @@ static int opF7_l_a16(uint32_t fetchdat) { uint64_t temp64; - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -606,7 +610,8 @@ static int opF7_l_a32(uint32_t fetchdat) { uint64_t temp64; - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -739,7 +744,8 @@ opLOCK(uint32_t fetchdat) static int opBOUND_w_a16(uint32_t fetchdat) { - int16_t low, high; + int16_t low; + int16_t high; fetch_ea_16(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -761,7 +767,8 @@ opBOUND_w_a16(uint32_t fetchdat) static int opBOUND_w_a32(uint32_t fetchdat) { - int16_t low, high; + int16_t low; + int16_t high; fetch_ea_32(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -784,7 +791,8 @@ opBOUND_w_a32(uint32_t fetchdat) static int opBOUND_l_a16(uint32_t fetchdat) { - int32_t low, high; + int32_t low; + int32_t high; fetch_ea_16(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -806,7 +814,8 @@ opBOUND_l_a16(uint32_t fetchdat) static int opBOUND_l_a32(uint32_t fetchdat) { - int32_t low, high; + int32_t low; + int32_t high; fetch_ea_32(fetchdat); ILLEGAL_ON(cpu_mod == 3); diff --git a/src/cpu/x86_ops_mmx_mov.h b/src/cpu/x86_ops_mmx_mov.h index 65bbb0c01..f04e271ef 100644 --- a/src/cpu/x86_ops_mmx_mov.h +++ b/src/cpu/x86_ops_mmx_mov.h @@ -115,7 +115,7 @@ opMOVD_mm_l_a32(uint32_t fetchdat) static int opMOVD_mm_l_a16_cx(uint32_t fetchdat) { - MMX_REG *op; + const MMX_REG *op; if (in_smm) return opSMINT(fetchdat); @@ -144,7 +144,7 @@ opMOVD_mm_l_a16_cx(uint32_t fetchdat) static int opMOVD_mm_l_a32_cx(uint32_t fetchdat) { - MMX_REG *op; + const MMX_REG *op; if (in_smm) return opSMINT(fetchdat); diff --git a/src/cpu/x86_ops_mov_ctrl.h b/src/cpu/x86_ops_mov_ctrl.h index d95116c93..b0c841f83 100644 --- a/src/cpu/x86_ops_mov_ctrl.h +++ b/src/cpu/x86_ops_mov_ctrl.h @@ -251,12 +251,17 @@ opMOV_DRx_r_a32(uint32_t fetchdat) static void opMOV_r_TRx(void) { - // uint32_t base; +#if 0 + uint32_t base; + + base = _tr[4] & 0xfffff800; +#endif - // base = _tr[4] & 0xfffff800; switch (cpu_reg) { case 3: - // pclog("[R] %08X cache = %08X\n", base + cache_index, _tr[3]); +#if 0 + pclog("[R] %08X cache = %08X\n", base + cache_index, _tr[3]); +#endif _tr[3] = *(uint32_t *) &(_cache[cache_index]); cache_index = (cache_index + 4) & 0xf; break; @@ -293,42 +298,57 @@ static void opMOV_TRx_r(void) { uint32_t base; - int i, ctl; + int i; + int ctl; _tr[cpu_reg] = cpu_state.regs[cpu_rm].l; base = _tr[4] & 0xfffff800; ctl = _tr[5] & 3; switch (cpu_reg) { case 3: - // pclog("[W] %08X cache = %08X\n", base + cache_index, _tr[3]); +#if 0 + pclog("[W] %08X cache = %08X\n", base + cache_index, _tr[3]); +#endif *(uint32_t *) &(_cache[cache_index]) = _tr[3]; cache_index = (cache_index + 4) & 0xf; break; case 4: - // if (!(cr0 & 1) && !(_tr[5] & (1 << 19))) - // pclog("TAG = %08X, DEST = %08X\n", base, base + cache_index - 16); +#if 0 + if (!(cr0 & 1) && !(_tr[5] & (1 << 19))) + pclog("TAG = %08X, DEST = %08X\n", base, base + cache_index - 16); +#endif break; case 5: - // pclog("[16] EXT = %i (%i), SET = %04X\n", !!(_tr[5] & (1 << 19)), _tr[5] & 0x03, _tr[5] & 0x7f0); +#if 0 + pclog("[16] EXT = %i (%i), SET = %04X\n", !!(_tr[5] & (1 << 19)), _tr[5] & 0x03, _tr[5] & 0x7f0); +#endif if (!(_tr[5] & (1 << 19))) { switch (ctl) { case 0: - // pclog(" Cache fill or read...\n", base); +#if 0 + pclog(" Cache fill or read...\n", base); +#endif break; case 1: base += (_tr[5] & 0x7f0); - // pclog(" Writing 16 bytes to %08X...\n", base); +#if 0 + pclog(" Writing 16 bytes to %08X...\n", base); +#endif for (i = 0; i < 16; i += 4) mem_writel_phys(base + i, *(uint32_t *) &(_cache[i])); break; case 2: base += (_tr[5] & 0x7f0); - // pclog(" Reading 16 bytes from %08X...\n", base); +#if 0 + pclog(" Reading 16 bytes from %08X...\n", base); +#endif for (i = 0; i < 16; i += 4) *(uint32_t *) &(_cache[i]) = mem_readl_phys(base + i); break; case 3: - // pclog(" Cache invalidate/flush...\n", base); +#if 0 + pclog(" Cache invalidate/flush...\n", base); +#endif break; } } diff --git a/src/cpu/x86_ops_mov_seg.h b/src/cpu/x86_ops_mov_seg.h index c6bfd9933..66e77d585 100644 --- a/src/cpu/x86_ops_mov_seg.h +++ b/src/cpu/x86_ops_mov_seg.h @@ -258,7 +258,8 @@ opMOV_seg_w_a32(uint32_t fetchdat) static int opLDS_w_a16(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; fetch_ea_16(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -280,7 +281,8 @@ opLDS_w_a16(uint32_t fetchdat) static int opLDS_w_a32(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; fetch_ea_32(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -349,7 +351,8 @@ opLDS_l_a32(uint32_t fetchdat) static int opLSS_w_a16(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; fetch_ea_16(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -371,7 +374,8 @@ opLSS_w_a16(uint32_t fetchdat) static int opLSS_w_a32(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; fetch_ea_32(fetchdat); ILLEGAL_ON(cpu_mod == 3); diff --git a/src/cpu/x86_ops_mul.h b/src/cpu/x86_ops_mul.h index 552a9973a..aa7526e75 100644 --- a/src/cpu/x86_ops_mul.h +++ b/src/cpu/x86_ops_mul.h @@ -2,7 +2,8 @@ static int opIMUL_w_iw_a16(uint32_t fetchdat) { int32_t templ; - int16_t tempw, tempw2; + int16_t tempw; + int16_t tempw2; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -31,7 +32,8 @@ static int opIMUL_w_iw_a32(uint32_t fetchdat) { int32_t templ; - int16_t tempw, tempw2; + int16_t tempw; + int16_t tempw2; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -61,7 +63,8 @@ static int opIMUL_l_il_a16(uint32_t fetchdat) { int64_t temp64; - int32_t templ, templ2; + int32_t templ; + int32_t templ2; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -90,7 +93,8 @@ static int opIMUL_l_il_a32(uint32_t fetchdat) { int64_t temp64; - int32_t templ, templ2; + int32_t templ; + int32_t templ2; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -120,7 +124,8 @@ static int opIMUL_w_ib_a16(uint32_t fetchdat) { int32_t templ; - int16_t tempw, tempw2; + int16_t tempw; + int16_t tempw2; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -151,7 +156,8 @@ static int opIMUL_w_ib_a32(uint32_t fetchdat) { int32_t templ; - int16_t tempw, tempw2; + int16_t tempw; + int16_t tempw2; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -183,7 +189,8 @@ static int opIMUL_l_ib_a16(uint32_t fetchdat) { int64_t temp64; - int32_t templ, templ2; + int32_t templ; + int32_t templ2; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -214,7 +221,8 @@ static int opIMUL_l_ib_a32(uint32_t fetchdat) { int64_t temp64; - int32_t templ, templ2; + int32_t templ; + int32_t templ2; fetch_ea_32(fetchdat); if (cpu_mod != 3) diff --git a/src/cpu/x86_ops_pmode.h b/src/cpu/x86_ops_pmode.h index c8e146450..1254d7289 100644 --- a/src/cpu/x86_ops_pmode.h +++ b/src/cpu/x86_ops_pmode.h @@ -179,10 +179,16 @@ opLAR(w_a16, fetch_ea_16, 0, 0) static int op0F00_common(uint32_t fetchdat, int ea32) { - int dpl, valid, granularity; - uint32_t addr, base, limit; - uint16_t desc, sel; - uint8_t access, ar_high; + int dpl; + int valid; + int granularity; + uint32_t addr; + uint32_t base; + uint32_t limit; + uint16_t desc; + uint16_t sel; + uint8_t access; + uint8_t ar_high; switch (rmdat & 0x38) { case 0x00: /*SLDT*/ @@ -356,7 +362,9 @@ static int op0F01_common(uint32_t fetchdat, int is32, int is286, int ea32) { uint32_t base; - uint16_t limit, tempw; + uint16_t limit; + uint16_t tempw; + switch (rmdat & 0x38) { case 0x00: /*SGDT*/ if (cpu_mod != 3) diff --git a/src/cpu/x86_ops_prefix.h b/src/cpu/x86_ops_prefix.h index eba59c17a..1918e8acd 100644 --- a/src/cpu/x86_ops_prefix.h +++ b/src/cpu/x86_ops_prefix.h @@ -90,7 +90,8 @@ op_seg(GS_REPNE, cpu_state.seg_gs, x86_opcodes_REPNE, x86_opcodes) op_seg(SS_REPNE, cpu_state.seg_ss, x86_opcodes_REPNE, x86_opcodes) // clang-format on - static int op_66(uint32_t fetchdat) /*Data size select*/ +static int +op_66(uint32_t fetchdat) /*Data size select*/ { fetchdat = fastreadl(cs + cpu_state.pc); if (cpu_state.abrt) diff --git a/src/cpu/x86_ops_ret.h b/src/cpu/x86_ops_ret.h index ec200f2d7..64da566d3 100644 --- a/src/cpu/x86_ops_ret.h +++ b/src/cpu/x86_ops_ret.h @@ -190,7 +190,9 @@ opIRET(uint32_t fetchdat) if ((cr0 & 1) && (cpu_state.eflags & VM_FLAG) && (IOPL != 3)) { if (cr4 & CR4_VME) { - uint16_t new_pc, new_cs, new_flags; + uint16_t new_pc; + uint16_t new_cs; + uint16_t new_flags; new_pc = readmemw(ss, SP); new_cs = readmemw(ss, ((SP + 2) & 0xffff)); diff --git a/src/cpu/x86_ops_shift.h b/src/cpu/x86_ops_shift.h index 9c11a32f0..22c9aa8b6 100644 --- a/src/cpu/x86_ops_shift.h +++ b/src/cpu/x86_ops_shift.h @@ -605,7 +605,8 @@ opC0_a16(uint32_t fetchdat) { int c; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -624,7 +625,8 @@ opC0_a32(uint32_t fetchdat) { int c; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -643,7 +645,8 @@ opC1_w_a16(uint32_t fetchdat) { int c; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -662,7 +665,8 @@ opC1_w_a32(uint32_t fetchdat) { int c; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -681,7 +685,8 @@ opC1_l_a16(uint32_t fetchdat) { int c; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -700,7 +705,8 @@ opC1_l_a32(uint32_t fetchdat) { int c; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -720,7 +726,8 @@ opD0_a16(uint32_t fetchdat) { int c = 1; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -736,7 +743,8 @@ opD0_a32(uint32_t fetchdat) { int c = 1; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -752,7 +760,8 @@ opD1_w_a16(uint32_t fetchdat) { int c = 1; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -768,7 +777,8 @@ opD1_w_a32(uint32_t fetchdat) { int c = 1; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -784,7 +794,8 @@ opD1_l_a16(uint32_t fetchdat) { int c = 1; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -800,7 +811,8 @@ opD1_l_a32(uint32_t fetchdat) { int c = 1; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -817,7 +829,8 @@ opD2_a16(uint32_t fetchdat) { int c; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -834,7 +847,8 @@ opD2_a32(uint32_t fetchdat) { int c; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -851,7 +865,8 @@ opD3_w_a16(uint32_t fetchdat) { int c; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -868,7 +883,8 @@ opD3_w_a32(uint32_t fetchdat) { int c; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -885,7 +901,8 @@ opD3_l_a16(uint32_t fetchdat) { int c; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -902,7 +919,8 @@ opD3_l_a32(uint32_t fetchdat) { int c; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) diff --git a/src/cpu/x86_ops_stack.h b/src/cpu/x86_ops_stack.h index 8217a9e5a..aa6d4f31f 100644 --- a/src/cpu/x86_ops_stack.h +++ b/src/cpu/x86_ops_stack.h @@ -379,9 +379,13 @@ opENTER_w(uint32_t fetchdat) { uint16_t offset; int count; - uint32_t tempEBP, tempESP, frame_ptr; + uint32_t tempEBP; + uint32_t tempESP; + uint32_t frame_ptr; #ifndef IS_DYNAREC - int reads = 0, writes = 1, instr_cycles = 0; + int reads = 0; + int writes = 1; + int instr_cycles = 0; #endif uint16_t tempw; @@ -448,9 +452,13 @@ opENTER_l(uint32_t fetchdat) { uint16_t offset; int count; - uint32_t tempEBP, tempESP, frame_ptr; + uint32_t tempEBP; + uint32_t tempESP; + uint32_t frame_ptr; #ifndef IS_DYNAREC - int reads = 0, writes = 1, instr_cycles = 0; + int reads = 0; + int writes = 1; + int instr_cycles = 0; #endif uint32_t templ; diff --git a/src/cpu/x86_ops_string.h b/src/cpu/x86_ops_string.h index c9ba94760..619386fcb 100644 --- a/src/cpu/x86_ops_string.h +++ b/src/cpu/x86_ops_string.h @@ -219,7 +219,8 @@ opMOVSL_a32(uint32_t fetchdat) static int opCMPSB_a16(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; addr64 = addr64_2 = 0x00000000; @@ -259,7 +260,8 @@ opCMPSB_a16(uint32_t fetchdat) static int opCMPSB_a32(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; addr64 = addr64_2 = 0x00000000; @@ -300,7 +302,8 @@ opCMPSB_a32(uint32_t fetchdat) static int opCMPSW_a16(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; addr64a[0] = addr64a[1] = 0x00000000; addr64a_2[0] = addr64a_2[1] = 0x00000000; @@ -341,7 +344,8 @@ opCMPSW_a16(uint32_t fetchdat) static int opCMPSW_a32(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; addr64a[0] = addr64a[1] = 0x00000000; addr64a_2[0] = addr64a_2[1] = 0x00000000; @@ -383,7 +387,8 @@ opCMPSW_a32(uint32_t fetchdat) static int opCMPSL_a16(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000; addr64a_2[0] = addr64a_2[1] = addr64a_2[2] = addr64a_2[3] = 0x00000000; @@ -424,7 +429,8 @@ opCMPSL_a16(uint32_t fetchdat) static int opCMPSL_a32(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000; addr64a_2[0] = addr64a_2[1] = addr64a_2[2] = addr64a_2[3] = 0x00000000; diff --git a/src/cpu/x86_ops_string_2386.h b/src/cpu/x86_ops_string_2386.h index 29ca0d9a4..98875e54f 100644 --- a/src/cpu/x86_ops_string_2386.h +++ b/src/cpu/x86_ops_string_2386.h @@ -219,7 +219,8 @@ opMOVSL_a32(uint32_t fetchdat) static int opCMPSB_a16(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; addr64 = addr64_2 = 0x00000000; @@ -255,7 +256,8 @@ opCMPSB_a16(uint32_t fetchdat) static int opCMPSB_a32(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; addr64 = addr64_2 = 0x00000000; @@ -292,7 +294,8 @@ opCMPSB_a32(uint32_t fetchdat) static int opCMPSW_a16(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; addr64a[0] = addr64a[1] = 0x00000000; addr64a_2[0] = addr64a_2[1] = 0x00000000; @@ -329,7 +332,8 @@ opCMPSW_a16(uint32_t fetchdat) static int opCMPSW_a32(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; addr64a[0] = addr64a[1] = 0x00000000; addr64a_2[0] = addr64a_2[1] = 0x00000000; @@ -367,7 +371,8 @@ opCMPSW_a32(uint32_t fetchdat) static int opCMPSL_a16(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000; addr64a_2[0] = addr64a_2[1] = addr64a_2[2] = addr64a_2[3] = 0x00000000; @@ -404,7 +409,8 @@ opCMPSL_a16(uint32_t fetchdat) static int opCMPSL_a32(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000; addr64a_2[0] = addr64a_2[1] = addr64a_2[2] = addr64a_2[3] = 0x00000000; diff --git a/src/cpu/x86seg.c b/src/cpu/x86seg.c index 7f630275c..3c4847a36 100644 --- a/src/cpu/x86seg.c +++ b/src/cpu/x86seg.c @@ -31,13 +31,17 @@ #include <86box/machine.h> #include <86box/mem.h> #include <86box/nvr.h> +#include <86box/plat_fallthrough.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_flags.h" #include "386_common.h" uint8_t opcode2; -int cgate16, cgate32; +int cgate16; +int cgate32; int intgatesize; void taskswitch286(uint16_t seg, uint16_t *segdat, int is32); @@ -157,7 +161,7 @@ x86_doabrt(int x86_abrt) } void -x86de(char *s, uint16_t error) +x86de(UNUSED(char *s), UNUSED(uint16_t error)) { #ifdef BAD_CODE cpu_state.abrt = ABRT_DE; @@ -168,35 +172,35 @@ x86de(char *s, uint16_t error) } void -x86gpf(char *s, uint16_t error) +x86gpf(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_GPF; abrt_error = error; } void -x86gpf_expected(char *s, uint16_t error) +x86gpf_expected(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_GPF | ABRT_EXPECTED; abrt_error = error; } void -x86ss(char *s, uint16_t error) +x86ss(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_SS; abrt_error = error; } void -x86ts(char *s, uint16_t error) +x86ts(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_TS; abrt_error = error; } void -x86np(char *s, uint16_t error) +x86np(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_NP; abrt_error = error; @@ -272,9 +276,9 @@ do_seg_v86_init(x86seg *s) static void check_seg_valid(x86seg *s) { - int dpl = (s->access >> 5) & 3; - int valid = 1; - x86seg *dt = (s->seg & 0x0004) ? &ldt : &gdt; + int dpl = (s->access >> 5) & 3; + int valid = 1; + const x86seg *dt = (s->seg & 0x0004) ? &ldt : &gdt; if (((s->seg & 0xfff8UL) + 7UL) > dt->limit) valid = 0; @@ -334,10 +338,11 @@ void #endif loadseg(uint16_t seg, x86seg *s) { - uint16_t segdat[4]; - uint32_t addr, *segdat32 = (uint32_t *) segdat; - int dpl; - x86seg *dt; + uint16_t segdat[4]; + uint32_t addr; + uint32_t *segdat32 = (uint32_t *) segdat; + int dpl; + const x86seg *dt; if ((msw & 1) && !(cpu_state.eflags & VM_FLAG)) { if (!(seg & 0xfffc)) { @@ -531,9 +536,10 @@ loadseg(uint16_t seg, x86seg *s) void loadcs(uint16_t seg) { - uint16_t segdat[4]; - uint32_t addr, *segdat32 = (uint32_t *) segdat; - x86seg *dt; + uint16_t segdat[4]; + uint32_t addr; + uint32_t *segdat32 = (uint32_t *) segdat; + const x86seg *dt; x86seg_log("Load CS %04X\n", seg); @@ -619,11 +625,13 @@ loadcs(uint16_t seg) void loadcsjmp(uint16_t seg, uint32_t old_pc) { - uint16_t type, seg2; - uint16_t segdat[4]; - uint32_t addr, newpc; - uint32_t *segdat32 = (uint32_t *) segdat; - x86seg *dt; + uint16_t type; + uint16_t seg2; + uint16_t segdat[4]; + uint32_t addr; + uint32_t newpc; + uint32_t *segdat32 = (uint32_t *) segdat; + const x86seg *dt; if ((msw & 1) && !(cpu_state.eflags & VM_FLAG)) { if (!(seg & 0xfffc)) { @@ -743,7 +751,7 @@ loadcsjmp(uint16_t seg, uint32_t old_pc) x86gpf("loadcsjmp(): Non-conforming DPL > CPL", seg2 & 0xfffc); return; } - /*FALLTHROUGH*/ + fallthrough; case 0x1c00: case 0x1d00: case 0x1e00: @@ -890,16 +898,24 @@ void loadcscall(uint16_t seg) #endif { - uint16_t seg2, newss; - uint16_t segdat[4], segdat2[4]; - uint32_t addr, oldssbase = ss; - uint32_t oaddr, newpc; - uint32_t *segdat32 = (uint32_t *) segdat; - uint32_t *segdat232 = (uint32_t *) segdat2; - int count, type; - uint32_t oldss, oldsp, newsp, oldsp2; - uint16_t tempw; - x86seg *dt; + uint16_t seg2; + uint16_t newss; + uint16_t segdat[4]; + uint16_t segdat2[4]; + uint32_t addr; + uint32_t oldssbase = ss; + uint32_t oaddr; + uint32_t newpc; + uint32_t *segdat32 = (uint32_t *) segdat; + uint32_t *segdat232 = (uint32_t *) segdat2; + int count; + int type; + uint32_t oldss; + uint32_t oldsp; + uint32_t newsp; + uint32_t oldsp2; + uint16_t tempw; + const x86seg *dt; if ((msw & 1) && !(cpu_state.eflags & VM_FLAG)) { x86seg_log("Protected mode CS load! %04X\n", seg); @@ -1174,7 +1190,7 @@ loadcscall(uint16_t seg) x86gpf("loadcscall(): Call PM Gate Inner DPL > CPL", seg2 & 0xfffc); return; } - /*FALLTHROUGH*/ + fallthrough; case 0x1c00: case 0x1d00: case 0x1e00: @@ -1237,12 +1253,18 @@ loadcscall(uint16_t seg) void pmoderetf(int is32, uint16_t off) { - uint16_t segdat[4], segdat2[4], seg, newss; - uint32_t newpc, newsp, addr, oaddr; - uint32_t oldsp = ESP; - uint32_t *segdat32 = (uint32_t *) segdat; - uint32_t *segdat232 = (uint32_t *) segdat2; - x86seg *dt; + uint16_t segdat[4]; + uint16_t segdat2[4]; + uint16_t seg; + uint16_t newss; + uint32_t newpc; + uint32_t newsp; + uint32_t addr; + uint32_t oaddr; + uint32_t oldsp = ESP; + uint32_t *segdat32 = (uint32_t *) segdat; + uint32_t *segdat232 = (uint32_t *) segdat2; + const x86seg *dt; x86seg_log("RETF %i %04X:%04X %08X %04X\n", is32, CS, cpu_state.pc, cr0, cpu_state.eflags); if (is32) { @@ -1467,17 +1489,22 @@ pmoderetf(int is32, uint16_t off) void pmodeint(int num, int soft) { - uint16_t segdat[4], segdat2[4]; - uint16_t segdat3[4]; - uint16_t newss, seg = 0; - int type, new_cpl; - uint32_t addr, oaddr; - uint32_t oldss, oldsp; - uint32_t newsp; - uint32_t *segdat32 = (uint32_t *) segdat; - uint32_t *segdat232 = (uint32_t *) segdat2; - uint32_t *segdat332 = (uint32_t *) segdat3; - x86seg *dt; + uint16_t segdat[4]; + uint16_t segdat2[4]; + uint16_t segdat3[4]; + uint16_t newss; + uint16_t seg = 0; + int type; + int new_cpl; + uint32_t addr; + uint32_t oaddr; + uint32_t oldss; + uint32_t oldsp; + uint32_t newsp; + uint32_t *segdat32 = (uint32_t *) segdat; + uint32_t *segdat232 = (uint32_t *) segdat2; + uint32_t *segdat332 = (uint32_t *) segdat3; + const x86seg *dt; if ((cpu_state.eflags & VM_FLAG) && (IOPL != 3) && soft) { x86seg_log("V86 banned int\n"); @@ -1661,7 +1688,7 @@ pmodeint(int num, int soft) x86gpf("pmodeint(): DPL != CPL", seg & 0xfffc); return; } - /*FALLTHROUGH*/ + fallthrough; case 0x1c00: case 0x1d00: case 0x1e00: @@ -1750,16 +1777,21 @@ pmodeint(int num, int soft) void pmodeiret(int is32) { - uint16_t newss, seg = 0; - uint16_t segdat[4], segdat2[4]; - uint16_t segs[4]; - uint32_t tempflags, flagmask; - uint32_t newpc, newsp; - uint32_t addr, oaddr; - uint32_t oldsp = ESP; - uint32_t *segdat32 = (uint32_t *) segdat; - uint32_t *segdat232 = (uint32_t *) segdat2; - x86seg *dt; + uint16_t newss; + uint16_t seg = 0; + uint16_t segdat[4]; + uint16_t segdat2[4]; + uint16_t segs[4]; + uint32_t tempflags; + uint32_t flagmask; + uint32_t newpc; + uint32_t newsp; + uint32_t addr; + uint32_t oaddr; + uint32_t oldsp = ESP; + uint32_t *segdat32 = (uint32_t *) segdat; + uint32_t *segdat232 = (uint32_t *) segdat2; + const x86seg *dt; if (is386 && (cpu_state.eflags & VM_FLAG)) { if (IOPL != 3) { @@ -2058,15 +2090,32 @@ pmodeiret(int is32) void taskswitch286(uint16_t seg, uint16_t *segdat, int is32) { - uint16_t tempw, new_ldt; - uint16_t new_es, new_cs, new_ss, new_ds, new_fs, new_gs; - uint16_t segdat2[4]; - uint32_t base, limit; - uint32_t templ, new_cr3 = 0; - uint32_t new_eax, new_ebx, new_ecx, new_edx, new_esp, new_ebp; - uint32_t new_esi, new_edi, new_pc, new_flags, addr; - uint32_t *segdat232 = (uint32_t *) segdat2; - x86seg *dt; + uint16_t tempw; + uint16_t new_ldt; + uint16_t new_es; + uint16_t new_cs; + uint16_t new_ss; + uint16_t new_ds; + uint16_t new_fs; + uint16_t new_gs; + uint16_t segdat2[4]; + uint32_t base; + uint32_t limit; + uint32_t templ; + uint32_t new_cr3 = 0; + uint32_t new_eax; + uint32_t new_ebx; + uint32_t new_ecx; + uint32_t new_edx; + uint32_t new_esp; + uint32_t new_ebp; + uint32_t new_esi; + uint32_t new_edi; + uint32_t new_pc; + uint32_t new_flags; + uint32_t addr; + uint32_t *segdat232 = (uint32_t *) segdat2; + const x86seg *dt; base = segdat[1] | ((segdat[2] & 0x00ff) << 16); limit = segdat[0]; @@ -2447,7 +2496,8 @@ cyrix_write_seg_descriptor(uint32_t addr, x86seg *seg) void cyrix_load_seg_descriptor(uint32_t addr, x86seg *seg) { - uint16_t segdat[4], selector; + uint16_t segdat[4]; + uint16_t selector; segdat[0] = readmemw(0, addr); segdat[1] = readmemw(0, addr + 2); diff --git a/src/cpu/x87.c b/src/cpu/x87.c index 181b7b9ca..3918800aa 100644 --- a/src/cpu/x87.c +++ b/src/cpu/x87.c @@ -17,8 +17,10 @@ #include "386_common.h" #include "softfloat/softfloat-specialize.h" -uint32_t x87_pc_off, x87_op_off; -uint16_t x87_pc_seg, x87_op_seg; +uint32_t x87_pc_off; +uint32_t x87_op_off; +uint16_t x87_pc_seg; +uint16_t x87_op_seg; #ifdef ENABLE_FPU_LOG int fpu_do_log = ENABLE_FPU_LOG; @@ -43,9 +45,8 @@ uint16_t x87_gettag(void) { uint16_t ret = 0; - int c; - for (c = 0; c < 8; c++) { + for (uint8_t c = 0; c < 8; c++) { if (cpu_state.tag[c] == TAG_EMPTY) ret |= X87_TAG_EMPTY << (c * 2); else if (cpu_state.tag[c] & TAG_UINT64) @@ -62,9 +63,7 @@ x87_gettag(void) void x87_settag(uint16_t new_tag) { - int c; - - for (c = 0; c < 8; c++) { + for (uint8_t c = 0; c < 8; c++) { int tag = (new_tag >> (c * 2)) & 3; if (tag == X87_TAG_EMPTY) @@ -152,7 +151,8 @@ FPU_handle_NaN32(floatx80 a, float32 b, floatx80 *r, struct float_status_t *stat return 1; } - int aIsNaN = floatx80_is_nan(a), bIsNaN = float32_is_nan(b); + int aIsNaN = floatx80_is_nan(a); + int bIsNaN = float32_is_nan(b); if (aIsNaN | bIsNaN) { *r = FPU_handle_NaN32_Func(a, aIsNaN, b, bIsNaN, status); return 1; @@ -205,7 +205,8 @@ FPU_handle_NaN64(floatx80 a, float64 b, floatx80 *r, struct float_status_t *stat return 1; } - int aIsNaN = floatx80_is_nan(a), bIsNaN = float64_is_nan(b); + int aIsNaN = floatx80_is_nan(a); + int bIsNaN = float64_is_nan(b); if (aIsNaN | bIsNaN) { *r = FPU_handle_NaN64_Func(a, aIsNaN, b, bIsNaN, status); return 1; @@ -256,13 +257,13 @@ FPU_status_word_flags_fpu_compare(int float_relation) return (C0 | C2 | C3); case float_relation_greater: - return (0); + return 0; case float_relation_less: - return (C0); + return C0; case float_relation_equal: - return (C3); + return C3; } return (-1); // should never get here @@ -280,11 +281,11 @@ FPU_write_eflags_fpu_compare(int float_relation) break; case float_relation_less: - cpu_state.flags |= (C_FLAG); + cpu_state.flags |= C_FLAG; break; case float_relation_equal: - cpu_state.flags |= (Z_FLAG); + cpu_state.flags |= Z_FLAG; break; default: diff --git a/src/cpu/x87.h b/src/cpu/x87.h index 66d51dbd9..2d8708da4 100644 --- a/src/cpu/x87.h +++ b/src/cpu/x87.h @@ -3,8 +3,10 @@ #define X87_TAG_INVALID 2 #define X87_TAG_EMPTY 3 -extern uint32_t x87_pc_off, x87_op_off; -extern uint16_t x87_pc_seg, x87_op_seg; +extern uint32_t x87_pc_off; +extern uint32_t x87_op_off; +extern uint16_t x87_pc_seg; +extern uint16_t x87_op_seg; static __inline void x87_set_mmx(void) @@ -16,7 +18,7 @@ x87_set_mmx(void) } else { cpu_state.TOP = 0; p = (uint64_t *) cpu_state.tag; - *p = 0x0101010101010101ull; + *p = 0x0101010101010101ULL; } cpu_state.ismmx = 1; } diff --git a/src/cpu/x87_ops.h b/src/cpu/x87_ops.h index 0db2b3f3c..cde0128dc 100644 --- a/src/cpu/x87_ops.h +++ b/src/cpu/x87_ops.h @@ -111,6 +111,7 @@ typedef union static __inline void x87_checkexceptions(void) { + // } static __inline void @@ -169,7 +170,8 @@ x87_pop(void) static __inline int16_t x87_fround16(double b) { - int16_t a, c; + int16_t a; + int16_t c; switch ((cpu_state.npxc >> 10) & 3) { case 0: /*Nearest*/ @@ -201,7 +203,8 @@ x87_fround16_64(double b) static __inline int32_t x87_fround32(double b) { - int32_t a, c; + int32_t a; + int32_t c; switch ((cpu_state.npxc >> 10) & 3) { case 0: /*Nearest*/ @@ -233,7 +236,8 @@ x87_fround32_64(double b) static __inline int64_t x87_fround(double b) { - int64_t a, c; + int64_t a; + int64_t c; switch ((cpu_state.npxc >> 10) & 3) { case 0: /*Nearest*/ @@ -338,9 +342,10 @@ x87_compare(double a, double b) { #ifdef X87_INLINE_ASM uint32_t result; - double ea = a, eb = b; - const uint64_t ia = 0x3fec1a6ff866a936ull; - const uint64_t ib = 0x3fec1a6ff866a938ull; + double ea = a; + double eb = b; + const uint64_t ia = 0x3fec1a6ff866a936ULL; + const uint64_t ib = 0x3fec1a6ff866a938ULL; /* Hack to make CHKCOP happy. */ if (!memcmp(&ea, &ia, 8) && !memcmp(&eb, &ib, 8)) diff --git a/src/cpu/x87_ops_misc.h b/src/cpu/x87_ops_misc.h index cea6e6075..f4d1dd2ea 100644 --- a/src/cpu/x87_ops_misc.h +++ b/src/cpu/x87_ops_misc.h @@ -37,16 +37,17 @@ static int opFXTRACT(uint32_t fetchdat) { x87_conv_t test; - int64_t exp80, exp80final; - double mant; + int64_t exp80; + int64_t exp80final; + double mant; FP_ENTER(); cpu_state.pc++; test.eind.d = ST(0); - exp80 = test.eind.ll & (0x7ff0000000000000ll); - exp80final = (exp80 >> 52) - BIAS64; - mant = test.eind.d / (pow(2.0, (double)exp80final)); - ST(0) = (double)exp80final; + exp80 = test.eind.ll & 0x7ff0000000000000LL; + exp80final = (exp80 >> 52) - BIAS64; + mant = test.eind.d / (pow(2.0, (double) exp80final)); + ST(0) = (double) exp80final; FP_TAG_VALID; x87_push(mant); CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxtract) : (x87_timings.fxtract * cpu_multi)); @@ -82,7 +83,7 @@ opFINIT(uint32_t fetchdat) #ifdef USE_NEW_DYNAREC *p = 0; #else - *p = 0x0303030303030303ll; + *p = 0x0303030303030303LL; #endif cpu_state.TOP = 0; cpu_state.ismmx = 0; @@ -410,7 +411,7 @@ FSAVE(void) #ifdef USE_NEW_DYNAREC *p = 0; #else - *p = 0x0303030303030303ll; + *p = 0x0303030303030303LL; #endif cpu_state.TOP = 0; cpu_state.ismmx = 0; @@ -629,7 +630,7 @@ opFLDLN2(uint32_t fetchdat) { FP_ENTER(); cpu_state.pc++; - x87_push_u64(0x3fe62e42fefa39f0ull); + x87_push_u64(0x3fe62e42fefa39f0ULL); CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi)); CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi)); return 0; diff --git a/src/cpu/x87_ops_sf.h b/src/cpu/x87_ops_sf.h index e70556fea..fb2f790a4 100644 --- a/src/cpu/x87_ops_sf.h +++ b/src/cpu/x87_ops_sf.h @@ -18,7 +18,8 @@ fpu_save_environment(void) switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { case 0x000: { /*16-bit real mode*/ uint16_t tmp; - uint32_t fp_ip, fp_dp; + uint32_t fp_ip; + uint32_t fp_dp; fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip; fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp; @@ -60,7 +61,9 @@ fpu_save_environment(void) } break; case 0x100: { /*32-bit real mode*/ - uint32_t tmp, fp_ip, fp_dp; + uint32_t tmp; + uint32_t fp_ip; + uint32_t fp_dp; fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip; fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp; @@ -114,7 +117,9 @@ fpu_load_environment(void) switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { case 0x000: { /*16-bit real mode*/ uint16_t tmp; - uint32_t fp_ip, fp_dp; + uint32_t fp_ip; + uint32_t fp_dp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c); fp_dp = (tmp & 0xf000) << 4; tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a); @@ -156,7 +161,10 @@ fpu_load_environment(void) } break; case 0x100: { /*32-bit real mode*/ - uint32_t tmp, fp_ip, fp_dp; + uint32_t tmp; + uint32_t fp_ip; + uint32_t fp_dp; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x18); fp_dp = (tmp & 0x0ffff000) << 4; tmp = readmeml(easeg, cpu_state.eaaddr + 0x14); diff --git a/src/cpu/x87_ops_sf_arith.h b/src/cpu/x87_ops_sf_arith.h index 10b100b2a..5dc80b432 100644 --- a/src/cpu/x87_ops_sf_arith.h +++ b/src/cpu/x87_ops_sf_arith.h @@ -203,7 +203,9 @@ sf_FPU(il, uint32_t, 32, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i static int sf_FADD_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -230,7 +232,9 @@ next_ins: static int sf_FADD_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -258,7 +262,9 @@ next_ins: static int sf_FADDP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -288,7 +294,9 @@ next_ins: static int sf_FDIV_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -315,7 +323,9 @@ next_ins: static int sf_FDIV_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -341,7 +351,9 @@ next_ins: static int sf_FDIVP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -370,7 +382,9 @@ next_ins: static int sf_FDIVR_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -396,7 +410,9 @@ next_ins: static int sf_FDIVR_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -422,7 +438,9 @@ next_ins: static int sf_FDIVRP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -451,7 +469,9 @@ next_ins: static int sf_FMUL_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -478,7 +498,9 @@ next_ins: static int sf_FMUL_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -505,7 +527,9 @@ next_ins: static int sf_FMULP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -534,7 +558,9 @@ next_ins: static int sf_FSUB_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -561,7 +587,9 @@ next_ins: static int sf_FSUB_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -588,7 +616,9 @@ next_ins: static int sf_FSUBP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -617,7 +647,9 @@ next_ins: static int sf_FSUBR_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -644,7 +676,9 @@ next_ins: static int sf_FSUBR_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -671,7 +705,9 @@ next_ins: static int sf_FSUBRP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); diff --git a/src/cpu/x87_ops_sf_compare.h b/src/cpu/x87_ops_sf_compare.h index 34bca6772..0dde17a05 100644 --- a/src/cpu/x87_ops_sf_compare.h +++ b/src/cpu/x87_ops_sf_compare.h @@ -95,7 +95,8 @@ cmp_FPU(il, int32_t, 32, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i static int sf_FCOM_sti(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -123,7 +124,8 @@ next_ins: static int sf_FCOMP_sti(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -156,7 +158,8 @@ next_ins: static int sf_FCOMPP(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -192,7 +195,8 @@ next_ins: static int sf_FUCOMPP(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -228,7 +232,8 @@ next_ins: static int sf_FCOMI_st0_stj(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -256,7 +261,8 @@ next_ins: static int sf_FCOMIP_st0_stj(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -291,7 +297,8 @@ next_ins: static int sf_FUCOM_sti(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -319,7 +326,8 @@ next_ins: static int sf_FUCOMP_sti(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -352,7 +360,8 @@ next_ins: static int sf_FUCOMI_st0_stj(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -380,7 +389,8 @@ next_ins: static int sf_FUCOMIP_st0_stj(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; diff --git a/src/cpu/x87_ops_sf_misc.h b/src/cpu/x87_ops_sf_misc.h index d8a3d7368..3b468cbf6 100644 --- a/src/cpu/x87_ops_sf_misc.h +++ b/src/cpu/x87_ops_sf_misc.h @@ -2,8 +2,10 @@ static int sf_FXCH_sti(uint32_t fetchdat) { const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); - floatx80 st0_reg, sti_reg; - int st0_tag, sti_tag; + floatx80 st0_reg; + floatx80 sti_reg; + int st0_tag; + int sti_tag; FP_ENTER(); FPU_check_pending_exceptions(); diff --git a/src/cpu/x87_ops_sf_trans.h b/src/cpu/x87_ops_sf_trans.h index 5289b2bbf..8f28104bd 100644 --- a/src/cpu/x87_ops_sf_trans.h +++ b/src/cpu/x87_ops_sf_trans.h @@ -104,7 +104,9 @@ next_ins: static int sf_FPATAN(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -132,7 +134,8 @@ static int sf_FXTRACT(uint32_t fetchdat) { struct float_status_t status; - floatx80 a, b; + floatx80 a; + floatx80 b; FP_ENTER(); cpu_state.pc++; @@ -175,10 +178,13 @@ next_ins: static int sf_FPREM1(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; - uint64_t quotient = 0; - int flags, cc; + uint64_t quotient = 0; + int flags; + int cc; FP_ENTER(); cpu_state.pc++; @@ -219,10 +225,13 @@ next_ins: static int sf_FPREM(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; - uint64_t quotient = 0; - int flags, cc; + uint64_t quotient = 0; + int flags; + int cc; FP_ENTER(); cpu_state.pc++; @@ -291,9 +300,11 @@ next_ins: static int sf_FSINCOS(uint32_t fetchdat) { - const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); struct float_status_t status; - floatx80 y, sin_y, cos_y; + floatx80 y; + floatx80 sin_y; + floatx80 cos_y; FP_ENTER(); cpu_state.pc++;