From be79ea78c73b49b54490bfb02a04691da83fadef Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Thu, 10 Aug 2023 15:43:16 -0400 Subject: [PATCH 1/4] sonarlinting and formatting in src/cpu --- src/cpu/386.c | 8 +- src/cpu/386_common.c | 93 +- src/cpu/386_dynarec.c | 23 +- src/cpu/386_dynarec_ops.c | 5 +- src/cpu/8080.c | 20 +- src/cpu/808x.c | 184 ++- src/cpu/808x/queue.c | 3 +- src/cpu/808x/queue.h | 29 +- src/cpu/codegen_timing_486.c | 428 ++--- src/cpu/codegen_timing_686.c | 712 ++++---- src/cpu/codegen_timing_common.c | 181 ++- src/cpu/codegen_timing_common.h | 78 +- src/cpu/codegen_timing_k6.c | 2522 ++++++++++++++--------------- src/cpu/codegen_timing_p6.c | 2356 +++++++++++++-------------- src/cpu/codegen_timing_pentium.c | 1159 ++++++------- src/cpu/codegen_timing_winchip.c | 416 ++--- src/cpu/codegen_timing_winchip2.c | 825 +++++----- src/cpu/cpu.c | 26 +- src/cpu/cpu.h | 18 +- src/cpu/x86.c | 14 +- src/cpu/x86.h | 52 +- src/cpu/x86_ops_3dnow.h | 3 +- src/cpu/x86_ops_arith.h | 69 +- src/cpu/x86_ops_atomic.h | 60 +- src/cpu/x86_ops_bcd.h | 10 +- src/cpu/x86_ops_bit.h | 21 +- src/cpu/x86_ops_call.h | 36 +- src/cpu/x86_ops_fpu.h | 4 +- src/cpu/x86_ops_i686.h | 6 +- src/cpu/x86_ops_jump.h | 13 +- src/cpu/x86_ops_misc.h | 27 +- src/cpu/x86_ops_mmx_mov.h | 4 +- src/cpu/x86_ops_mov_ctrl.h | 44 +- src/cpu/x86_ops_mov_seg.h | 12 +- src/cpu/x86_ops_mul.h | 24 +- src/cpu/x86_ops_pmode.h | 18 +- src/cpu/x86_ops_prefix.h | 3 +- src/cpu/x86_ops_ret.h | 4 +- src/cpu/x86_ops_shift.h | 54 +- src/cpu/x86_ops_stack.h | 16 +- src/cpu/x86_ops_string.h | 18 +- src/cpu/x86_ops_string_2386.h | 18 +- src/cpu/x86seg.c | 194 ++- src/cpu/x87.c | 29 +- src/cpu/x87.h | 8 +- src/cpu/x87_ops.h | 17 +- src/cpu/x87_ops_misc.h | 19 +- src/cpu/x87_ops_sf.h | 16 +- src/cpu/x87_ops_sf_arith.h | 72 +- src/cpu/x87_ops_sf_compare.h | 30 +- src/cpu/x87_ops_sf_misc.h | 6 +- src/cpu/x87_ops_sf_trans.h | 31 +- 52 files changed, 5203 insertions(+), 4835 deletions(-) diff --git a/src/cpu/386.c 
b/src/cpu/386.c index eebbae29e..11e87cadc 100644 --- a/src/cpu/386.c +++ b/src/cpu/386.c @@ -230,8 +230,12 @@ fetch_ea_16_long(uint32_t rmdat) void exec386_2386(int cycs) { - int vector, tempi, cycdiff, oldcyc; - int cycle_period, ins_cycles; + int vector; + int tempi; + int cycdiff; + int oldcyc; + int cycle_period; + int ins_cycles; uint32_t addr; cycles += cycs; diff --git a/src/cpu/386_common.c b/src/cpu/386_common.c index 5763a787f..f8593fdbe 100644 --- a/src/cpu/386_common.c +++ b/src/cpu/386_common.c @@ -26,6 +26,7 @@ #include "386_common.h" #include "x86_flags.h" #include "x86seg.h" +#include <86box/plat_unused.h> #ifdef USE_DYNAREC # include "codegen.h" @@ -34,19 +35,26 @@ # define CPU_BLOCK_END() #endif -x86seg gdt, ldt, idt, tr; +x86seg gdt; +x86seg ldt; +x86seg idt; +x86seg tr; -uint32_t cr2, cr3, cr4; +uint32_t cr2; +uint32_t cr3; +uint32_t cr4; uint32_t dr[8]; uint32_t use32; int stack32; -uint32_t *eal_r, *eal_w; +uint32_t *eal_r; +uint32_t *eal_w; int nmi_enable = 1; -int alt_access, cpl_override = 0; +int alt_access; +int cpl_override = 0; #ifdef USE_NEW_DYNAREC uint16_t cpu_cur_status = 0; @@ -59,23 +67,33 @@ extern uint8_t *pccache2; extern int optype; extern uint32_t pccache; -int in_sys = 0, unmask_a20_in_smm = 0; -uint32_t old_rammask = 0xffffffff; +int in_sys = 0; +int unmask_a20_in_smm = 0; +uint32_t old_rammask = 0xffffffff; int soft_reset_mask = 0; int smi_latched = 0; -int smm_in_hlt = 0, smi_block = 0; +int smm_in_hlt = 0; +int smi_block = 0; int prefetch_prefixes = 0; -int tempc, oldcpl, optype, inttype, oddeven = 0; +int tempc; +int oldcpl; +int optype; +int inttype; +int oddeven = 0; int timetolive; uint16_t oldcs; -uint32_t oldds, oldss, olddslimit, oldsslimit, - olddslimitw, oldsslimitw; +uint32_t oldds; +uint32_t oldss; +uint32_t olddslimit; +uint32_t oldsslimit; +uint32_t olddslimitw; +uint32_t oldsslimitw; uint32_t oxpc; uint32_t rmdat32; uint32_t backupregs[16]; @@ -99,8 +117,10 @@ int opcode_length[256] = { 3, 3, 3, 3, 3, 3, 
1, 1, 3, 3, 3, 3, 3, 3, 1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 1, 1, 1, 1, /* 0xex */ 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 3, 3 }; /* 0xfx */ -uint32_t addr64, addr64_2; -uint32_t addr64a[8], addr64a_2[8]; +uint32_t addr64; +uint32_t addr64_2; +uint32_t addr64a[8]; +uint32_t addr64a_2[8]; static pc_timer_t *cpu_fast_off_timer = NULL; static double cpu_fast_off_period = 0.0; @@ -494,12 +514,10 @@ smm_seg_load(x86seg *s) static void smram_save_state_p5(uint32_t *saved_state, int in_hlt) { - int n = 0; - saved_state[SMRAM_FIELD_P5_SMM_REVISION_ID] = SMM_REVISION_ID; saved_state[SMRAM_FIELD_P5_SMBASE_OFFSET] = smbase; - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) saved_state[SMRAM_FIELD_P5_EAX - n] = cpu_state.regs[n].l; if (in_hlt) @@ -588,9 +606,7 @@ smram_save_state_p5(uint32_t *saved_state, int in_hlt) static void smram_restore_state_p5(uint32_t *saved_state) { - int n = 0; - - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) cpu_state.regs[n].l = saved_state[SMRAM_FIELD_P5_EAX - n]; if (saved_state[SMRAM_FIELD_P5_AUTOHALT_RESTART] & 0xffff) @@ -701,12 +717,10 @@ smram_restore_state_p5(uint32_t *saved_state) static void smram_save_state_p6(uint32_t *saved_state, int in_hlt) { - int n = 0; - saved_state[SMRAM_FIELD_P6_SMM_REVISION_ID] = SMM_REVISION_ID; saved_state[SMRAM_FIELD_P6_SMBASE_OFFSET] = smbase; - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) saved_state[SMRAM_FIELD_P6_EAX - n] = cpu_state.regs[n].l; if (in_hlt) @@ -787,9 +801,7 @@ smram_save_state_p6(uint32_t *saved_state, int in_hlt) static void smram_restore_state_p6(uint32_t *saved_state) { - int n = 0; - - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) cpu_state.regs[n].l = saved_state[SMRAM_FIELD_P6_EAX - n]; if (saved_state[SMRAM_FIELD_P6_AUTOHALT_RESTART] & 0xffff) @@ -894,12 +906,10 @@ smram_restore_state_p6(uint32_t *saved_state) static void smram_save_state_amd_k(uint32_t *saved_state, int in_hlt) { - int n = 0; - 
saved_state[SMRAM_FIELD_AMD_K_SMM_REVISION_ID] = SMM_REVISION_ID; saved_state[SMRAM_FIELD_AMD_K_SMBASE_OFFSET] = smbase; - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) saved_state[SMRAM_FIELD_AMD_K_EAX - n] = cpu_state.regs[n].l; if (in_hlt) @@ -979,9 +989,7 @@ smram_save_state_amd_k(uint32_t *saved_state, int in_hlt) static void smram_restore_state_amd_k(uint32_t *saved_state) { - int n = 0; - - for (n = 0; n < 8; n++) + for (uint8_t n = 0; n < 8; n++) cpu_state.regs[n].l = saved_state[SMRAM_FIELD_AMD_K_EAX - n]; if (saved_state[SMRAM_FIELD_AMD_K_AUTOHALT_RESTART] & 0xffff) @@ -1080,7 +1088,7 @@ smram_restore_state_amd_k(uint32_t *saved_state) } static void -smram_save_state_cyrix(uint32_t *saved_state, int in_hlt) +smram_save_state_cyrix(uint32_t *saved_state, UNUSED(int in_hlt)) { saved_state[0] = dr[7]; saved_state[1] = cpu_state.flags | (cpu_state.eflags << 16); @@ -1104,7 +1112,7 @@ smram_restore_state_cyrix(uint32_t *saved_state) void enter_smm(int in_hlt) { - uint32_t saved_state[SMM_SAVE_STATE_MAP_SIZE], n; + uint32_t saved_state[SMM_SAVE_STATE_MAP_SIZE]; uint32_t smram_state = smbase + 0x10000; /* If it's a CPU on which SMM is not supported, do nothing. */ @@ -1231,7 +1239,7 @@ enter_smm(int in_hlt) writememl(0, smram_state - 0x18, saved_state[5]); writememl(0, smram_state - 0x24, saved_state[6]); } else { - for (n = 0; n < SMM_SAVE_STATE_MAP_SIZE; n++) { + for (uint8_t n = 0; n < SMM_SAVE_STATE_MAP_SIZE; n++) { smram_state -= 4; writememl(0, smram_state, saved_state[n]); } @@ -1291,7 +1299,7 @@ enter_smm_check(int in_hlt) void leave_smm(void) { - uint32_t saved_state[SMM_SAVE_STATE_MAP_SIZE], n; + uint32_t saved_state[SMM_SAVE_STATE_MAP_SIZE]; uint32_t smram_state = smbase + 0x10000; /* If it's a CPU on which SMM is not supported (or not implemented in 86Box), do nothing. 
*/ @@ -1312,7 +1320,7 @@ leave_smm(void) cyrix_load_seg_descriptor(smram_state - 0x20, &cpu_state.seg_cs); saved_state[6] = readmeml(0, smram_state - 0x24); } else { - for (n = 0; n < SMM_SAVE_STATE_MAP_SIZE; n++) { + for (uint8_t n = 0; n < SMM_SAVE_STATE_MAP_SIZE; n++) { smram_state -= 4; saved_state[n] = readmeml(0, smram_state); x386_common_log("Reading %08X from memory at %08X to array element %i\n", saved_state[n], smram_state, n); @@ -1483,7 +1491,8 @@ int x86_int_sw_rm(int num) { uint32_t addr; - uint16_t new_pc, new_cs; + uint16_t new_pc; + uint16_t new_cs; flags_rebuild(); cycles -= timing_int; @@ -1571,8 +1580,10 @@ checkio(uint32_t port, int mask) int divl(uint32_t val) { - uint64_t num, quo; - uint32_t rem, quo32; + uint64_t num; + uint64_t quo; + uint32_t rem; + uint32_t quo32; if (val == 0) { divexcp(); @@ -1598,8 +1609,10 @@ divl(uint32_t val) int idivl(int32_t val) { - int64_t num, quo; - int32_t rem, quo32; + int64_t num; + int64_t quo; + int32_t rem; + int32_t quo32; if (val == 0) { divexcp(); diff --git a/src/cpu/386_dynarec.c b/src/cpu/386_dynarec.c index 21eb7c2b7..38261a234 100644 --- a/src/cpu/386_dynarec.c +++ b/src/cpu/386_dynarec.c @@ -46,7 +46,8 @@ #define CPU_BLOCK_END() cpu_block_end = 1 -int inrecomp = 0, cpu_block_end = 0; +int inrecomp = 0; +int cpu_block_end = 0; int cpu_end_block_after_ins = 0; #ifdef ENABLE_386_DYNAREC_LOG @@ -334,7 +335,8 @@ exec386_dynarec_int(void) static __inline void exec386_dynarec_dyn(void) { - uint32_t start_pc = 0, phys_addr = get_phys(cs + cpu_state.pc); + uint32_t start_pc = 0; + uint32_t phys_addr = get_phys(cs + cpu_state.pc); int hash = HASH(phys_addr); # ifdef USE_NEW_DYNAREC codeblock_t *block = &codeblock[codeblock_hash[hash]]; @@ -674,10 +676,13 @@ exec386_dynarec_dyn(void) void exec386_dynarec(int cycs) { - int vector, tempi; + int vector; + int tempi; int cycdiff; - int oldcyc, oldcyc2; - uint64_t oldtsc, delta; + int oldcyc; + int oldcyc2; + uint64_t oldtsc; + uint64_t delta; int cyc_period = 
cycs / 2000; /*5us*/ @@ -794,8 +799,12 @@ exec386_dynarec(int cycs) void exec386(int cycs) { - int vector, tempi, cycdiff, oldcyc; - int cycle_period, ins_cycles; + int vector; + int tempi; + int cycdiff; + int oldcyc; + int cycle_period; + int ins_cycles; uint32_t addr; cycles += cycs; diff --git a/src/cpu/386_dynarec_ops.c b/src/cpu/386_dynarec_ops.c index f3e2f6e6e..f46062bcc 100644 --- a/src/cpu/386_dynarec_ops.c +++ b/src/cpu/386_dynarec_ops.c @@ -21,6 +21,7 @@ #include <86box/pic.h> #include <86box/gdbstub.h> #include "codegen.h" +#include <86box/plat_unused.h> #define CPU_BLOCK_END() cpu_block_end = 1 @@ -31,7 +32,7 @@ #include "386_common.h" static __inline void -fetch_ea_32_long(uint32_t rmdat) +fetch_ea_32_long(UNUSED(uint32_t rmdat)) { eal_r = eal_w = NULL; easeg = cpu_state.ea_seg->base; @@ -45,7 +46,7 @@ fetch_ea_32_long(uint32_t rmdat) } static __inline void -fetch_ea_16_long(uint32_t rmdat) +fetch_ea_16_long(UNUSED(uint32_t rmdat)) { eal_r = eal_w = NULL; easeg = cpu_state.ea_seg->base; diff --git a/src/cpu/8080.c b/src/cpu/8080.c index 6f3dd4267..7a7e7b96c 100644 --- a/src/cpu/8080.c +++ b/src/cpu/8080.c @@ -19,15 +19,21 @@ #include <86box/timer.h> #include <86box/i8080.h> #include <86box/mem.h> +#include <86box/plat_unused.h> -static int completed = 1; -static int in_rep = 0, repeating = 0, rep_c_flag = 0; -static int oldc, cycdiff; +static int completed = 1; +static int in_rep = 0; +static int repeating = 0; +static int rep_c_flag = 0; +static int oldc; +static int cycdiff; #ifdef UNUSED_8080_VARS static int prefetching = 1; -static int refresh = 0, clear_lock = 0; +static int refresh = 0; +static int clear_lock = 0; -static uint32_t cpu_src = 0, cpu_dest = 0; +static uint32_t cpu_src = 0; +static uint32_t cpu_dest = 0; static uint32_t cpu_data = 0; #endif @@ -43,7 +49,7 @@ clock_end(void) int diff = cycdiff - cycles; /* On 808x systems, clock speed is usually crystal frequency divided by an integer. 
*/ - tsc += (uint64_t) diff * ((uint64_t) xt_cpu_multi >> 32ULL); /* Shift xt_cpu_multi by 32 bits to the right and then multiply. */ + tsc += (uint64_t) diff * (xt_cpu_multi >> 32ULL); /* Shift xt_cpu_multi by 32 bits to the right and then multiply. */ if (TIMER_VAL_LESS_THAN_VAL(timer_target, (uint32_t) tsc)) timer_process(); } @@ -237,7 +243,7 @@ setreg_i8080(i8080 *cpu, uint8_t reg, uint8_t val) } void -interpret_exec8080(i8080 *cpu, uint8_t opcode) +interpret_exec8080(UNUSED(i8080 *cpu), uint8_t opcode) { switch (opcode) { case 0x00: diff --git a/src/cpu/808x.c b/src/cpu/808x.c index e0419a9f7..690a6e0f9 100644 --- a/src/cpu/808x.c +++ b/src/cpu/808x.c @@ -35,6 +35,8 @@ #include <86box/ppi.h> #include <86box/timer.h> #include <86box/gdbstub.h> +#include <86box/plat_fallthrough.h> +#include <86box/plat_unused.h> /* Is the CPU 8088 or 8086. */ int is8086 = 0; @@ -46,7 +48,8 @@ uint32_t custom_nmi_vector = 0x00000000; static uint8_t pfq[6]; /* Variables to aid with the prefetch queue operation. */ -static int biu_cycles = 0, pfq_pos = 0; +static int biu_cycles = 0; +static int pfq_pos = 0; /* The IP equivalent of the current prefetch queue position. 
*/ static uint16_t pfq_ip; @@ -57,30 +60,37 @@ static x86seg *_opseg[4]; static int noint = 0; static int in_lock = 0; -static int cpu_alu_op, pfq_size; +static int cpu_alu_op; +static int pfq_size; -static uint32_t cpu_src = 0, cpu_dest = 0; +static uint32_t cpu_src = 0; +static uint32_t cpu_dest = 0; static uint32_t cpu_data = 0; static uint16_t last_addr = 0x0000; static uint32_t *ovr_seg = NULL; -static int prefetching = 1, completed = 1; -static int in_rep = 0, repeating = 0, rep_c_flag = 0; -static int oldc, clear_lock = 0; -static int refresh = 0, cycdiff; +static int prefetching = 1; +static int completed = 1; +static int in_rep = 0; +static int repeating = 0; +static int rep_c_flag = 0; +static int oldc; +static int clear_lock = 0; +static int refresh = 0; +static int cycdiff; -static int access_code = 0; -static int hlda = 0; -static int not_ready = 0; +static int access_code = 0; +static int hlda = 0; +static int not_ready = 0; static int bus_request_type = 0; -static int pic_data = -1; -static int last_was_code = 0; -static uint16_t mem_data = 0; -static uint32_t mem_seg = 0; -static uint16_t mem_addr = 0; -static int schedule_fetch = 1; -static int pasv = 0; +static int pic_data = -1; +static int last_was_code = 0; +static uint16_t mem_data = 0; +static uint32_t mem_seg = 0; +static uint16_t mem_addr = 0; +static int schedule_fetch = 1; +static int pasv = 0; #define BUS_OUT 1 #define BUS_HIGH 2 @@ -204,7 +214,7 @@ clock_end(void) int diff = cycdiff - cycles; /* On 808x systems, clock speed is usually crystal frequency divided by an integer. */ - tsc += ((uint64_t) diff * ((uint64_t) xt_cpu_multi >> 32ULL)); /* Shift xt_cpu_multi by 32 bits to the right and then multiply. */ + tsc += ((uint64_t) diff * (xt_cpu_multi >> 32ULL)); /* Shift xt_cpu_multi by 32 bits to the right and then multiply. 
*/ if (TIMER_VAL_LESS_THAN_VAL(timer_target, (uint32_t) tsc)) timer_process(); } @@ -423,9 +433,7 @@ run_dma_cycle(int idle) static void cycles_idle(int c) { - int d; - - for (d = 0; d < c; d++) { + for (int d = 0; d < c; d++) { x808x_log("[%04X:%04X] %02X TI\n", CS, cpu_state.pc, opcode); cycles_forward(1); @@ -500,14 +508,12 @@ bus_init(void) static void wait(int c, int bus) { - int d; - if (c < 0) pclog("Negative cycles: %i!\n", c); x808x_log("[%04X:%04X] %02X %i cycles (%i)\n", CS, cpu_state.pc, opcode, c, bus); - for (d = 0; d < c; d++) { + for (int d = 0; d < c; d++) { x808x_log("[%04X:%04X] %02X cycle %i BIU\n", CS, cpu_state.pc, opcode, d); cycles_biu(bus, !d); x808x_log("[%04X:%04X] %02X cycle %i EU\n", CS, cpu_state.pc, opcode, d); @@ -530,12 +536,12 @@ sub_cycles(int c) void resub_cycles(int old_cycles) { - int i, cyc_diff = 0; + int cyc_diff = 0; if (old_cycles > cycles) { cyc_diff = old_cycles - cycles; - for (i = 0; i < cyc_diff; i++) { + for (int i = 0; i < cyc_diff; i++) { if (not_ready > 0) not_ready--; } @@ -934,7 +940,7 @@ reset_808x(int hard) _opseg[2] = &cpu_state.seg_ss; _opseg[3] = &cpu_state.seg_ds; - pfq_size = (is8086) ? 6 : 4; + pfq_size = is8086 ? 6 : 4; pfq_clear(); } @@ -1205,8 +1211,10 @@ pop(void) static void interrupt(uint16_t addr) { - uint16_t old_cs, old_ip; - uint16_t new_cs, new_ip; + uint16_t old_cs; + uint16_t old_ip; + uint16_t new_cs; + uint16_t new_ip; uint16_t tempf; addr <<= 2; @@ -1242,8 +1250,10 @@ interrupt_808x(uint16_t addr) static void custom_nmi(void) { - uint16_t old_cs, old_ip; - uint16_t new_cs, new_ip; + uint16_t old_cs; + uint16_t old_ip; + uint16_t new_cs; + uint16_t new_ip; uint16_t tempf; cpu_state.eaaddr = 0x0002; @@ -1376,7 +1386,7 @@ rep_interrupt(void) } static int -rep_action(int bits) +rep_action(UNUSED(int bits)) { uint16_t t; @@ -1562,14 +1572,14 @@ alu_op(int bits) case 2: if (cpu_state.flags & C_FLAG) cpu_src++; - /* Fall through. 
*/ + fallthrough; case 0: add(bits); break; case 3: if (cpu_state.flags & C_FLAG) cpu_src++; - /* Fall through. */ + fallthrough; case 5: case 7: sub(bits); @@ -1600,10 +1610,11 @@ mul(uint16_t a, uint16_t b) { int negate = 0; int bit_count = 8; - int carry, i; + int carry; uint16_t high_bit = 0x80; uint16_t size_mask; - uint16_t c, r; + uint16_t c; + uint16_t r; size_mask = (1 << bit_count) - 1; @@ -1644,7 +1655,7 @@ mul(uint16_t a, uint16_t b) a &= size_mask; carry = (a & 1) != 0; a >>= 1; - for (i = 0; i < bit_count; ++i) { + for (int i = 0; i < bit_count; ++i) { wait(7, 0); if (carry) { cpu_src = c; @@ -1705,7 +1716,7 @@ set_pzs(int bits) } static void -set_co_mul(int bits, int carry) +set_co_mul(UNUSED(int bits), int carry) { set_cf(carry); set_of(carry); @@ -1718,10 +1729,11 @@ set_co_mul(int bits, int carry) static int x86_div(uint16_t l, uint16_t h) { - int b, bit_count = 8; + int bit_count = 8; int negative = 0; int dividend_negative = 0; - int size_mask, carry; + int size_mask; + int carry; uint16_t r; if (opcode & 1) { @@ -1765,7 +1777,7 @@ x86_div(uint16_t l, uint16_t h) wait(1, 0); wait(2, 0); carry = 1; - for (b = 0; b < bit_count; ++b) { + for (int b = 0; b < bit_count; ++b) { r = (l << 1) + (carry ? 
1 : 0); carry = top_bit(l, bit_count); l = r; @@ -1948,20 +1960,46 @@ cpu_outw(uint16_t port, uint16_t val) void execx86(int cycs) { - uint8_t temp = 0, temp2, old_af, nests; - uint8_t temp_val, temp_al, bit, handled = 0; - uint8_t odd, zero, nibbles_count, destcmp; - uint8_t destbyte, srcbyte, nibble_result, bit_length; + uint8_t temp = 0; + uint8_t temp2; + uint8_t old_af; + uint8_t nests; + uint8_t temp_val; + uint8_t temp_al; + uint8_t bit; + uint8_t handled = 0; + uint8_t odd; + uint8_t zero; + uint8_t nibbles_count; + uint8_t destcmp; + uint8_t destbyte; + uint8_t srcbyte; + uint8_t nibble_result; + uint8_t bit_length; uint8_t bit_offset; int8_t nibble_result_s; - uint16_t addr, tempw, new_cs, new_ip; - uint16_t tempw_int, size, tempbp, lowbound; - uint16_t highbound, regval, orig_sp, wordtopush; - uint16_t immediate, old_flags; - uint16_t tmpa; + uint16_t addr; + uint16_t tempw; + uint16_t new_cs; + uint16_t new_ip; + uint16_t tempw_int; + uint16_t size; + uint16_t tempbp; + uint16_t lowbound; + uint16_t highbound; + uint16_t regval; + uint16_t orig_sp; + uint16_t wordtopush; + uint16_t immediate; + uint16_t old_flags; + uint16_t tmpa; int bits; - uint32_t dest_seg, i, carry, nibble; - uint32_t srcseg, byteaddr; + uint32_t dest_seg; + uint32_t i; + uint32_t carry; + uint32_t nibble; + uint32_t srcseg; + uint32_t byteaddr; cycles += cycs; @@ -1970,7 +2008,9 @@ execx86(int cycs) if (!repeating) { cpu_state.oldpc = cpu_state.pc; - // opcode = pfq_fetchb(); +#if 0 + opcode = pfq_fetchb(); +#endif opcode = pfq_fetchb_common(); handled = 0; oldc = cpu_state.flags & C_FLAG; @@ -2498,7 +2538,7 @@ execx86(int cycs) } for (i = 0; i < bit_length; i++) { byteaddr = (es) + DI; - writememb(es, DI, (read_mem_b(byteaddr) & ~(1 << (bit_offset))) | ((!!(AX & (1 << i))) << bit_offset)); + writememb(es, DI, (read_mem_b(byteaddr) & ~(1 << bit_offset)) | ((!!(AX & (1 << i))) << bit_offset)); bit_offset++; if (bit_offset == 8) { DI++; @@ -3063,7 +3103,7 @@ execx86(int cycs) bits = 
8 << (opcode & 1); wait(2, 0); cpu_state.eaaddr = pfq_fetchw(); - set_accum(bits, readmem((ovr_seg ? *ovr_seg : ds))); + set_accum(bits, readmem(ovr_seg ? *ovr_seg : ds)); break; case 0xA2: case 0xA3: @@ -3453,55 +3493,55 @@ execx86(int cycs) if (fpu_softfloat) { switch (opcode) { case 0xD8: - ops_sf_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat); + ops_sf_fpu_8087_d8[(rmdat >> 3) & 0x1f](rmdat); break; case 0xD9: - ops_sf_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_d9[rmdat & 0xff](rmdat); break; case 0xDA: - ops_sf_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_da[rmdat & 0xff](rmdat); break; case 0xDB: - ops_sf_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_db[rmdat & 0xff](rmdat); break; case 0xDC: - ops_sf_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat); + ops_sf_fpu_8087_dc[(rmdat >> 3) & 0x1f](rmdat); break; case 0xDD: - ops_sf_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_dd[rmdat & 0xff](rmdat); break; case 0xDE: - ops_sf_fpu_8087_de[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_de[rmdat & 0xff](rmdat); break; case 0xDF: - ops_sf_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat); + ops_sf_fpu_8087_df[rmdat & 0xff](rmdat); break; } } else { switch (opcode) { case 0xD8: - ops_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat); + ops_fpu_8087_d8[(rmdat >> 3) & 0x1f](rmdat); break; case 0xD9: - ops_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_d9[rmdat & 0xff](rmdat); break; case 0xDA: - ops_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_da[rmdat & 0xff](rmdat); break; case 0xDB: - ops_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_db[rmdat & 0xff](rmdat); break; case 0xDC: - ops_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat); + ops_fpu_8087_dc[(rmdat >> 3) & 0x1f](rmdat); break; case 0xDD: - ops_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_dd[rmdat & 0xff](rmdat); break; case 0xDE: - ops_fpu_8087_de[rmdat & 0xff]((uint32_t) 
rmdat); + ops_fpu_8087_de[rmdat & 0xff](rmdat); break; case 0xDF: - ops_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat); + ops_fpu_8087_df[rmdat & 0xff](rmdat); break; } } @@ -3800,7 +3840,7 @@ execx86(int cycs) if (cpu_mod != 3) wait(1, 0); wait(4, 0); - push((uint16_t *) &(cpu_data)); + push((uint16_t *) &cpu_data); break; } break; diff --git a/src/cpu/808x/queue.c b/src/cpu/808x/queue.c index 66fd91e32..b37ee0fb0 100644 --- a/src/cpu/808x/queue.c +++ b/src/cpu/808x/queue.c @@ -44,8 +44,7 @@ /* NOTE: When porting from Rust to C, please use uintptr_t and not size_t, so it can be printed with PRIuPTR. */ -typedef struct queue_t -{ +typedef struct queue_t { uintptr_t size; uintptr_t len; uintptr_t back; diff --git a/src/cpu/808x/queue.h b/src/cpu/808x/queue.h index b8d21ec85..7c1998295 100644 --- a/src/cpu/808x/queue.h +++ b/src/cpu/808x/queue.h @@ -17,27 +17,26 @@ #ifndef EMU_QUEUE_H #define EMU_QUEUE_H -typedef enum queue_delay_t -{ +typedef enum queue_delay_t { DELAY_READ, DELAY_WRITE, DELAY_NONE } queue_delay_t; -#define FLAG_PRELOADED 0x8000 +#define FLAG_PRELOADED 0x8000 -extern void queue_set_size(uintptr_t size); -extern uintptr_t queue_get_len(void); -extern int queue_is_full(void); -extern uint16_t queue_get_preload(void); -extern int queue_has_preload(void); -extern void queue_set_preload(void); -extern void queue_push8(uint8_t byte); -extern void queue_push16(uint16_t word); -extern uint8_t queue_pop(void); -extern queue_delay_t queue_get_delay(void); -extern void queue_flush(void); +extern void queue_set_size(uintptr_t size); +extern uintptr_t queue_get_len(void); +extern int queue_is_full(void); +extern uint16_t queue_get_preload(void); +extern int queue_has_preload(void); +extern void queue_set_preload(void); +extern void queue_push8(uint8_t byte); +extern void queue_push16(uint16_t word); +extern uint8_t queue_pop(void); +extern queue_delay_t queue_get_delay(void); +extern void queue_flush(void); -extern void queue_init(void); +extern void 
queue_init(void); #endif /*EMU_QUEUE_H*/ diff --git a/src/cpu/codegen_timing_486.c b/src/cpu/codegen_timing_486.c index 2fe5ce417..e862b123e 100644 --- a/src/cpu/codegen_timing_486.c +++ b/src/cpu/codegen_timing_486.c @@ -3,8 +3,10 @@ #include #include #include <86box/86box.h> -#include <86box/mem.h> #include "cpu.h" +#include <86box/mem.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" @@ -12,11 +14,11 @@ #include "codegen_ops.h" #include "codegen_timing_common.h" -#define CYCLES(c) (int *)c -#define CYCLES2(c16, c32) (int *)((-1 & ~0xffff) | c16 | (c32 << 8)) +#define CYCLES(c) (int *) c +#define CYCLES2(c16, c32) (int *) ((-1 & ~0xffff) | c16 | (c32 << 8)) -static int *opcode_timings[256] = -{ +static int *opcode_timings[256] = { + // clang-format off /*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), /*20*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), @@ -36,10 +38,11 @@ static int *opcode_timings[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL + // clang-format on }; -static int 
*opcode_timings_mod3[256] = -{ +static int *opcode_timings_mod3[256] = { + // clang-format off /*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), /*20*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), @@ -59,10 +62,11 @@ static int *opcode_timings_mod3[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL + // clang-format on }; -static int *opcode_timings_0f[256] = -{ +static int *opcode_timings_0f[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -82,9 +86,10 @@ static int *opcode_timings_0f[256] = /*d0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, /*e0*/ NULL, 
&timing_rm, &timing_rm, NULL, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, /*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, + // clang-format on }; -static int *opcode_timings_0f_mod3[256] = -{ +static int *opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -104,60 +109,69 @@ static int *opcode_timings_0f_mod3[256] = /*d0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, /*e0*/ NULL, &timing_rr, &timing_rr, NULL, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, /*f0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, + // clang-format on }; -static int *opcode_timings_shift[8] = -{ +static int *opcode_timings_shift[8] = { + // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) }; -static int *opcode_timings_shift_mod3[8] = -{ +static int *opcode_timings_shift_mod3[8] = { +// clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) + // clang-format on }; -static int *opcode_timings_f6[8] = -{ +static int *opcode_timings_f6[8] = { + // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), 
CYCLES(16), CYCLES(19) + // clang-format on }; -static int *opcode_timings_f6_mod3[8] = -{ +static int *opcode_timings_f6_mod3[8] = { + // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static int *opcode_timings_f7[8] = -{ +static int *opcode_timings_f7[8] = { + // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static int *opcode_timings_f7_mod3[8] = -{ +static int *opcode_timings_f7_mod3[8] = { + // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) }; -static int *opcode_timings_ff[8] = -{ +static int *opcode_timings_ff[8] = { +// clang-format off &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL }; -static int *opcode_timings_ff_mod3[8] = -{ +static int *opcode_timings_ff_mod3[8] = { +// clang-format off &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL + // clang-format on }; -static int *opcode_timings_d8[8] = -{ +static int *opcode_timings_d8[8] = { + // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_d8_mod3[8] = -{ +static int *opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ CYCLES(8), CYCLES(16), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_d9[8] = -{ +static int *opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ CYCLES(3), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) + // clang-format on }; -static int *opcode_timings_d9_mod3[64] = -{ +static int *opcode_timings_d9_mod3[64] = { + 
// clang-format off /*FLD*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), /*FXCH*/ @@ -174,26 +188,29 @@ static int *opcode_timings_d9_mod3[64] = CYCLES(140), CYCLES(196), CYCLES(200), CYCLES(218), NULL, NULL, CYCLES(3), CYCLES(3), /* opFPREM opFSQRT opFSINCOS opFRNDINT opFSCALE opFSIN opFCOS*/ CYCLES(70), NULL, CYCLES(83), CYCLES(292), CYCLES(21), CYCLES(30), CYCLES(257), CYCLES(257) + // clang-format on }; -static int *opcode_timings_da[8] = -{ +static int *opcode_timings_da[8] = { + // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_da_mod3[8] = -{ +static int *opcode_timings_da_mod3[8] = { + // clang-format off NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL + // clang-format on }; - -static int *opcode_timings_db[8] = -{ +static int *opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil FLDe FSTPe*/ CYCLES(9), NULL, CYCLES(28), CYCLES(28), NULL, CYCLES(5), NULL, CYCLES(6) + // clang-format on }; -static int *opcode_timings_db_mod3[64] = -{ +static int *opcode_timings_db_mod3[64] = { + // clang-format off NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -203,219 +220,242 @@ static int *opcode_timings_db_mod3[64] = NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + // clang-format on }; -static int *opcode_timings_dc[8] = -{ +static int *opcode_timings_dc[8] = { + // clang-format off /* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_dc_mod3[8] = -{ 
+static int *opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ CYCLES(8), CYCLES(16), NULL, NULL, CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_dd[8] = -{ +static int *opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ CYCLES(3), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(3) + // clang-format on }; -static int *opcode_timings_dd_mod3[8] = -{ +static int *opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL + // clang-format on }; -static int *opcode_timings_de[8] = -{ +static int *opcode_timings_de[8] = { + // clang-format off /* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(8), CYCLES(11), CYCLES(4), CYCLES(4), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_de_mod3[8] = -{ +static int *opcode_timings_de_mod3[8] = { + // clang-format off /* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ CYCLES(8), CYCLES(16), NULL, CYCLES(5), CYCLES(8), CYCLES(8), CYCLES(73), CYCLES(73) + // clang-format on }; -static int *opcode_timings_df[8] = -{ +static int *opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ CYCLES(13), NULL, CYCLES(29), CYCLES(29), NULL, CYCLES(10), CYCLES(172), CYCLES(28) + // clang-format on }; -static int *opcode_timings_df_mod3[8] = -{ +static int *opcode_timings_df_mod3[8] = { + // clang-format off /* FFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), NULL, NULL + // clang-format on }; -static int *opcode_timings_8x[8] = -{ +static int *opcode_timings_8x[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int *opcode_timings_8x_mod3[8] = 
-{ +static int *opcode_timings_8x_mod3[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int *opcode_timings_81[8] = -{ +static int *opcode_timings_81[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int *opcode_timings_81_mod3[8] = -{ +static int *opcode_timings_81_mod3[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int timing_count; -static uint8_t last_prefix; +static int timing_count; +static uint8_t last_prefix; static uint32_t regmask_modified; -static inline int COUNT(int *c, int op_32) +static inline int +COUNT(int *c, int op_32) { - if ((uintptr_t)c <= 10000) - return (int)(uintptr_t)c; - if (((uintptr_t)c & ~0xffff) == (-1 & ~0xffff)) - { - if (op_32 & 0x100) - return ((uintptr_t)c >> 8) & 0xff; - return (uintptr_t)c & 0xff; - } - return *c; + if ((uintptr_t) c <= 10000) + return (int) (uintptr_t) c; + if (((uintptr_t) c & ~0xffff) == (-1 & ~0xffff)) { + if (op_32 & 0x100) + return ((uintptr_t) c >> 8) & 0xff; + return (uintptr_t) c & 0xff; + } + return *c; } -void codegen_timing_486_block_start(void) +void +codegen_timing_486_block_start(void) { - regmask_modified = 0; + regmask_modified = 0; } -void codegen_timing_486_start(void) +void +codegen_timing_486_start(void) { - timing_count = 0; - last_prefix = 0; + timing_count = 0; + last_prefix = 0; } -void codegen_timing_486_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_486_prefix(uint8_t prefix, uint32_t fetchdat) { - timing_count += COUNT(opcode_timings[prefix], 0); - last_prefix = prefix; + timing_count += COUNT(opcode_timings[prefix], 0); + last_prefix = prefix; } -void codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +void 
+codegen_timing_486_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - int **timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 1); + int **timings; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? 
opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + case 0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + timings = mod3 ? 
opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + timing_count += COUNT(timings[opcode], op_32); + if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) + timing_count++; /*AGI stall*/ + codegen_block_cycles += timing_count; - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? 
opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; - - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } - } - - timing_count += COUNT(timings[opcode], op_32); - if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) - timing_count++; /*AGI stall*/ - codegen_block_cycles += timing_count; - - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); } -void codegen_timing_486_block_end(void) +void +codegen_timing_486_block_end(void) { + // } -codegen_timing_t codegen_timing_486 = -{ - codegen_timing_486_start, - codegen_timing_486_prefix, - codegen_timing_486_opcode, - codegen_timing_486_block_start, - codegen_timing_486_block_end, - NULL +codegen_timing_t codegen_timing_486 = { + codegen_timing_486_start, + codegen_timing_486_prefix, + codegen_timing_486_opcode, + codegen_timing_486_block_start, + codegen_timing_486_block_end, + NULL }; diff --git a/src/cpu/codegen_timing_686.c b/src/cpu/codegen_timing_686.c index 7d7f4042d..a6800c5b2 100644 --- a/src/cpu/codegen_timing_686.c +++ b/src/cpu/codegen_timing_686.c @@ -13,8 +13,10 @@ #include #include #include <86box/86box.h> -#include <86box/mem.h> #include "cpu.h" +#include <86box/mem.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" @@ -22,7 +24,7 @@ #include "codegen_timing_common.h" /*Instruction has different execution time for 16 and 32 bit data. Does not pair */ -#define CYCLES_HAS_MULTI (1 << 31) +#define CYCLES_HAS_MULTI (1 << 31) #define CYCLES_MULTI(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) @@ -31,39 +33,39 @@ /*Instruction follows either register timing, read-modify, or read-modify-write. 
May be pairable*/ -#define CYCLES_REG (1 << 0) -#define CYCLES_RM (1 << 0) -#define CYCLES_RMW (1 << 0) +#define CYCLES_REG (1 << 0) +#define CYCLES_RM (1 << 0) +#define CYCLES_RMW (1 << 0) #define CYCLES_BRANCH (1 << 0) -#define CYCLES_MASK ((1 << 7) - 1) +#define CYCLES_MASK ((1 << 7) - 1) /*Instruction does not pair*/ #define PAIR_NP (0 << 29) /*Instruction pairs in X pipe only*/ -#define PAIR_X (1 << 29) +#define PAIR_X (1 << 29) /*Instruction pairs in X pipe only, and can not pair with a following instruction*/ -#define PAIR_X_BRANCH (2 << 29) +#define PAIR_X_BRANCH (2 << 29) /*Instruction pairs in both X and Y pipes*/ -#define PAIR_XY (3 << 29) +#define PAIR_XY (3 << 29) #define PAIR_MASK (3 << 29) -#define INVALID 0 +#define INVALID 0 -static int prev_full; -static uint32_t prev_opcode; +static int prev_full; +static uint32_t prev_opcode; static uint32_t *prev_timings; -static uint32_t prev_op_32; -static uint32_t prev_regmask; +static uint32_t prev_op_32; +static uint32_t prev_regmask; static uint64_t *prev_deps; -static uint32_t prev_fetchdat; +static uint32_t prev_fetchdat; static uint32_t last_regmask_modified; static uint32_t regmask_modified; -static uint32_t opcode_timings[256] = -{ +static uint32_t opcode_timings[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM, PAIR_XY | CYCLES_RM, /* ADD ADD PUSH ES POP ES*/ @@ -196,10 +198,11 @@ static uint32_t opcode_timings[256] = PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), /* CLD STD INCDEC*/ PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES_RMW, INVALID + // clang-format on }; -static uint32_t opcode_timings_mod3[256] = -{ +static uint32_t opcode_timings_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, /* ADD ADD PUSH ES POP ES*/ @@ -333,10 +336,11 @@ static uint32_t 
opcode_timings_mod3[256] = PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), /* CLD STD INCDEC*/ PAIR_XY | CYCLES(7), PAIR_XY | CYCLES(7), PAIR_XY | CYCLES_REG, INVALID + // clang-format on }; -static uint32_t opcode_timings_0f[256] = -{ +static uint32_t opcode_timings_0f[256] = { + // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, @@ -416,9 +420,10 @@ static uint32_t opcode_timings_0f[256] = INVALID, PAIR_X | CYCLES_RM, INVALID, INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, PAIR_X | CYCLES_RM, INVALID, + // clang-format on }; -static uint32_t opcode_timings_0f_mod3[256] = -{ +static uint32_t opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, @@ -497,106 +502,122 @@ static uint32_t opcode_timings_0f_mod3[256] = INVALID, PAIR_X | CYCLES_REG, INVALID, INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, PAIR_X | CYCLES_REG, INVALID, + // clang-format on }; -static uint32_t opcode_timings_shift[8] = -{ +static uint32_t opcode_timings_shift[8] = { + // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, + // clang-format on }; -static uint32_t opcode_timings_shift_mod3[8] = -{ +static uint32_t opcode_timings_shift_mod3[8] = { + // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | 
CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + // clang-format on }; -static uint32_t opcode_timings_shift_imm[8] = -{ +static uint32_t opcode_timings_shift_imm[8] = { + // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, + // clang-format on }; -static uint32_t opcode_timings_shift_imm_mod3[8] = -{ +static uint32_t opcode_timings_shift_imm_mod3[8] = { + // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES(3), PAIR_XY | CYCLES(4), PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, + // clang-format on }; -static uint32_t opcode_timings_shift_cl[8] = -{ +static uint32_t opcode_timings_shift_cl[8] = { + // clang-format off PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), + // clang-format on }; -static uint32_t opcode_timings_shift_cl_mod3[8] = -{ +static uint32_t opcode_timings_shift_cl_mod3[8] = { + // clang-format off PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(8), PAIR_XY | CYCLES(9), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), PAIR_XY | CYCLES(2), + // clang-format on }; -static uint32_t opcode_timings_f6[8] = -{ +static uint32_t opcode_timings_f6[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_RM, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(18) + // clang-format on }; -static uint32_t opcode_timings_f6_mod3[8] = -{ +static uint32_t opcode_timings_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | 
CYCLES(18), PAIR_NP | CYCLES(18) + // clang-format on }; -static uint32_t opcode_timings_f7[8] = -{ +static uint32_t opcode_timings_f7[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) + // clang-format on }; -static uint32_t opcode_timings_f7_mod3[8] = -{ +static uint32_t opcode_timings_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_XY | CYCLES_REG, INVALID, PAIR_XY | CYCLES(1), PAIR_XY | CYCLES(1), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(4,10), PAIR_NP | CYCLES_MULTI(19,27), PAIR_NP | CYCLES_MULTI(22,30) + // clang-format on }; -static uint32_t opcode_timings_ff[8] = -{ +static uint32_t opcode_timings_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), /* JMP JMP far PUSH*/ PAIR_X_BRANCH | CYCLES(3), PAIR_NP | CYCLES(5), PAIR_XY | CYCLES(1), INVALID + // clang-format on }; -static uint32_t opcode_timings_ff_mod3[8] = -{ +static uint32_t opcode_timings_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_X_BRANCH | CYCLES(1), PAIR_XY | CYCLES(5), /* JMP JMP far PUSH*/ PAIR_X_BRANCH | CYCLES(1), PAIR_XY | CYCLES(5), PAIR_XY | CYCLES(2), INVALID + // clang-format on }; -static uint32_t opcode_timings_d8[8] = -{ +static uint32_t opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), /* FSUBs FSUBRs FDIVs FDIVRs*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_d8_mod3[8] = -{ +static uint32_t opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM 
FCOMP*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(6), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), /* FSUB FSUBR FDIV FDIVR*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_d9[8] = -{ +static uint32_t opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), /* FLDENV FLDCW FSTENV FSTCW*/ PAIR_X | CYCLES(30), PAIR_X | CYCLES(4), PAIR_X | CYCLES(24), PAIR_X | CYCLES(5) + // clang-format on }; -static uint32_t opcode_timings_d9_mod3[64] = -{ +static uint32_t opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), @@ -625,31 +646,34 @@ static uint32_t opcode_timings_d9_mod3[64] = PAIR_X | CYCLES(91), INVALID, PAIR_X | CYCLES(60), PAIR_X | CYCLES(161), /* opFRNDINT opFSCALE opFSIN opFCOS*/ PAIR_X | CYCLES(20), PAIR_X | CYCLES(14), PAIR_X | CYCLES(140), PAIR_X | CYCLES(141) + // clang-format on }; -static uint32_t opcode_timings_da[8] = -{ +static uint32_t opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ PAIR_X | CYCLES(12), PAIR_X | CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), /* FISUBl FISUBRl FIDIVl FIDIVRl*/ PAIR_X | CYCLES(29), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(48) + // clang-format on }; -static uint32_t opcode_timings_da_mod3[8] = -{ +static uint32_t opcode_timings_da_mod3[8] = { + // clang-format off PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), INVALID, PAIR_X | CYCLES(5), INVALID, INVALID + // clang-format on }; - -static uint32_t opcode_timings_db[8] = -{ +static uint32_t opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), /* FLDe FSTPe*/ INVALID, PAIR_X | 
CYCLES(2), INVALID, PAIR_X | CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_db_mod3[64] = -{ +static uint32_t opcode_timings_db_mod3[64] = { + // clang-format off PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), @@ -675,383 +699,387 @@ static uint32_t opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // clang-format on }; -static uint32_t opcode_timings_dc[8] = -{ +static uint32_t opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), /* FSUBd FSUBRd FDIVd FDIVRd*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_dc_mod3[8] = -{ +static uint32_t opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_dd[8] = -{ +static uint32_t opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ PAIR_X | CYCLES(2), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), /* FRSTOR FSAVE FSTSW*/ PAIR_X | CYCLES(72), INVALID, PAIR_X | CYCLES(67), PAIR_X | CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_dd_mod3[8] = -{ +static uint32_t opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ PAIR_X | CYCLES(3), INVALID, PAIR_X | CYCLES(2), PAIR_X | CYCLES(2), /* FUCOM FUCOMP*/ PAIR_X | CYCLES(4), PAIR_X | CYCLES(4), INVALID, INVALID + // clang-format on }; -static uint32_t opcode_timings_de[8] = -{ +static uint32_t opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ PAIR_X | CYCLES(12), PAIR_X | 
CYCLES(11), PAIR_X | CYCLES(10), PAIR_X | CYCLES(10), /* FISUBw FISUBRw FIDIVw FIDIVRw*/ PAIR_X | CYCLES(27), PAIR_X | CYCLES(27), PAIR_X | CYCLES(38), PAIR_X | CYCLES(38) }; -static uint32_t opcode_timings_de_mod3[8] = -{ +static uint32_t opcode_timings_de_mod3[8] = { + // clang-format off /* FADD FMUL FCOMPP*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), INVALID, PAIR_X | CYCLES(7), /* FSUB FSUBR FDIV FDIVR*/ PAIR_X | CYCLES(7), PAIR_X | CYCLES(7), PAIR_X | CYCLES(34), PAIR_X | CYCLES(34) + // clang-format on }; -static uint32_t opcode_timings_df[8] = -{ +static uint32_t opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ PAIR_X | CYCLES(8), INVALID, PAIR_X | CYCLES(10), PAIR_X | CYCLES(13), /* FILDiq FBSTP FISTPiq*/ INVALID, PAIR_X | CYCLES(8), PAIR_X | CYCLES(63), PAIR_X | CYCLES(13) + // clang-format on }; -static uint32_t opcode_timings_df_mod3[8] = -{ +static uint32_t opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ PAIR_X | CYCLES(6), INVALID, INVALID, INVALID + // clang-format on }; -static uint32_t opcode_timings_8x[8] = -{ +static uint32_t opcode_timings_8x[8] = { + // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM + // clang-format on }; -static uint32_t opcode_timings_8x_mod3[8] = -{ +static uint32_t opcode_timings_8x_mod3[8] = { + // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG + // clang-format on }; -static uint32_t opcode_timings_81[8] = -{ +static uint32_t opcode_timings_81[8] = { + // clang-format off PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RMW, PAIR_XY | CYCLES_RM + // clang-format on 
}; -static uint32_t opcode_timings_81_mod3[8] = -{ +static uint32_t opcode_timings_81_mod3[8] = { + // clang-format off PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG, PAIR_XY | CYCLES_REG + // clang-format on }; -static int decode_delay; +static int decode_delay; static uint8_t last_prefix; -static inline int COUNT(uint32_t c, int op_32) +static inline int +COUNT(uint32_t c, int op_32) { - if (c & CYCLES_HAS_MULTI) - { - if (op_32 & 0x100) - return ((uintptr_t)c >> 8) & 0xff; - return (uintptr_t)c & 0xff; - } - if (!(c & PAIR_MASK)) - return c & 0xffff; + if (c & CYCLES_HAS_MULTI) { + if (op_32 & 0x100) + return ((uintptr_t) c >> 8) & 0xff; + return (uintptr_t) c & 0xff; + } + if (!(c & PAIR_MASK)) + return c & 0xffff; - return c & CYCLES_MASK; + return c & CYCLES_MASK; } -void codegen_timing_686_block_start(void) +void +codegen_timing_686_block_start(void) { - prev_full = decode_delay = 0; - regmask_modified = last_regmask_modified = 0; + prev_full = decode_delay = 0; + regmask_modified = last_regmask_modified = 0; } -void codegen_timing_686_start(void) +void +codegen_timing_686_start(void) { - decode_delay = 0; - last_prefix = 0; + decode_delay = 0; + last_prefix = 0; } -void codegen_timing_686_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_686_prefix(uint8_t prefix, uint32_t fetchdat) { - if ((prefix & 0xf8) == 0xd8) - { - last_prefix = prefix; - return; - } - if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) - { - /*0fh prefix is 'free' when used on conditional jumps*/ - last_prefix = prefix; - return; - } - - /*6x86 can decode 1 prefix per instruction per clock with no penalty. 
If - either instruction has more than one prefix then decode is delayed by - one cycle for each additional prefix*/ - decode_delay++; + if ((prefix & 0xf8) == 0xd8) { last_prefix = prefix; + return; + } + if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) { + /*0fh prefix is 'free' when used on conditional jumps*/ + last_prefix = prefix; + return; + } + + /*6x86 can decode 1 prefix per instruction per clock with no penalty. If + either instruction has more than one prefix then decode is delayed by + one cycle for each additional prefix*/ + decode_delay++; + last_prefix = prefix; } -static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) +static int +check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) { - uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); + uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); - if (addr_regmask & IMPL_ESP) - addr_regmask |= (1 << REG_ESP); + if (addr_regmask & IMPL_ESP) + addr_regmask |= (1 << REG_ESP); - if (regmask_modified & addr_regmask) - { - regmask_modified = 0; - return 2; - } + if (regmask_modified & addr_regmask) { + regmask_modified = 0; + return 2; + } - if (last_regmask_modified & addr_regmask) - return 1; + if (last_regmask_modified & addr_regmask) + return 1; - return 0; + return 0; } -void codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +void +codegen_timing_686_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - uint32_t *timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 1); + uint32_t *timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? 
opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? 
opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + timings = mod3 ? opcode_timings_shift_imm_mod3 : opcode_timings_shift_imm; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xd0: + case 0xd1: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xd2: + case 0xd3: + timings = mod3 ? opcode_timings_shift_cl_mod3 : opcode_timings_shift_cl; + deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? 
opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: - timings = mod3 ? opcode_timings_shift_imm_mod3 : opcode_timings_shift_imm; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + /*One prefix per instruction is free*/ + decode_delay--; + if (decode_delay < 0) + decode_delay = 0; - case 0xd0: case 0xd1: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (prev_full) { + uint32_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, op_32); + int agi_stall = 0; - case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_cl_mod3 : opcode_timings_shift_cl; - deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; - opcode = (fetchdat >> 3) & 7; - break; + if (regmask & IMPL_ESP) + regmask |= SRCDEP_ESP | DSTDEP_ESP; - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? 
opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; + agi_stall = check_agi(prev_deps, prev_opcode, prev_fetchdat, prev_op_32); - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } + /*Second instruction in the pair*/ + if ((timings[opcode] & PAIR_MASK) == PAIR_NP) { + /*Instruction can not pair with previous*/ + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; + prev_full = 0; + last_regmask_modified = regmask_modified; + regmask_modified = prev_regmask; + } else if (((timings[opcode] & PAIR_MASK) == PAIR_X || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) + && (prev_timings[opcode] & PAIR_MASK) == PAIR_X) { + /*Instruction can not pair with previous*/ + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; + prev_full = 0; + last_regmask_modified = regmask_modified; + regmask_modified = prev_regmask; + } else if (prev_regmask & regmask) { + /*Instruction can not pair with previous*/ + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; + prev_full = 0; + last_regmask_modified = regmask_modified; + regmask_modified = prev_regmask; + } else { + int t1 = COUNT(prev_timings[prev_opcode], 
prev_op_32); + int t2 = COUNT(timings[opcode], op_32); + int t_pair = (t1 > t2) ? t1 : t2; + + if (!t_pair) + fatal("Pairable 0 cycles! %02x %02x\n", opcode, prev_opcode); + + agi_stall = check_agi(deps, opcode, fetchdat, op_32); + + codegen_block_cycles += t_pair + agi_stall; + decode_delay = (-t_pair) + 1 + agi_stall; + + last_regmask_modified = regmask_modified; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | prev_regmask; + prev_full = 0; + return; } + } - /*One prefix per instruction is free*/ - decode_delay--; - if (decode_delay < 0) - decode_delay = 0; + if (!prev_full) { + /*First instruction in the pair*/ + if ((timings[opcode] & PAIR_MASK) == PAIR_NP || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) { + /*Instruction not pairable*/ + int agi_stall = 0; - if (prev_full) - { - uint32_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, op_32); - int agi_stall = 0; + agi_stall = check_agi(deps, opcode, fetchdat, op_32); - if (regmask & IMPL_ESP) - regmask |= SRCDEP_ESP | DSTDEP_ESP; - - agi_stall = check_agi(prev_deps, prev_opcode, prev_fetchdat, prev_op_32); - - /*Second instruction in the pair*/ - if ((timings[opcode] & PAIR_MASK) == PAIR_NP) - { - /*Instruction can not pair with previous*/ - /*Run previous now*/ - codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; - decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; - prev_full = 0; - last_regmask_modified = regmask_modified; - regmask_modified = prev_regmask; - } - else if (((timings[opcode] & PAIR_MASK) == PAIR_X || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) - && (prev_timings[opcode] & PAIR_MASK) == PAIR_X) - { - /*Instruction can not pair with previous*/ - /*Run previous now*/ - codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; - decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; - prev_full = 0; - last_regmask_modified = 
regmask_modified; - regmask_modified = prev_regmask; - } - else if (prev_regmask & regmask) - { - /*Instruction can not pair with previous*/ - /*Run previous now*/ - codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay + agi_stall; - decode_delay = (-COUNT(prev_timings[prev_opcode], prev_op_32)) + 1 + agi_stall; - prev_full = 0; - last_regmask_modified = regmask_modified; - regmask_modified = prev_regmask; - } - else - { - int t1 = COUNT(prev_timings[prev_opcode], prev_op_32); - int t2 = COUNT(timings[opcode], op_32); - int t_pair = (t1 > t2) ? t1 : t2; - - if (!t_pair) - fatal("Pairable 0 cycles! %02x %02x\n", opcode, prev_opcode); - - agi_stall = check_agi(deps, opcode, fetchdat, op_32); - - codegen_block_cycles += t_pair + agi_stall; - decode_delay = (-t_pair) + 1 + agi_stall; - - last_regmask_modified = regmask_modified; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | prev_regmask; - prev_full = 0; - return; - } - } - - if (!prev_full) - { - /*First instruction in the pair*/ - if ((timings[opcode] & PAIR_MASK) == PAIR_NP || (timings[opcode] & PAIR_MASK) == PAIR_X_BRANCH) - { - /*Instruction not pairable*/ - int agi_stall = 0; - - agi_stall = check_agi(deps, opcode, fetchdat, op_32); - - codegen_block_cycles += COUNT(timings[opcode], op_32) + decode_delay + agi_stall; - decode_delay = (-COUNT(timings[opcode], op_32)) + 1 + agi_stall; - last_regmask_modified = regmask_modified; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); - } - else - { - /*Instruction might pair with next*/ - prev_full = 1; - prev_opcode = opcode; - prev_timings = timings; - prev_op_32 = op_32; - prev_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); - if (prev_regmask & IMPL_ESP) - prev_regmask |= SRCDEP_ESP | DSTDEP_ESP; - prev_deps = deps; - prev_fetchdat = fetchdat; - return; - } + codegen_block_cycles += COUNT(timings[opcode], op_32) + decode_delay + agi_stall; + decode_delay = (-COUNT(timings[opcode], op_32)) 
+ 1 + agi_stall; + last_regmask_modified = regmask_modified; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + } else { + /*Instruction might pair with next*/ + prev_full = 1; + prev_opcode = opcode; + prev_timings = timings; + prev_op_32 = op_32; + prev_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); + if (prev_regmask & IMPL_ESP) + prev_regmask |= SRCDEP_ESP | DSTDEP_ESP; + prev_deps = deps; + prev_fetchdat = fetchdat; + return; } + } } -void codegen_timing_686_block_end(void) +void +codegen_timing_686_block_end(void) { - if (prev_full) - { - /*Run previous now*/ - codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay; - prev_full = 0; - } + if (prev_full) { + /*Run previous now*/ + codegen_block_cycles += COUNT(prev_timings[prev_opcode], prev_op_32) + decode_delay; + prev_full = 0; + } } -codegen_timing_t codegen_timing_686 = -{ - codegen_timing_686_start, - codegen_timing_686_prefix, - codegen_timing_686_opcode, - codegen_timing_686_block_start, - codegen_timing_686_block_end, - NULL +codegen_timing_t codegen_timing_686 = { + codegen_timing_686_start, + codegen_timing_686_prefix, + codegen_timing_686_opcode, + codegen_timing_686_block_start, + codegen_timing_686_block_end, + NULL }; diff --git a/src/cpu/codegen_timing_common.c b/src/cpu/codegen_timing_common.c index a1f1b6ce7..0c538fc4e 100644 --- a/src/cpu/codegen_timing_common.c +++ b/src/cpu/codegen_timing_common.c @@ -8,8 +8,8 @@ #include "codegen_timing_common.h" -uint64_t opcode_deps[256] = -{ +uint64_t opcode_deps[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ SRCDEP_REG | MODRM, SRCDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, SRCDEP_REG | DSTDEP_REG | MODRM, /* ADD ADD PUSH ES POP ES*/ @@ -140,10 +140,11 @@ uint64_t opcode_deps[256] = 0, 0, 0, 0, /* CLD STD INCDEC*/ 0, 0, MODRM, 0 + // clang-format on }; -uint64_t opcode_deps_mod3[256] = -{ +uint64_t opcode_deps_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ 
SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, SRCDEP_REG | DSTDEP_REG | SRCDEP_RM | MODRM, /* ADD ADD PUSH ES POP ES*/ @@ -274,10 +275,11 @@ uint64_t opcode_deps_mod3[256] = 0, 0, 0, 0, /* CLD STD INCDEC*/ 0, 0, SRCDEP_RM | DSTDEP_RM | MODRM, 0 + // clang-format on }; -uint64_t opcode_deps_0f[256] = -{ +uint64_t opcode_deps_0f[256] = { + // clang-format off /*00*/ MODRM, MODRM, MODRM, MODRM, 0, 0, 0, 0, 0, 0, 0, 0, @@ -357,9 +359,10 @@ uint64_t opcode_deps_0f[256] = 0, MODRM | MMX_MULTIPLY, 0, 0, MODRM, MODRM, MODRM, 0, MODRM, MODRM, MODRM, 0, + // clang-format on }; -uint64_t opcode_deps_0f_mod3[256] = -{ +uint64_t opcode_deps_0f_mod3[256] = { + // clang-format off /*00*/ MODRM, MODRM, MODRM, MODRM, 0, 0, 0, 0, 0, 0, 0, 0, @@ -439,10 +442,11 @@ uint64_t opcode_deps_0f_mod3[256] = 0, MODRM | MMX_MULTIPLY, 0, 0, MODRM, MODRM, MODRM, 0, MODRM, MODRM, MODRM, 0, + // clang-format on }; -uint64_t opcode_deps_0f0f[256] = -{ +uint64_t opcode_deps_0f0f[256] = { + // clang-format off /*00*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -522,9 +526,10 @@ uint64_t opcode_deps_0f0f[256] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // clang-format on }; -uint64_t opcode_deps_0f0f_mod3[256] = -{ +uint64_t opcode_deps_0f0f_mod3[256] = { + // clang-format off /*00*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -604,97 +609,111 @@ uint64_t opcode_deps_0f0f_mod3[256] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // clang-format on }; -uint64_t opcode_deps_shift[8] = -{ +uint64_t opcode_deps_shift[8] = { + // clang-format off MODRM, MODRM, MODRM, MODRM, MODRM, MODRM, MODRM, MODRM, + // clang-format on }; -uint64_t opcode_deps_shift_mod3[8] = -{ +uint64_t opcode_deps_shift_mod3[8] = { + // clang-format off SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, 
SRCDEP_RM | DSTDEP_RM | MODRM, + // clang-format on }; -uint64_t opcode_deps_shift_cl[8] = -{ +uint64_t opcode_deps_shift_cl[8] = { + // clang-format off MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, MODRM | SRCDEP_ECX, + // clang-format on }; -uint64_t opcode_deps_shift_cl_mod3[8] = -{ +uint64_t opcode_deps_shift_cl_mod3[8] = { + // clang-format off SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, SRCDEP_RM | DSTDEP_RM | MODRM | SRCDEP_ECX, + // clang-format on }; -uint64_t opcode_deps_f6[8] = -{ +uint64_t opcode_deps_f6[8] = { + // clang-format off /* TST NOT NEG*/ MODRM, 0, MODRM, MODRM, /* MUL IMUL DIV IDIV*/ SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM + // clang-format on }; -uint64_t opcode_deps_f6_mod3[8] = -{ +uint64_t opcode_deps_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ SRCDEP_RM | MODRM, 0, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, /* MUL IMUL DIV IDIV*/ SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM + // clang-format on }; -uint64_t opcode_deps_f7[8] = -{ +uint64_t opcode_deps_f7[8] = { + // clang-format off /* TST NOT NEG*/ MODRM, 0, MODRM, MODRM, /* MUL IMUL DIV IDIV*/ SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM, 
SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM + // clang-format on }; -uint64_t opcode_deps_f7_mod3[8] = -{ +uint64_t opcode_deps_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ SRCDEP_RM | MODRM, 0, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, /* MUL IMUL DIV IDIV*/ SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | SRCDEP_RM | MODRM, SRCDEP_EAX | SRCDEP_EDX | DSTDEP_EAX | DSTDEP_EDX | MODRM + // clang-format on }; -uint64_t opcode_deps_ff[8] = -{ +uint64_t opcode_deps_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ MODRM, MODRM, MODRM | IMPL_ESP, MODRM, /* JMP JMP far PUSH*/ MODRM, MODRM, MODRM | IMPL_ESP, 0 + // clang-format on }; -uint64_t opcode_deps_ff_mod3[8] = -{ +uint64_t opcode_deps_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | DSTDEP_RM | MODRM, SRCDEP_RM | MODRM | IMPL_ESP, MODRM, /* JMP JMP far PUSH*/ SRCDEP_RM | MODRM, MODRM, SRCDEP_RM | MODRM | IMPL_ESP, 0 + // clang-format on }; -uint64_t opcode_deps_d8[8] = -{ +uint64_t opcode_deps_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_POP | FPU_READ_ST0 | MODRM, /* FSUBs FSUBRs FDIVs FDIVRs*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM + // clang-format on }; -uint64_t opcode_deps_d8_mod3[8] = -{ +uint64_t opcode_deps_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG, FPU_READ_ST0 | FPU_READ_STREG, FPU_POP | FPU_READ_ST0 | FPU_READ_STREG, /* FSUB FSUBR FDIV FDIVR*/ FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG, FPU_RW_ST0 | FPU_READ_STREG + // clang-format on }; -uint64_t opcode_deps_d9[8] = -{ +uint64_t opcode_deps_d9[8] = { + // clang-format off /* FLDs FSTs 
FSTPs*/ FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_POP | MODRM, /* FLDENV FLDCW FSTENV FSTCW*/ MODRM, MODRM, MODRM, MODRM + // clang-format on }; -uint64_t opcode_deps_d9_mod3[64] = -{ +uint64_t opcode_deps_d9_mod3[64] = { + // clang-format off /*FLD*/ FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, FPU_PUSH | FPU_READ_STREG, @@ -722,32 +741,35 @@ uint64_t opcode_deps_d9_mod3[64] = 0, 0, 0, 0, /* opFRNDINT opFSCALE opFSIN opFCOS*/ 0, 0, 0, 0 + // clang-format on }; -uint64_t opcode_deps_da[8] = -{ +uint64_t opcode_deps_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM + // clang-format on }; -uint64_t opcode_deps_da_mod3[8] = -{ +uint64_t opcode_deps_da_mod3[8] = { + // clang-format off 0, 0, 0, 0, /* FCOMPP*/ 0, FPU_POP2, 0, 0 + // clang-format on }; - -uint64_t opcode_deps_db[8] = -{ +uint64_t opcode_deps_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FLDe FSTPe*/ 0, FPU_PUSH | MODRM, 0, FPU_READ_ST0 | FPU_POP | MODRM + // clang-format on }; -uint64_t opcode_deps_db_mod3[64] = -{ +uint64_t opcode_deps_db_mod3[64] = { + // clang-format off 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -767,84 +789,97 @@ uint64_t opcode_deps_db_mod3[64] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // clang-format on }; -uint64_t opcode_deps_dc[8] = -{ +uint64_t opcode_deps_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FSUBd FSUBRd FDIVd FDIVRd*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, 
FPU_RW_ST0 | MODRM + // clang-format on }; -uint64_t opcode_deps_dc_mod3[8] = -{ +uint64_t opcode_deps_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG, 0, 0, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG, FPU_READ_ST0 | FPU_RW_STREG + // clang-format on }; -uint64_t opcode_deps_dd[8] = -{ +uint64_t opcode_deps_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FRSTOR FSAVE FSTSW*/ MODRM, 0, MODRM, MODRM + // clang-format on }; -uint64_t opcode_deps_dd_mod3[8] = -{ +uint64_t opcode_deps_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ 0, 0, FPU_READ_ST0 | FPU_WRITE_STREG, FPU_READ_ST0 | FPU_WRITE_STREG | FPU_POP, /* FUCOM FUCOMP*/ FPU_READ_ST0 | FPU_READ_STREG, FPU_READ_ST0 | FPU_READ_STREG | FPU_POP, 0, 0 + // clang-format on }; -uint64_t opcode_deps_de[8] = -{ +uint64_t opcode_deps_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM, FPU_RW_ST0 | MODRM + // clang-format on }; -uint64_t opcode_deps_de_mod3[8] = -{ +uint64_t opcode_deps_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, 0, FPU_READ_ST0 | FPU_READ_ST1 | FPU_POP2, /* FSUBP FSUBRP FDIVP FDIVRP*/ FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP, FPU_READ_ST0 | FPU_RW_STREG | FPU_POP + // clang-format on }; -uint64_t opcode_deps_df[8] = -{ +uint64_t opcode_deps_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ FPU_PUSH | MODRM, 0, FPU_READ_ST0 | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, /* FILDiq FBSTP FISTPiq*/ 0, 
FPU_PUSH | MODRM, FPU_READ_ST0 | FPU_POP | MODRM, FPU_READ_ST0 | FPU_POP | MODRM + // clang-format on }; -uint64_t opcode_deps_df_mod3[8] = -{ +uint64_t opcode_deps_df_mod3[8] = { + // clang-format off 0, 0, 0, 0, /* FSTSW AX*/ 0, 0, 0, 0 + // clang-format on }; -uint64_t opcode_deps_81[8] = -{ +uint64_t opcode_deps_81[8] = { + // clang-format off MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632, MODRM | HAS_IMM1632 + // clang-format on }; -uint64_t opcode_deps_81_mod3[8] = -{ +uint64_t opcode_deps_81_mod3[8] = { + // clang-format off SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM1632, SRCDEP_RM | MODRM | HAS_IMM1632 + // clang-format on }; -uint64_t opcode_deps_8x[8] = -{ +uint64_t opcode_deps_8x[8] = { + // clang-format off MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8, MODRM | HAS_IMM8 + // clang-format on }; -uint64_t opcode_deps_8x_mod3[8] = -{ +uint64_t opcode_deps_8x_mod3[8] = { + // clang-format off SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | DSTDEP_RM | MODRM | HAS_IMM8, SRCDEP_RM | MODRM | HAS_IMM8 + // clang-format on }; diff --git a/src/cpu/codegen_timing_common.h b/src/cpu/codegen_timing_common.h index 679997802..cc3ff5a6f 100644 --- a/src/cpu/codegen_timing_common.h +++ b/src/cpu/codegen_timing_common.h @@ -1,79 +1,79 @@ #include "codegen_ops.h" /*Instruction has input dependency on register in REG 
field*/ -#define SRCDEP_REG (1ull << 0) +#define SRCDEP_REG (1ULL << 0) /*Instruction has input dependency on register in R/M field*/ -#define SRCDEP_RM (1ull << 1) +#define SRCDEP_RM (1ULL << 1) /*Instruction modifies register in REG field*/ -#define DSTDEP_REG (1ull << 2) +#define DSTDEP_REG (1ULL<< 2) /*Instruction modifies register in R/M field*/ -#define DSTDEP_RM (1ull << 3) +#define DSTDEP_RM (1ULL << 3) #define SRCDEP_SHIFT 4 #define DSTDEP_SHIFT 12 /*Instruction has input dependency on given register*/ -#define SRCDEP_EAX (1ull << 4) -#define SRCDEP_ECX (1ull << 5) -#define SRCDEP_EDX (1ull << 6) -#define SRCDEP_EBX (1ull << 7) -#define SRCDEP_ESP (1ull << 8) -#define SRCDEP_EBP (1ull << 9) -#define SRCDEP_ESI (1ull << 10) -#define SRCDEP_EDI (1ull << 11) +#define SRCDEP_EAX (1ULL << 4) +#define SRCDEP_ECX (1ULL << 5) +#define SRCDEP_EDX (1ULL << 6) +#define SRCDEP_EBX (1ULL << 7) +#define SRCDEP_ESP (1ULL << 8) +#define SRCDEP_EBP (1ULL << 9) +#define SRCDEP_ESI (1ULL << 10) +#define SRCDEP_EDI (1ULL << 11) /*Instruction modifies given register*/ -#define DSTDEP_EAX (1ull << 12) -#define DSTDEP_ECX (1ull << 13) -#define DSTDEP_EDX (1ull << 14) -#define DSTDEP_EBX (1ull << 15) -#define DSTDEP_ESP (1ull << 16) -#define DSTDEP_EBP (1ull << 17) -#define DSTDEP_ESI (1ull << 18) -#define DSTDEP_EDI (1ull << 19) +#define DSTDEP_EAX (1ULL << 12) +#define DSTDEP_ECX (1ULL << 13) +#define DSTDEP_EDX (1ULL << 14) +#define DSTDEP_EBX (1ULL << 15) +#define DSTDEP_ESP (1ULL << 16) +#define DSTDEP_EBP (1ULL << 17) +#define DSTDEP_ESI (1ULL << 18) +#define DSTDEP_EDI (1ULL << 19) /*Instruction has ModR/M byte*/ -#define MODRM (1ull << 20) +#define MODRM (1ULL << 20) /*Instruction implicitly uses ESP*/ -#define IMPL_ESP (1ull << 21) +#define IMPL_ESP (1ULL << 21) /*Instruction is MMX shift or pack/unpack instruction*/ -#define MMX_SHIFTPACK (1ull << 22) +#define MMX_SHIFTPACK (1ULL << 22) /*Instruction is MMX multiply instruction*/ -#define MMX_MULTIPLY (1ull << 23) 
+#define MMX_MULTIPLY (1ULL << 23) /*Instruction pops the FPU stack*/ -#define FPU_POP (1ull << 24) +#define FPU_POP (1ULL << 24) /*Instruction pops the FPU stack twice*/ -#define FPU_POP2 (1ull << 25) +#define FPU_POP2 (1ULL << 25) /*Instruction pushes onto the FPU stack*/ -#define FPU_PUSH (1ull << 26) +#define FPU_PUSH (1ULL << 26) /*Instruction writes to ST(0)*/ -#define FPU_WRITE_ST0 (1ull << 27) +#define FPU_WRITE_ST0 (1ULL << 27) /*Instruction reads from ST(0)*/ -#define FPU_READ_ST0 (1ull << 28) +#define FPU_READ_ST0 (1ULL << 28) /*Instruction reads from and writes to ST(0)*/ -#define FPU_RW_ST0 (3ull << 27) +#define FPU_RW_ST0 (3ULL << 27) /*Instruction reads from ST(1)*/ -#define FPU_READ_ST1 (1ull << 29) +#define FPU_READ_ST1 (1ULL << 29) /*Instruction writes to ST(1)*/ -#define FPU_WRITE_ST1 (1ull << 30) +#define FPU_WRITE_ST1 (1ULL << 30) /*Instruction reads from and writes to ST(1)*/ -#define FPU_RW_ST1 (3ull << 29) +#define FPU_RW_ST1 (3ULL << 29) /*Instruction reads from ST(reg)*/ -#define FPU_READ_STREG (1ull << 31) +#define FPU_READ_STREG (1ULL << 31) /*Instruction writes to ST(reg)*/ -#define FPU_WRITE_STREG (1ull << 32) +#define FPU_WRITE_STREG (1ULL << 32) /*Instruction reads from and writes to ST(reg)*/ -#define FPU_RW_STREG (3ull << 31) +#define FPU_RW_STREG (3ULL << 31) -#define FPU_FXCH (1ull << 33) +#define FPU_FXCH (1ULL << 33) -#define HAS_IMM8 (1ull << 34) -#define HAS_IMM1632 (1ull << 35) +#define HAS_IMM8 (1ULL << 34) +#define HAS_IMM1632 (1ULL << 35) #define REGMASK_IMPL_ESP (1 << 8) #define REGMASK_SHIFTPACK (1 << 9) diff --git a/src/cpu/codegen_timing_k6.c b/src/cpu/codegen_timing_k6.c index 6a59fc157..88215bb17 100644 --- a/src/cpu/codegen_timing_k6.c +++ b/src/cpu/codegen_timing_k6.c @@ -8,6 +8,7 @@ #include <86box/mem.h> #include "cpu.h" #include <86box/machine.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" @@ -16,865 +17,748 @@ #include "codegen_ops.h" #include "codegen_timing_common.h" -typedef enum uop_type_t -{ 
- UOP_ALU = 0, /*Executes in Integer X or Y units*/ - UOP_ALUX, /*Executes in Integer X unit*/ - UOP_LOAD, /*Executes in Load unit*/ - UOP_STORE, /*Executes in Store unit*/ - UOP_FLOAD, /*Executes in Load unit*/ - UOP_FSTORE, /*Executes in Store unit*/ - UOP_MLOAD, /*Executes in Load unit*/ - UOP_MSTORE, /*Executes in Store unit*/ - UOP_FLOAT, /*Executes in Floating Point unit*/ - UOP_MEU, /*Executes in Multimedia unit*/ - UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ - UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX/3DNow multiplier*/ - UOP_MEU_3DN, /*Executes in Multimedia unit or ALU X/Y. Uses 3DNow ALU*/ - UOP_BRANCH, /*Executes in Branch unit*/ - UOP_LIMM /*Does not require an execution unit*/ +typedef enum uop_type_t { + UOP_ALU = 0, /*Executes in Integer X or Y units*/ + UOP_ALUX, /*Executes in Integer X unit*/ + UOP_LOAD, /*Executes in Load unit*/ + UOP_STORE, /*Executes in Store unit*/ + UOP_FLOAD, /*Executes in Load unit*/ + UOP_FSTORE, /*Executes in Store unit*/ + UOP_MLOAD, /*Executes in Load unit*/ + UOP_MSTORE, /*Executes in Store unit*/ + UOP_FLOAT, /*Executes in Floating Point unit*/ + UOP_MEU, /*Executes in Multimedia unit*/ + UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/ + UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX/3DNow multiplier*/ + UOP_MEU_3DN, /*Executes in Multimedia unit or ALU X/Y. 
Uses 3DNow ALU*/ + UOP_BRANCH, /*Executes in Branch unit*/ + UOP_LIMM /*Does not require an execution unit*/ } uop_type_t; -typedef enum decode_type_t -{ - DECODE_SHORT, - DECODE_LONG, - DECODE_VECTOR +typedef enum decode_type_t { + DECODE_SHORT, + DECODE_LONG, + DECODE_VECTOR } decode_type_t; #define MAX_UOPS 10 -typedef struct risc86_uop_t -{ - uop_type_t type; - int throughput; - int latency; +typedef struct risc86_uop_t { + uop_type_t type; + int throughput; + int latency; } risc86_uop_t; -typedef struct risc86_instruction_t -{ - int nr_uops; - decode_type_t decode_type; - risc86_uop_t uop[MAX_UOPS]; +typedef struct risc86_instruction_t { + int nr_uops; + decode_type_t decode_type; + risc86_uop_t uop[MAX_UOPS]; } risc86_instruction_t; -static const risc86_instruction_t alu_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t alu_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t alux_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t alux_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_alux_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] 
= {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_alux_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t alu_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t alu_store_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t alux_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t alux_store_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t branch_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t branch_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t limm_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} +static const risc86_instruction_t limm_op = { + .nr_uops = 1, + .decode_type 
= DECODE_SHORT, + .uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} +static const risc86_instruction_t load_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t store_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t store_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} }; - -static const risc86_instruction_t bswap_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t bswap_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t leave_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t leave_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t lods_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t lods_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = 
UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t loop_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t loop_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t mov_reg_seg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, +static const risc86_instruction_t mov_reg_seg_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, }; -static const risc86_instruction_t movs_op = -{ - .nr_uops = 4, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t movs_op = { + .nr_uops = 4, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t pop_reg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t pop_reg_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t 
pop_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t pop_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t push_imm_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, +static const risc86_instruction_t push_imm_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, }; -static const risc86_instruction_t push_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t push_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t push_seg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t push_seg_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t stos_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const 
risc86_instruction_t stos_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t test_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t test_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t test_reg_b_op = -{ - .nr_uops = 1, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t test_reg_b_op = { + .nr_uops = 1, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t test_mem_imm_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t test_mem_imm_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t test_mem_imm_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t test_mem_imm_b_op = { + .nr_uops = 2, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t xchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_LONG, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = 
{.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t xchg_op = { + .nr_uops = 3, + .decode_type = DECODE_LONG, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t m3dn_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +static const risc86_instruction_t m3dn_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t mmx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU, .throughput = 1, .latency = 1} +static const risc86_instruction_t mmx_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t mmx_mul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +static const risc86_instruction_t mmx_mul_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t mmx_shift_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +static const risc86_instruction_t mmx_shift_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_3dn_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_3dn_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 
1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_3DN, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_mmx_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_mmx_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t load_mmx_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +static const risc86_instruction_t load_mmx_mul_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t load_mmx_shift_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} +static const risc86_instruction_t load_mmx_shift_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t mload_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} +static const risc86_instruction_t mload_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t mstore_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t mstore_op 
= { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t pmul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +static const risc86_instruction_t pmul_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t pmul_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2} +static const risc86_instruction_t pmul_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t float_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +static const risc86_instruction_t float_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} }; -static const risc86_instruction_t load_float_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +static const risc86_instruction_t load_float_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2} }; -static const risc86_instruction_t fstore_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t fstore_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} }; 
-static const risc86_instruction_t fdiv_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} +static const risc86_instruction_t fdiv_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} }; -static const risc86_instruction_t fdiv_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40} +static const risc86_instruction_t fdiv_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_FLOAT, .throughput = 40, .latency = 40} }; -static const risc86_instruction_t fsin_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} +static const risc86_instruction_t fsin_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62} }; -static const risc86_instruction_t fsqrt_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SHORT, - .uop[0] = {.type = UOP_FLOAT, .throughput = 41, .latency = 41} +static const risc86_instruction_t fsqrt_op = { + .nr_uops = 1, + .decode_type = DECODE_SHORT, + .uop[0] = {.type = UOP_FLOAT, .throughput = 41, .latency = 41} }; -static const risc86_instruction_t vector_fldcw_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} +static const risc86_instruction_t vector_fldcw_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8} }; -static const risc86_instruction_t vector_float_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +static const risc86_instruction_t vector_float_op = { + .nr_uops = 
1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} }; -static const risc86_instruction_t vector_float_l_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} +static const risc86_instruction_t vector_float_l_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50} }; -static const risc86_instruction_t vector_flde_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2} +static const risc86_instruction_t vector_flde_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_FLOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2} }; -static const risc86_instruction_t vector_fste_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, - .uop[1] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_fste_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}, + .uop[1] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alu1_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu1_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t 
vector_alu2_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu2_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alu3_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu3_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alu6_op = -{ - .nr_uops = 6, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu6_op = { + .nr_uops = 6, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t 
vector_alux1_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alux1_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alux3_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alux3_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alux6_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alux6_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alu_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, 
.throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alu_store_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_alux_store_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_alux_store_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_arpl_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +static const risc86_instruction_t vector_arpl_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3} }; -static const risc86_instruction_t vector_bound_op = -{ - .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_bound_op = { + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, 
.throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_bsx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_bsx_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_call_far_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_call_far_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_cli_sti_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} +static const risc86_instruction_t vector_cli_sti_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7} }; -static const risc86_instruction_t vector_cmps_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_cmps_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, 
.latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_cmpsb_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_cmpsb_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_cmpxchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, +static const risc86_instruction_t vector_cmpxchg_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, }; -static const risc86_instruction_t vector_cmpxchg_b_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, +static const risc86_instruction_t vector_cmpxchg_b_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, }; -static const risc86_instruction_t vector_cpuid_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, 
.throughput = 22, .latency = 22} +static const risc86_instruction_t vector_cpuid_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22} }; -static const risc86_instruction_t vector_div16_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_div16_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_div16_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_div16_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_div32_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} +static const risc86_instruction_t vector_div32_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} }; -static const risc86_instruction_t vector_div32_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 18, .latency = 18} +static const risc86_instruction_t vector_div32_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 18, .latency = 18} }; -static const risc86_instruction_t vector_emms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} +static 
const risc86_instruction_t vector_emms_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25} }; -static const risc86_instruction_t vector_enter_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_enter_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALU, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_femms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} +static const risc86_instruction_t vector_femms_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6} }; -static const risc86_instruction_t vector_in_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} +static const risc86_instruction_t vector_in_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11} }; -static const risc86_instruction_t vector_ins_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_ins_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 } }; -static const risc86_instruction_t vector_int_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = 
{.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_int_op = { + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1 }, + .uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 } }; -static const risc86_instruction_t vector_iret_op = -{ - .nr_uops = 5, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[3] = {.type = UOP_ALU, .throughput = 20, .latency = 20}, - .uop[4] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_iret_op = { + .nr_uops = 5, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[3] = { .type = UOP_ALU, .throughput = 20, .latency = 20}, + .uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 } }; -static const risc86_instruction_t vector_invd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} +static const risc86_instruction_t vector_invd_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000} }; -static const risc86_instruction_t vector_jmp_far_op = -{ - 
.nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_jmp_far_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_load_alux_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_load_alux_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_loop_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_loop_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_lss_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_LOAD, 
.throughput = 1, .latency = 2}, - .uop[2] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +static const risc86_instruction_t vector_lss_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[2] = { .type = UOP_ALU, .throughput = 3, .latency = 3} }; -static const risc86_instruction_t vector_mov_mem_seg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mov_mem_seg_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_mov_seg_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +static const risc86_instruction_t vector_mov_seg_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3} }; -static const risc86_instruction_t vector_mov_seg_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} +static const risc86_instruction_t vector_mov_seg_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3} }; -static const risc86_instruction_t vector_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mul_op = { + .nr_uops = 2, + .decode_type = 
DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_mul_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mul_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_mul64_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mul64_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_mul64_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_mul64_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[3] = { 
.type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_out_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} +static const risc86_instruction_t vector_out_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10} }; -static const risc86_instruction_t vector_outs_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 10, .latency = 10}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_outs_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1 }, + .uop[1] = { .type = UOP_STORE, .throughput = 10, .latency = 10}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 } }; -static const risc86_instruction_t vector_pusha_op = -{ - .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_pusha_op = { + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_STORE, 
.throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[6] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[7] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_popa_op = -{ - .nr_uops = 8, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[3] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[4] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[5] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[6] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[7] = {.type = UOP_LOAD, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_popa_op = { + .nr_uops = 8, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[3] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[4] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[5] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[6] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[7] = { .type = UOP_LOAD, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_popf_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 17, .latency = 17} +static const risc86_instruction_t vector_popf_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 }, + .uop[1] = { .type = UOP_ALUX, .throughput = 17, .latency = 17} }; -static const risc86_instruction_t vector_push_mem_op = -{ - .nr_uops = 1, - 
.decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_push_mem_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_pushf_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_pushf_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_ret_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_ret_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_retf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 3, .latency = 3}, - .uop[2] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_retf_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3}, + .uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_scas_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency 
= 1} +static const risc86_instruction_t vector_scas_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_scasb_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_scasb_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_setcc_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_setcc_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_setcc_reg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_setcc_reg_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_test_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - 
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_test_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_test_mem_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, - .uop[1] = {.type = UOP_ALUX, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_test_mem_b_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}, + .uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_xchg_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .throughput = 1, .latency = 1} +static const risc86_instruction_t vector_xchg_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1} }; -static const risc86_instruction_t vector_xlat_op = -{ - .nr_uops = 2, - .decode_type = DECODE_VECTOR, - .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .throughput = 1, .latency = 2} +static const risc86_instruction_t vector_xlat_op = { + .nr_uops = 2, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2} }; -static const risc86_instruction_t vector_wbinvd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_VECTOR, - .uop[0] = 
{.type = UOP_ALU, .throughput = 10000, .latency = 10000} +static const risc86_instruction_t vector_wbinvd_op = { + .nr_uops = 1, + .decode_type = DECODE_VECTOR, + .uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000} }; #define INVALID NULL -static const risc86_instruction_t *opcode_timings[256] = -{ +static const risc86_instruction_t *opcode_timings[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op, /* ADD ADD PUSH ES POP ES*/ @@ -1007,10 +891,11 @@ static const risc86_instruction_t *opcode_timings[256] = &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, /* CLD STD INCDEC*/ &vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_mod3[256] = -{ +static const risc86_instruction_t *opcode_timings_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alux_op, &alu_op, &alux_op, &alu_op, /* ADD ADD PUSH ES POP ES*/ @@ -1143,10 +1028,11 @@ static const risc86_instruction_t *opcode_timings_mod3[256] = &vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op, /* CLD STD INCDEC*/ &vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f[256] = -{ +static const risc86_instruction_t *opcode_timings_0f[256] = { + // clang-format off /*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, @@ -1226,9 +1112,10 @@ static const risc86_instruction_t *opcode_timings_0f[256] = INVALID, &pmul_mem_op, INVALID, INVALID, &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f_mod3[256] = -{ +static const risc86_instruction_t *opcode_timings_0f_mod3[256] = { + 
// clang-format off /*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_alu6_op, &vector_alu6_op, INVALID, &vector_invd_op, &vector_wbinvd_op, INVALID, INVALID, @@ -1308,10 +1195,11 @@ static const risc86_instruction_t *opcode_timings_0f_mod3[256] = INVALID, &pmul_op, INVALID, INVALID, &mmx_op, &mmx_op, &mmx_op, INVALID, &mmx_op, &mmx_op, &mmx_op, INVALID, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f0f[256] = -{ +static const risc86_instruction_t *opcode_timings_0f0f[256] = { + // clang-format off /*00*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1391,10 +1279,10 @@ static const risc86_instruction_t *opcode_timings_0f0f[256] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, - + // clang-format on }; -static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = -{ +static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = { + // clang-format off /*00*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1474,118 +1362,135 @@ static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, - + // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift[8] = -{ +static const risc86_instruction_t *opcode_timings_shift[8] = { + // clang-format off &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_b[8] = -{ +static const risc86_instruction_t *opcode_timings_shift_b[8] = { + // clang-format off &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, 
&vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_shift_mod3[8] = { + // clang-format off &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &alu_op, &alu_op, &alu_op, &alu_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] = { + // clang-format off &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &alux_op, &alux_op, &alux_op, &alux_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_80[8] = -{ +static const risc86_instruction_t *opcode_timings_80[8] = { + // clang-format off &alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_80_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_80_mod3[8] = { + // clang-format off &alux_op, &alux_op, &alux_store_op, &alux_store_op, &alux_op, &alux_op, &alux_op, &alux_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_8x[8] = -{ +static const risc86_instruction_t *opcode_timings_8x[8] = { + // clang-format off &alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_8x_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_8x_mod3[8] = { + // clang-format off &alu_op, &alu_op, &alu_store_op, &alu_store_op, &alu_op, &alu_op, &alu_op, &alu_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_f6[8] = -{ +static const risc86_instruction_t *opcode_timings_f6[8] = { + 
// clang-format off /* TST NOT NEG*/ &test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op, /* MUL IMUL DIV IDIV*/ &vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_f6_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alux_op, &alux_op, /* MUL IMUL DIV IDIV*/ &vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_f7[8] = -{ +static const risc86_instruction_t *opcode_timings_f7[8] = { + // clang-format off /* TST NOT NEG*/ &test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op, /* MUL IMUL DIV IDIV*/ &vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_f7_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, /* MUL IMUL DIV IDIV*/ &vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_ff[8] = -{ +static const risc86_instruction_t *opcode_timings_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ &alu_store_op, &alu_store_op, &store_op, &vector_call_far_op, /* JMP JMP far PUSH*/ &branch_op, &vector_jmp_far_op, &push_mem_op, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_ff_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ &vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op, /* JMP JMP far PUSH*/ &branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID + // clang-format on }; -static const risc86_instruction_t 
*opcode_timings_d8[8] = -{ +static const risc86_instruction_t *opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, /* FSUBs FSUBRs FDIVs FDIVRs*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_d8_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ &float_op, &float_op, &float_op, &float_op, /* FSUB FSUBR FDIV FDIVR*/ &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_d9[8] = -{ +static const risc86_instruction_t *opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDENV FLDCW FSTENV FSTCW*/ &vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_d9_mod3[64] = -{ +static const risc86_instruction_t *opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, @@ -1614,31 +1519,35 @@ static const risc86_instruction_t *opcode_timings_d9_mod3[64] = &fdiv_op, INVALID, &fsqrt_op, &fsin_op, /* opFRNDINT opFSCALE opFSIN opFCOS*/ &float_op, &fdiv_op, &fsin_op, &fsin_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_da[8] = -{ +static const risc86_instruction_t *opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_da_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_da_mod3[8] = { + // clang-format off INVALID, INVALID, 
INVALID, INVALID, /* FCOMPP*/ INVALID, &float_op, INVALID, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_db[8] = -{ +static const risc86_instruction_t *opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDe FSTPe*/ INVALID, &vector_flde_op, INVALID, &vector_fste_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_db_mod3[64] = -{ +static const risc86_instruction_t *opcode_timings_db_mod3[64] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1664,108 +1573,113 @@ static const risc86_instruction_t *opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_dc[8] = -{ +static const risc86_instruction_t *opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, /* FSUBd FSUBRd FDIVd FDIVRd*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_dc_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ &float_op, &float_op, INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ &float_op, &float_op, &fdiv_op, &fdiv_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_dd[8] = -{ +static const risc86_instruction_t *opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FRSTOR FSAVE FSTSW*/ &vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op + // clang-format on }; -static const risc86_instruction_t *opcode_timings_dd_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ 
&float_op, INVALID, &float_op, &float_op, /* FUCOM FUCOMP*/ &float_op, &float_op, INVALID, INVALID + // clang-format on }; -static const risc86_instruction_t *opcode_timings_de[8] = -{ +static const risc86_instruction_t *opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ &load_float_op, &load_float_op, &load_float_op, &load_float_op, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_de_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ &float_op, &float_op, INVALID, &float_op, /* FSUBP FSUBRP FDIVP FDIVRP*/ &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_df[8] = -{ +static const risc86_instruction_t *opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FILDiq FBSTP FISTPiq*/ INVALID, &load_float_op, &vector_float_l_op, &fstore_op, + // clang-format on }; -static const risc86_instruction_t *opcode_timings_df_mod3[8] = -{ +static const risc86_instruction_t *opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ &float_op, INVALID, INVALID, INVALID + // clang-format on }; - static uint8_t last_prefix; -static int prefixes; +static int prefixes; static int decode_timestamp; static int last_complete_timestamp; -typedef struct k6_unit_t -{ - uint32_t uop_mask; - int first_available_cycle; +typedef struct k6_unit_t { + uint32_t uop_mask; + int first_available_cycle; } k6_unit_t; -static int nr_units; +static int nr_units; static k6_unit_t *units; /*K6 has dedicated MMX unit*/ -static k6_unit_t k6_units[] = -{ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX)}, /*Integer X*/ - {.uop_mask = (1 << UOP_ALU)}, /*Integer Y*/ - {.uop_mask = (1 << UOP_MEU) | (1 << 
UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL)}, /*Multimedia*/ - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ +static k6_unit_t k6_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) }, /*Integer X*/ + { .uop_mask = (1 << UOP_ALU) }, /*Integer Y*/ + { .uop_mask = (1 << UOP_MEU) | (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) }, /*Multimedia*/ + { .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/ + { .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/ + { .uop_mask = (1 << UOP_BRANCH) } /*Branch*/ }; #define NR_K6_UNITS (sizeof(k6_units) / sizeof(k6_unit_t)) /*K6-2 and later integrate MMX into ALU X & Y, sharing multiplier, shifter and 3DNow ALU between two execution units*/ -static k6_unit_t k6_2_units[] = -{ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN)}, - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ - (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN)}, - {.uop_mask = (1 << UOP_FLOAT)}, /*Floating point*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/ - {.uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE)}, /*Store*/ - {.uop_mask = (1 << UOP_BRANCH)} /*Branch*/ +static k6_unit_t k6_2_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) }, + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/ + (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) }, + { .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << 
UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/ + { .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/ + { .uop_mask = (1 << UOP_BRANCH) } /*Branch*/ }; #define NR_K6_2_UNITS (sizeof(k6_2_units) / sizeof(k6_unit_t)) @@ -1775,57 +1689,52 @@ static int mul_first_available_cycle; static int shift_first_available_cycle; static int m3dnow_first_available_cycle; -static int uop_run(const risc86_uop_t *uop, int decode_time) +static int +uop_run(const risc86_uop_t *uop, int decode_time) { - int c; - k6_unit_t *best_unit = NULL; - int best_start_cycle = 99999; + k6_unit_t *best_unit = NULL; + int best_start_cycle = 99999; - /*UOP_LIMM does not require execution*/ - if (uop->type == UOP_LIMM) - return decode_time; + /*UOP_LIMM does not require execution*/ + if (uop->type == UOP_LIMM) + return decode_time; - /*Handle shared units on K6-2 and later*/ - if (units == k6_2_units) - { - if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) - decode_time = mul_first_available_cycle; - else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) - decode_time = shift_first_available_cycle; - else if (uop->type == UOP_MEU_3DN && decode_time < mul_first_available_cycle) - decode_time = m3dnow_first_available_cycle; + /*Handle shared units on K6-2 and later*/ + if (units == k6_2_units) { + if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle) + decode_time = mul_first_available_cycle; + else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle) + decode_time = shift_first_available_cycle; + else if (uop->type == UOP_MEU_3DN && decode_time < mul_first_available_cycle) + decode_time = m3dnow_first_available_cycle; + } + + /*Find execution unit for this uOP*/ + for (int c = 0; c < nr_units; c++) { + if (units[c].uop_mask & (1 << uop->type)) { + if (units[c].first_available_cycle < best_start_cycle) { + best_unit = &units[c]; + best_start_cycle = units[c].first_available_cycle; + } } + } 
+ if (!best_unit) + fatal("uop_run: can not find execution unit\n"); - /*Find execution unit for this uOP*/ - for (c = 0; c < nr_units; c++) - { - if (units[c].uop_mask & (1 << uop->type)) - { - if (units[c].first_available_cycle < best_start_cycle) - { - best_unit = &units[c]; - best_start_cycle = units[c].first_available_cycle; - } - } - } - if (!best_unit) - fatal("uop_run: can not find execution unit\n"); + if (best_start_cycle < decode_time) + best_start_cycle = decode_time; + best_unit->first_available_cycle = best_start_cycle + uop->throughput; - if (best_start_cycle < decode_time) - best_start_cycle = decode_time; - best_unit->first_available_cycle = best_start_cycle + uop->throughput; + if (units == k6_2_units) { + if (uop->type == UOP_MEU_MUL) + mul_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_SHIFT) + shift_first_available_cycle = best_start_cycle + uop->throughput; + else if (uop->type == UOP_MEU_3DN) + m3dnow_first_available_cycle = best_start_cycle + uop->throughput; + } - if (units == k6_2_units) - { - if (uop->type == UOP_MEU_MUL) - mul_first_available_cycle = best_start_cycle + uop->throughput; - else if (uop->type == UOP_MEU_SHIFT) - shift_first_available_cycle = best_start_cycle + uop->throughput; - else if (uop->type == UOP_MEU_3DN) - m3dnow_first_available_cycle = best_start_cycle + uop->throughput; - } - - return best_start_cycle + uop->throughput; + return best_start_cycle + uop->throughput; } /*The K6 decoder can decode, per clock : @@ -1833,14 +1742,13 @@ static int uop_run(const risc86_uop_t *uop, int decode_time) - 1 'long' instruction, up to 4 uOPs - 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay) */ -static struct -{ - int nr_uops; - const risc86_uop_t *uops[4]; - /*Earliest time a uop can start. 
If the timestamp is -1, then the uop is - part of a dependency chain and the start time is the completion time of - the previous uop*/ - int earliest_start[4]; +static struct { + int nr_uops; + const risc86_uop_t *uops[4]; + /*Earliest time a uop can start. If the timestamp is -1, then the uop is + part of a dependency chain and the start time is the completion time of + the previous uop*/ + int earliest_start[4]; } decode_buffer; #define NR_OPQUADS 6 @@ -1858,495 +1766,465 @@ static int fpu_st_timestamp[8]; dependent uop chains*/ static int last_uop_timestamp = 0; -void decode_flush(void) +void +decode_flush(void) { - int c; - int uop_timestamp = 0; + int uop_timestamp = 0; - /*Decoded opquad can not be submitted if there are no free spaces in the - opquad buffer*/ - if (decode_timestamp < opquad_completion_timestamp[next_opquad]) - decode_timestamp = opquad_completion_timestamp[next_opquad]; + /*Decoded opquad can not be submitted if there are no free spaces in the + opquad buffer*/ + if (decode_timestamp < opquad_completion_timestamp[next_opquad]) + decode_timestamp = opquad_completion_timestamp[next_opquad]; - /*Ensure that uops can not be submitted before they have been decoded*/ - if (decode_timestamp > last_uop_timestamp) - last_uop_timestamp = decode_timestamp; + /*Ensure that uops can not be submitted before they have been decoded*/ + if (decode_timestamp > last_uop_timestamp) + last_uop_timestamp = decode_timestamp; - /*Submit uops to execution units, and determine the latest completion time*/ - for (c = 0; c < decode_buffer.nr_uops; c++) - { - int start_timestamp; + /*Submit uops to execution units, and determine the latest completion time*/ + for (int c = 0; c < decode_buffer.nr_uops; c++) { + int start_timestamp; - if (decode_buffer.earliest_start[c] == -1) - start_timestamp = last_uop_timestamp; - else - start_timestamp = decode_buffer.earliest_start[c]; - - last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); - if (last_uop_timestamp 
> uop_timestamp) - uop_timestamp = last_uop_timestamp; - } - - /*Calculate opquad completion time. Since opquads complete in order, it - must be after the last completion.*/ - if (uop_timestamp <= last_complete_timestamp) - last_complete_timestamp = last_complete_timestamp + 1; + if (decode_buffer.earliest_start[c] == -1) + start_timestamp = last_uop_timestamp; else - last_complete_timestamp = uop_timestamp; + start_timestamp = decode_buffer.earliest_start[c]; - /*Advance to next opquad in buffer*/ - opquad_completion_timestamp[next_opquad] = last_complete_timestamp; - next_opquad++; - if (next_opquad == NR_OPQUADS) - next_opquad = 0; + last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); + if (last_uop_timestamp > uop_timestamp) + uop_timestamp = last_uop_timestamp; + } - decode_timestamp++; - decode_buffer.nr_uops = 0; + /*Calculate opquad completion time. Since opquads complete in order, it + must be after the last completion.*/ + if (uop_timestamp <= last_complete_timestamp) + last_complete_timestamp = last_complete_timestamp + 1; + else + last_complete_timestamp = uop_timestamp; + + /*Advance to next opquad in buffer*/ + opquad_completion_timestamp[next_opquad] = last_complete_timestamp; + next_opquad++; + if (next_opquad == NR_OPQUADS) + next_opquad = 0; + + decode_timestamp++; + decode_buffer.nr_uops = 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the limit on K6 short decoding*/ -static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) +static int +codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) { - int len = prefixes + 1; /*Opcode*/ - if (deps & MODRM) - { - len++; /*ModR/M*/ - if (deps & HAS_IMM8) - len++; - if (deps & HAS_IMM1632) - len += (op_32 & 0x100) ? 4 : 2; + int len = prefixes + 1; /*Opcode*/ + if (deps & MODRM) { + len++; /*ModR/M*/ + if (deps & HAS_IMM8) + len++; + if (deps & HAS_IMM1632) + len += (op_32 & 0x100) ? 
4 : 2; - if (op_32 & 0x200) - { - if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) - { - /* Has SIB*/ - len++; - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0x700) == 0x500) - len += 4; - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0xc7) == 0x05) - len += 4; - } - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 2; - else if ((fetchdat & 0xc7) == 0x06) - len += 2; - } + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; } + } - return len; + return len; } -static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) +static void +decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) { - uint32_t regmask_required; - uint32_t regmask_modified; - int c, d; - int earliest_start = 0; - decode_type_t decode_type = ins->decode_type; - int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); + uint32_t regmask_required; + uint32_t regmask_modified; + int c; + int d; + int earliest_start = 0; + decode_type_t decode_type = ins->decode_type; + int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); - /*Generate input register mask, and determine the earliest time this - instruction can start. 
This is not accurate, as this is calculated per - x86 instruction when it should be handled per uop*/ - regmask_required = get_dstdep_mask(deps, fetchdat, bit8); - regmask_required |= get_addr_regmask(deps, fetchdat, op_32); - for (c = 0; c < 8; c++) - { - if (regmask_required & (1 << c)) - { - if (reg_available_timestamp[c] > decode_timestamp) - earliest_start = reg_available_timestamp[c]; - } + /*Generate input register mask, and determine the earliest time this + instruction can start. This is not accurate, as this is calculated per + x86 instruction when it should be handled per uop*/ + regmask_required = get_dstdep_mask(deps, fetchdat, bit8); + regmask_required |= get_addr_regmask(deps, fetchdat, op_32); + for (c = 0; c < 8; c++) { + if (regmask_required & (1 << c)) { + if (reg_available_timestamp[c] > decode_timestamp) + earliest_start = reg_available_timestamp[c]; } - if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) - earliest_start = fpu_st_timestamp[0]; - if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) - earliest_start = fpu_st_timestamp[1]; - if ((deps & FPU_RW_STREG)) - { - int reg = fetchdat & 7; + } + if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) + earliest_start = fpu_st_timestamp[0]; + if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) + earliest_start = fpu_st_timestamp[1]; + if (deps & FPU_RW_STREG) { + int reg = fetchdat & 7; - if (fpu_st_timestamp[reg] > decode_timestamp) - earliest_start = fpu_st_timestamp[reg]; - } + if (fpu_st_timestamp[reg] > decode_timestamp) + earliest_start = fpu_st_timestamp[reg]; + } - /*Short decoders are limited to 7 bytes*/ - if (decode_type == DECODE_SHORT && instr_length > 7) - decode_type = DECODE_LONG; - /*Long decoder is limited to 11 bytes*/ - else if (instr_length > 11) - decode_type = DECODE_VECTOR; + /*Short decoders are limited to 7 bytes*/ + if (decode_type == DECODE_SHORT && instr_length > 7) + decode_type = DECODE_LONG; + /*Long 
decoder is limited to 11 bytes*/ + else if (instr_length > 11) + decode_type = DECODE_VECTOR; - switch (decode_type) - { - case DECODE_SHORT: - if (decode_buffer.nr_uops) - { - decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; - decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[decode_buffer.nr_uops+1] = &ins->uop[1]; - decode_buffer.earliest_start[decode_buffer.nr_uops+1] = -1; - } - decode_buffer.nr_uops += ins->nr_uops; - - decode_flush(); + switch (decode_type) { + case DECODE_SHORT: + if (decode_buffer.nr_uops) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + if (ins->nr_uops > 1) { + decode_buffer.uops[decode_buffer.nr_uops + 1] = &ins->uop[1]; + decode_buffer.earliest_start[decode_buffer.nr_uops + 1] = -1; } - else - { - decode_buffer.nr_uops = ins->nr_uops; - decode_buffer.uops[0] = &ins->uop[0]; - decode_buffer.earliest_start[0] = earliest_start; - if (ins->nr_uops > 1) - { - decode_buffer.uops[1] = &ins->uop[1]; - decode_buffer.earliest_start[1] = -1; - } - } - break; + decode_buffer.nr_uops += ins->nr_uops; - case DECODE_LONG: - if (decode_buffer.nr_uops) - decode_flush(); - - decode_buffer.nr_uops = ins->nr_uops; - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[c] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[c] = earliest_start; - else - decode_buffer.earliest_start[c] = -1; - } decode_flush(); - break; - - case DECODE_VECTOR: - if (decode_buffer.nr_uops) - decode_flush(); - - decode_timestamp++; - d = 0; - - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[d] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[d] = earliest_start; - else - decode_buffer.earliest_start[d] = -1; - d++; - - if (d == 4) - { - d = 0; - decode_buffer.nr_uops = 4; - decode_flush(); - } + } else { + decode_buffer.nr_uops = ins->nr_uops; + decode_buffer.uops[0] = 
&ins->uop[0]; + decode_buffer.earliest_start[0] = earliest_start; + if (ins->nr_uops > 1) { + decode_buffer.uops[1] = &ins->uop[1]; + decode_buffer.earliest_start[1] = -1; } - if (d) - { - decode_buffer.nr_uops = d; - decode_flush(); - } - break; - } + } + break; - /*Update write timestamps for any output registers*/ - regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); - for (c = 0; c < 8; c++) - { - if (regmask_modified & (1 << c)) - reg_available_timestamp[c] = last_complete_timestamp; - } - if (deps & FPU_POP) - { - for (c = 0; c < 7; c++) - fpu_st_timestamp[c] = fpu_st_timestamp[c+1]; - fpu_st_timestamp[7] = 0; - } - if (deps & FPU_POP2) - { - for (c = 0; c < 6; c++) - fpu_st_timestamp[c] = fpu_st_timestamp[c+2]; - fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; - } - if (deps & FPU_PUSH) - { - for (c = 0; c < 7; c++) - fpu_st_timestamp[c+1] = fpu_st_timestamp[c]; - fpu_st_timestamp[0] = 0; - } - if (deps & FPU_WRITE_ST0) - fpu_st_timestamp[0] = last_complete_timestamp; - if (deps & FPU_WRITE_ST1) - fpu_st_timestamp[1] = last_complete_timestamp; - if (deps & FPU_WRITE_STREG) - { - int reg = fetchdat & 7; - if (deps & FPU_POP) - reg--; - if (reg >= 0 && - !(reg == 0 && (deps & FPU_WRITE_ST0)) && - !(reg == 1 && (deps & FPU_WRITE_ST1))) - fpu_st_timestamp[reg] = last_complete_timestamp; - } -} + case DECODE_LONG: + if (decode_buffer.nr_uops) + decode_flush(); -void codegen_timing_k6_block_start(void) -{ - int c; - - for (c = 0; c < nr_units; c++) - units[c].first_available_cycle = 0; - - mul_first_available_cycle = 0; - shift_first_available_cycle = 0; - m3dnow_first_available_cycle = 0; - - decode_timestamp = 0; - last_complete_timestamp = 0; - - for (c = 0; c < NR_OPQUADS; c++) - opquad_completion_timestamp[c] = 0; - next_opquad = 0; - - for (c = 0; c < NR_REGS; c++) - reg_available_timestamp[c] = 0; - for (c = 0; c < 8; c++) - fpu_st_timestamp[c] = 0; -} - -void codegen_timing_k6_start(void) -{ - if (cpu_s->cpu_type == CPU_K6) - { - units = k6_units; - 
nr_units = NR_K6_UNITS; - } - else - { - units = k6_2_units; - nr_units = NR_K6_2_UNITS; - } - last_prefix = 0; - prefixes = 0; -} - -void codegen_timing_k6_prefix(uint8_t prefix, uint32_t fetchdat) -{ - if (prefix != 0x0f) - decode_timestamp++; - - last_prefix = prefix; - prefixes++; -} - -void codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) -{ - const risc86_instruction_t **ins_table; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int old_last_complete_timestamp = last_complete_timestamp; - int bit8 = !(opcode & 1); - - switch (last_prefix) - { - case 0x0f: - if (opcode == 0x0f) - { - /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ - uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ - uint8_t modrm = fetchdat & 0xff; - uint8_t sib = (fetchdat >> 8) & 0xff; - - if ((modrm & 0xc0) != 0xc0) - { - if (op_32 & 0x200) - { - if ((modrm & 7) == 4) - { - /* Has SIB*/ - opcode_pc++; - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((sib & 0x07) == 0x05) - opcode_pc += 4; - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 4; - else if ((modrm & 0xc7) == 0x05) - opcode_pc += 4; - } - } - else - { - if ((modrm & 0xc0) == 0x40) - opcode_pc++; - else if ((modrm & 0xc0) == 0x80) - opcode_pc += 2; - else if ((modrm & 0xc7) == 0x06) - opcode_pc += 2; - } - } - - opcode = fastreadb(cs + opcode_pc); - - ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; - deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; - } + decode_buffer.nr_uops = ins->nr_uops; + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[c] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[c] = earliest_start; else - { - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? 
opcode_deps_0f_mod3 : opcode_deps_0f; - } - break; + decode_buffer.earliest_start[c] = -1; + } + decode_flush(); + break; - case 0xd8: - ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? 
opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case DECODE_VECTOR: + if (decode_buffer.nr_uops) + decode_flush(); + + decode_timestamp++; + d = 0; + + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[d] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[d] = earliest_start; + else + decode_buffer.earliest_start[d] = -1; + d++; + + if (d == 4) { + d = 0; + decode_buffer.nr_uops = 4; + decode_flush(); + } + } + if (d) { + decode_buffer.nr_uops = d; + decode_flush(); + } + break; + } + + /*Update write timestamps for any output registers*/ + regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); + for (c = 0; c < 8; c++) { + if (regmask_modified & (1 << c)) + reg_available_timestamp[c] = last_complete_timestamp; + } + if (deps & FPU_POP) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 1]; + fpu_st_timestamp[7] = 0; + } + if (deps & FPU_POP2) { + for (c = 0; c < 6; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 2]; + fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; + } + if (deps & FPU_PUSH) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c + 1] = fpu_st_timestamp[c]; + fpu_st_timestamp[0] = 0; + } + if (deps & FPU_WRITE_ST0) + fpu_st_timestamp[0] = last_complete_timestamp; + if (deps & FPU_WRITE_ST1) + fpu_st_timestamp[1] = last_complete_timestamp; + if (deps & FPU_WRITE_STREG) { + int reg = fetchdat & 7; + if (deps & FPU_POP) + reg--; + if (reg >= 0 && !(reg == 0 && (deps & FPU_WRITE_ST0)) && !(reg == 1 && (deps & FPU_WRITE_ST1))) + fpu_st_timestamp[reg] = last_complete_timestamp; + } +} + +void +codegen_timing_k6_block_start(void) +{ + int c; + + for (c = 0; c < nr_units; c++) + units[c].first_available_cycle = 0; + + mul_first_available_cycle = 0; + shift_first_available_cycle = 0; + m3dnow_first_available_cycle = 0; + + decode_timestamp = 0; + last_complete_timestamp = 0; + + for (c = 0; c < NR_OPQUADS; c++) + opquad_completion_timestamp[c] = 0; + next_opquad = 0; + + for (c = 0; c < 
NR_REGS; c++) + reg_available_timestamp[c] = 0; + for (c = 0; c < 8; c++) + fpu_st_timestamp[c] = 0; +} + +void +codegen_timing_k6_start(void) +{ + if (cpu_s->cpu_type == CPU_K6) { + units = k6_units; + nr_units = NR_K6_UNITS; + } else { + units = k6_2_units; + nr_units = NR_K6_2_UNITS; + } + last_prefix = 0; + prefixes = 0; +} + +void +codegen_timing_k6_prefix(uint8_t prefix, uint32_t fetchdat) +{ + if (prefix != 0x0f) + decode_timestamp++; + + last_prefix = prefix; + prefixes++; +} + +void +codegen_timing_k6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +{ + const risc86_instruction_t **ins_table; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int old_last_complete_timestamp = last_complete_timestamp; + int bit8 = !(opcode & 1); + + switch (last_prefix) { + case 0x0f: + if (opcode == 0x0f) { + /*3DNow has the actual opcode after ModR/M, SIB and any offset*/ + uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/ + uint8_t modrm = fetchdat & 0xff; + uint8_t sib = (fetchdat >> 8) & 0xff; + + if ((modrm & 0xc0) != 0xc0) { + if (op_32 & 0x200) { + if ((modrm & 7) == 4) { + /* Has SIB*/ + opcode_pc++; + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((sib & 0x07) == 0x05) + opcode_pc += 4; + } else { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 4; + else if ((modrm & 0xc7) == 0x05) + opcode_pc += 4; + } + } else { + if ((modrm & 0xc0) == 0x40) + opcode_pc++; + else if ((modrm & 0xc0) == 0x80) + opcode_pc += 2; + else if ((modrm & 0xc7) == 0x06) + opcode_pc += 2; + } + } + + opcode = fastreadb(cs + opcode_pc); + + ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f; + deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f; + } else { + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + } + break; + + case 0xd8: + ins_table = mod3 ? 
opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + case 0x83: + ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xd0: + case 0xd2: + ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + deps = mod3 ? 
opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc1: + case 0xd1: + case 0xd3: + ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: - ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: case 0x83: - ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; + ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xd0: case 0xd2: - ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (ins_table[opcode]) + decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); + else + decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); +} - case 0xc1: case 0xd1: case 0xd3: - ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? 
opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; - - case 0xf6: - ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; - - default: - ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } - } - - if (ins_table[opcode]) - decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); - else - decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8); +void +codegen_timing_k6_block_end(void) +{ + if (decode_buffer.nr_uops) { + int old_last_complete_timestamp = last_complete_timestamp; + decode_flush(); codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); + } } -void codegen_timing_k6_block_end(void) +int +codegen_timing_k6_jump_cycles(void) { - if (decode_buffer.nr_uops) - { - int old_last_complete_timestamp = last_complete_timestamp; - decode_flush(); - codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); - } + if (decode_buffer.nr_uops) + return 1; + return 0; } -int codegen_timing_k6_jump_cycles(void) -{ - if (decode_buffer.nr_uops) - return 1; - return 0; -} - -codegen_timing_t codegen_timing_k6 = -{ - codegen_timing_k6_start, - codegen_timing_k6_prefix, - codegen_timing_k6_opcode, - codegen_timing_k6_block_start, - codegen_timing_k6_block_end, - codegen_timing_k6_jump_cycles +codegen_timing_t codegen_timing_k6 = { + codegen_timing_k6_start, + codegen_timing_k6_prefix, + codegen_timing_k6_opcode, + codegen_timing_k6_block_start, + 
codegen_timing_k6_block_end, + codegen_timing_k6_jump_cycles }; diff --git a/src/cpu/codegen_timing_p6.c b/src/cpu/codegen_timing_p6.c index cf40e084e..008e36594 100644 --- a/src/cpu/codegen_timing_p6.c +++ b/src/cpu/codegen_timing_p6.c @@ -8,6 +8,7 @@ #include "cpu.h" #include <86box/mem.h> #include <86box/machine.h> +#include <86box/plat_unused.h> #include "x86.h" #include "x86_ops.h" @@ -17,893 +18,775 @@ #include "codegen_ops.h" #include "codegen_timing_common.h" -typedef enum uop_type_t -{ - UOP_ALU = 0, /*Executes in Port 0 or 1 ALU units*/ - UOP_ALUP0, /*Executes in Port 0 ALU unit*/ - UOP_LOAD, /*Executes in Load unit*/ - UOP_STORED, /*Executes in Data Store unit*/ - UOP_STOREA, /*Executes in Address Store unit*/ - UOP_FLOAD, /*Executes in Load unit*/ - UOP_FSTORED, /*Executes in Data Store unit*/ - UOP_FSTOREA, /*Executes in Address Store unit*/ - UOP_MLOAD, /*Executes in Load unit*/ - UOP_MSTORED, /*Executes in Data Store unit*/ - UOP_MSTOREA, /*Executes in Address Store unit*/ - UOP_FLOAT, /*Executes in Floating Point unit*/ - UOP_MMX, /*Executes in Port 0 or 1 ALU units as MMX*/ - UOP_MMX_SHIFT, /*Executes in Port 1 ALU unit. Uses MMX shifter*/ - UOP_MMX_MUL, /*Executes in Port 0 ALU unit. 
Uses MMX multiplier*/ - UOP_BRANCH, /*Executes in Branch unit*/ - UOP_FXCH /*Does not require an execution unit*/ +typedef enum uop_type_t { + UOP_ALU = 0, /*Executes in Port 0 or 1 ALU units*/ + UOP_ALUP0, /*Executes in Port 0 ALU unit*/ + UOP_LOAD, /*Executes in Load unit*/ + UOP_STORED, /*Executes in Data Store unit*/ + UOP_STOREA, /*Executes in Address Store unit*/ + UOP_FLOAD, /*Executes in Load unit*/ + UOP_FSTORED, /*Executes in Data Store unit*/ + UOP_FSTOREA, /*Executes in Address Store unit*/ + UOP_MLOAD, /*Executes in Load unit*/ + UOP_MSTORED, /*Executes in Data Store unit*/ + UOP_MSTOREA, /*Executes in Address Store unit*/ + UOP_FLOAT, /*Executes in Floating Point unit*/ + UOP_MMX, /*Executes in Port 0 or 1 ALU units as MMX*/ + UOP_MMX_SHIFT, /*Executes in Port 1 ALU unit. Uses MMX shifter*/ + UOP_MMX_MUL, /*Executes in Port 0 ALU unit. Uses MMX multiplier*/ + UOP_BRANCH, /*Executes in Branch unit*/ + UOP_FXCH /*Does not require an execution unit*/ } uop_type_t; -typedef enum decode_type_t -{ - DECODE_SIMPLE, - DECODE_COMPLEX, +typedef enum decode_type_t { + DECODE_SIMPLE, + DECODE_COMPLEX, } decode_type_t; #define MAX_UOPS 10 -typedef struct p6_uop_t -{ - uop_type_t type; - int latency; +typedef struct p6_uop_t { + uop_type_t type; + int latency; } p6_uop_t; -typedef struct macro_op_t -{ - int nr_uops; - decode_type_t decode_type; - p6_uop_t uop[MAX_UOPS]; +typedef struct macro_op_t { + int nr_uops; + decode_type_t decode_type; + p6_uop_t uop[MAX_UOPS]; } macro_op_t; -static const macro_op_t alu_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t alu_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const macro_op_t alup0_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t alup0_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = 
{.type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t load_alu_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t load_alu_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t load_alup0_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t load_alup0_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t alu_store_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_STORED, .latency = 1}, - .uop[3] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t alu_store_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_STORED, .latency = 1}, + .uop[3] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t alup0_store_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_STORED, .latency = 1}, - .uop[3] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t alup0_store_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_STORED, .latency = 1}, + .uop[3] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t branch_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - 
.uop[0] = {.type = UOP_BRANCH, .latency = 2} +static const macro_op_t branch_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_BRANCH, .latency = 2} }; -static const macro_op_t fxch_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FXCH, .latency = 1} +static const macro_op_t fxch_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FXCH, .latency = 1} }; -static const macro_op_t load_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_LOAD, .latency = 1} +static const macro_op_t load_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_LOAD, .latency = 1} }; -static const macro_op_t store_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 1}, - .uop[1] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t store_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = { .type = UOP_STOREA, .latency = 1} }; - -static const macro_op_t bswap_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, +static const macro_op_t bswap_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, }; -static const macro_op_t leave_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t leave_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t lods_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = 
UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t lods_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t loop_op = -{ - .nr_uops = 5, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1}, - .uop[4] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t loop_op = { + .nr_uops = 5, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1}, + .uop[4] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t mov_reg_seg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, +static const macro_op_t mov_reg_seg_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, }; -static const macro_op_t movs_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t movs_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t pop_reg_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t pop_reg_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = 
{.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t pop_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t pop_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t push_imm_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 1}, - .uop[1] = {.type = UOP_STOREA, .latency = 1}, +static const macro_op_t push_imm_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = { .type = UOP_STOREA, .latency = 1}, }; -static const macro_op_t push_mem_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t push_mem_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t push_seg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t push_seg_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + 
.uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t stos_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t stos_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[1] = {.type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t test_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_reg_b_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t test_reg_b_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t test_mem_imm_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t test_mem_imm_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_mem_imm_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t test_mem_imm_b_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t xchg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = 
{.type = UOP_ALU, .latency = 1} +static const macro_op_t xchg_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; - -static const macro_op_t mmx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_MMX, .latency = 1} +static const macro_op_t mmx_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX, .latency = 1} }; -static const macro_op_t mmx_mul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} +static const macro_op_t mmx_mul_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} }; -static const macro_op_t mmx_shift_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1} +static const macro_op_t mmx_shift_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1} }; -static const macro_op_t load_mmx_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 2}, - .uop[1] = {.type = UOP_MMX, .latency = 2} +static const macro_op_t load_mmx_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = { .type = UOP_MMX, .latency = 2} }; -static const macro_op_t load_mmx_mul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 2}, - .uop[1] = {.type = UOP_MMX_MUL, .latency = 2} +static const macro_op_t load_mmx_mul_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = { .type = UOP_MMX_MUL, .latency = 2} }; -static const macro_op_t load_mmx_shift_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 2}, - .uop[1] = {.type = 
UOP_MMX_SHIFT, .latency = 2} +static const macro_op_t load_mmx_shift_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = { .type = UOP_MMX_SHIFT, .latency = 2} }; -static const macro_op_t mload_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_MLOAD, .latency = 1}, +static const macro_op_t mload_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MLOAD, .latency = 1}, }; -static const macro_op_t mstore_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_MSTORED, .latency = 1}, - .uop[1] = {.type = UOP_MSTOREA, .latency = 1} +static const macro_op_t mstore_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_MSTORED, .latency = 1}, + .uop[1] = { .type = UOP_MSTOREA, .latency = 1} }; -static const macro_op_t pmul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} +static const macro_op_t pmul_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_MMX_MUL, .latency = 1} }; -static const macro_op_t pmul_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 2}, - .uop[1] = {.type = UOP_MMX_MUL, .latency = 2} +static const macro_op_t pmul_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 2}, + .uop[1] = { .type = UOP_MMX_MUL, .latency = 2} }; -static const macro_op_t float_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t float_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t fadd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 2} +static const macro_op_t fadd_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, 
+ .uop[0] = {.type = UOP_FLOAT, .latency = 2} }; -static const macro_op_t fmul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_ALUP0, .latency = 3} +static const macro_op_t fmul_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALUP0, .latency = 3} }; -static const macro_op_t float2_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t float2_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1}, + .uop[1] = { .type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t fchs_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 2}, - .uop[1] = {.type = UOP_FLOAT, .latency = 2}, - .uop[2] = {.type = UOP_FLOAT, .latency = 2} +static const macro_op_t fchs_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = { .type = UOP_FLOAT, .latency = 2}, + .uop[2] = { .type = UOP_FLOAT, .latency = 2} }; -static const macro_op_t load_float_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t load_float_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = { .type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t load_fadd_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 2} +static const macro_op_t load_fadd_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = { .type = UOP_FLOAT, .latency = 2} }; -static const macro_op_t load_fmul_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type 
= UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 4} +static const macro_op_t load_fmul_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 4} }; -static const macro_op_t fstore_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FSTORED, .latency = 1}, - .uop[1] = {.type = UOP_FSTOREA, .latency = 1}, +static const macro_op_t fstore_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FSTORED, .latency = 1}, + .uop[1] = { .type = UOP_FSTOREA, .latency = 1}, }; -static const macro_op_t load_fiadd_op = -{ - .nr_uops = 7, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 1}, - .uop[2] = {.type = UOP_FLOAT, .latency = 1}, - .uop[3] = {.type = UOP_FLOAT, .latency = 1}, - .uop[4] = {.type = UOP_FLOAT, .latency = 1}, - .uop[5] = {.type = UOP_FLOAT, .latency = 1}, - .uop[6] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t load_fiadd_op = { + .nr_uops = 7, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = { .type = UOP_FLOAT, .latency = 1}, + .uop[2] = { .type = UOP_FLOAT, .latency = 1}, + .uop[3] = { .type = UOP_FLOAT, .latency = 1}, + .uop[4] = { .type = UOP_FLOAT, .latency = 1}, + .uop[5] = { .type = UOP_FLOAT, .latency = 1}, + .uop[6] = { .type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t fdiv_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 37} +static const macro_op_t fdiv_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 37} }; -static const macro_op_t fdiv_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAT, .latency = 37} +static const macro_op_t fdiv_mem_op = { + .nr_uops = 2, + 
.decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1 }, + .uop[1] = { .type = UOP_FLOAT, .latency = 37} }; -static const macro_op_t fsin_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 62} +static const macro_op_t fsin_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 62} }; -static const macro_op_t fsqrt_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 69} +static const macro_op_t fsqrt_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 69} }; -static const macro_op_t fldcw_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_FLOAT, .latency = 10} +static const macro_op_t fldcw_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_FLOAT, .latency = 10} }; -static const macro_op_t complex_float_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 1} +static const macro_op_t complex_float_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 1} }; -static const macro_op_t complex_float_l_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 50} +static const macro_op_t complex_float_l_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 50} }; -static const macro_op_t flde_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAD, .latency = 1}, - .uop[1] = {.type = UOP_FLOAD, .latency = 1}, - .uop[2] = {.type = UOP_FLOAT, .latency = 2} +static const macro_op_t flde_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAD, .latency = 1}, + .uop[1] = { .type = UOP_FLOAD, .latency = 1}, + .uop[2] = { .type = UOP_FLOAT, .latency = 2} }; -static const macro_op_t 
fste_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_FLOAT, .latency = 2}, - .uop[1] = {.type = UOP_FSTORED, .latency = 1}, - .uop[2] = {.type = UOP_FSTOREA, .latency = 1} +static const macro_op_t fste_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_FLOAT, .latency = 2}, + .uop[1] = { .type = UOP_FSTORED, .latency = 1}, + .uop[2] = { .type = UOP_FSTOREA, .latency = 1} }; -static const macro_op_t complex_alu1_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t complex_alu1_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1} }; -static const macro_op_t alu2_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t alu2_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t alu3_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t alu3_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t alu6_op = -{ - .nr_uops = 6, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1}, - .uop[4] = {.type = UOP_ALU, .latency = 1}, - .uop[5] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t alu6_op = { + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, 
.latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1}, + .uop[4] = { .type = UOP_ALU, .latency = 1}, + .uop[5] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t complex_alup0_1_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t complex_alup0_1_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t alup0_3_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t alup0_3_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t alup0_6_op = -{ - .nr_uops = 6, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALUP0, .latency = 1}, - .uop[3] = {.type = UOP_ALUP0, .latency = 1}, - .uop[4] = {.type = UOP_ALUP0, .latency = 1}, - .uop[5] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t alup0_6_op = { + .nr_uops = 6, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALUP0, .latency = 1}, + .uop[3] = { .type = UOP_ALUP0, .latency = 1}, + .uop[4] = { .type = UOP_ALUP0, .latency = 1}, + .uop[5] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t arpl_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 3}, - .uop[1] = {.type = UOP_ALU, .latency = 3} +static const macro_op_t arpl_op = { + .nr_uops = 2, + .decode_type = 
DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = { .type = UOP_ALU, .latency = 3} }; -static const macro_op_t bound_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t bound_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t bsx_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 10} +static const macro_op_t bsx_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10} }; -static const macro_op_t call_far_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 3}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t call_far_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t cli_sti_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 7} +static const macro_op_t cli_sti_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 7} }; -static const macro_op_t cmps_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t 
cmps_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t cmpsb_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t cmpsb_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t cmpxchg_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1}, - .uop[2] = {.type = UOP_STORED, .latency = 1}, - .uop[3] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t cmpxchg_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1}, + .uop[2] = { .type = UOP_STORED, .latency = 1}, + .uop[3] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t cmpxchg_b_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_STORED, .latency = 1}, - .uop[3] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t cmpxchg_b_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_STORED, .latency = 1}, + .uop[3] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t complex_push_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 1}, - .uop[1] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t 
complex_push_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1}, + .uop[1] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t cpuid_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 23} +static const macro_op_t cpuid_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 23} }; -static const macro_op_t div16_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 21} +static const macro_op_t div16_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 21} }; -static const macro_op_t div16_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 21} +static const macro_op_t div16_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1 }, + .uop[1] = { .type = UOP_ALUP0, .latency = 21} }; -static const macro_op_t div32_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 37} +static const macro_op_t div32_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 37} }; -static const macro_op_t div32_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 37} +static const macro_op_t div32_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1 }, + .uop[1] = { .type = UOP_ALUP0, .latency = 37} }; -static const macro_op_t emms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 50} +static const macro_op_t emms_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 50} }; 
-static const macro_op_t enter_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 1}, - .uop[1] = {.type = UOP_STOREA, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 10} +static const macro_op_t enter_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 1 }, + .uop[1] = { .type = UOP_STOREA, .latency = 1 }, + .uop[2] = { .type = UOP_ALU, .latency = 10} }; -static const macro_op_t femms_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 6} +static const macro_op_t femms_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 6} }; -static const macro_op_t in_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 18} +static const macro_op_t in_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 18} }; -static const macro_op_t ins_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 18}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t ins_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 18}, + .uop[1] = { .type = UOP_STORED, .latency = 1 }, + .uop[2] = { .type = UOP_STOREA, .latency = 1 }, + .uop[3] = { .type = UOP_ALU, .latency = 1 } }; -static const macro_op_t int_op = -{ - .nr_uops = 8, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 20}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type = UOP_STORED, .latency = 1}, - .uop[4] = {.type = UOP_STOREA, .latency = 1}, - .uop[5] = {.type = UOP_STORED, .latency = 1}, - .uop[6] = {.type = UOP_STOREA, .latency = 1}, - .uop[7] = {.type = 
UOP_BRANCH, .latency = 1} +static const macro_op_t int_op = { + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 20}, + .uop[1] = { .type = UOP_STORED, .latency = 1 }, + .uop[2] = { .type = UOP_STOREA, .latency = 1 }, + .uop[3] = { .type = UOP_STORED, .latency = 1 }, + .uop[4] = { .type = UOP_STOREA, .latency = 1 }, + .uop[5] = { .type = UOP_STORED, .latency = 1 }, + .uop[6] = { .type = UOP_STOREA, .latency = 1 }, + .uop[7] = { .type = UOP_BRANCH, .latency = 1 } }; -static const macro_op_t iret_op = -{ - .nr_uops = 5, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 3}, - .uop[1] = {.type = UOP_LOAD, .latency = 3}, - .uop[2] = {.type = UOP_LOAD, .latency = 3}, - .uop[3] = {.type = UOP_ALU, .latency = 20}, - .uop[4] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t iret_op = { + .nr_uops = 5, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 3 }, + .uop[1] = { .type = UOP_LOAD, .latency = 3 }, + .uop[2] = { .type = UOP_LOAD, .latency = 3 }, + .uop[3] = { .type = UOP_ALU, .latency = 20}, + .uop[4] = { .type = UOP_BRANCH, .latency = 1 } }; -static const macro_op_t invd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 500} +static const macro_op_t invd_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 500} }; -static const macro_op_t jmp_far_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 3}, - .uop[1] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t jmp_far_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3}, + .uop[1] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t lss_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .latency = 1}, - .uop[2] = {.type = UOP_ALU, 
.latency = 3} +static const macro_op_t lss_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 3} }; -static const macro_op_t mov_mem_seg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, +static const macro_op_t mov_mem_seg_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, }; -static const macro_op_t mov_seg_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 3} +static const macro_op_t mov_seg_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 3} }; -static const macro_op_t mov_seg_reg_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 3} +static const macro_op_t mov_seg_reg_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 3} }; -static const macro_op_t mul_op = -{ - .nr_uops = 1, - .decode_type = DECODE_SIMPLE, - .uop[0] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t mul_op = { + .nr_uops = 1, + .decode_type = DECODE_SIMPLE, + .uop[0] = {.type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t mul_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t mul_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1} }; 
-static const macro_op_t mul64_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t mul64_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t mul64_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALUP0, .latency = 1}, - .uop[3] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t mul64_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALUP0, .latency = 1}, + .uop[3] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t out_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 18} +static const macro_op_t out_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 18} }; -static const macro_op_t outs_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 18} +static const macro_op_t outs_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1 }, + .uop[1] = { .type = UOP_ALU, .latency = 18} }; -static const macro_op_t pusha_op = -{ - .nr_uops = 8, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_STORED, .latency = 2}, - .uop[1] = {.type = UOP_STOREA, .latency = 2}, - .uop[2] = {.type = UOP_STORED, .latency = 2}, - .uop[3] = {.type = UOP_STOREA, .latency = 2}, - .uop[4] = {.type = UOP_STORED, .latency = 2}, - .uop[5] = {.type = 
UOP_STOREA, .latency = 2}, - .uop[6] = {.type = UOP_STORED, .latency = 2}, - .uop[7] = {.type = UOP_STOREA, .latency = 2} +static const macro_op_t pusha_op = { + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_STORED, .latency = 2}, + .uop[1] = { .type = UOP_STOREA, .latency = 2}, + .uop[2] = { .type = UOP_STORED, .latency = 2}, + .uop[3] = { .type = UOP_STOREA, .latency = 2}, + .uop[4] = { .type = UOP_STORED, .latency = 2}, + .uop[5] = { .type = UOP_STOREA, .latency = 2}, + .uop[6] = { .type = UOP_STORED, .latency = 2}, + .uop[7] = { .type = UOP_STOREA, .latency = 2} }; -static const macro_op_t popa_op = -{ - .nr_uops = 8, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .latency = 1}, - .uop[2] = {.type = UOP_LOAD, .latency = 1}, - .uop[3] = {.type = UOP_LOAD, .latency = 1}, - .uop[4] = {.type = UOP_LOAD, .latency = 1}, - .uop[5] = {.type = UOP_LOAD, .latency = 1}, - .uop[6] = {.type = UOP_LOAD, .latency = 1}, - .uop[7] = {.type = UOP_LOAD, .latency = 1} +static const macro_op_t popa_op = { + .nr_uops = 8, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .latency = 1}, + .uop[2] = { .type = UOP_LOAD, .latency = 1}, + .uop[3] = { .type = UOP_LOAD, .latency = 1}, + .uop[4] = { .type = UOP_LOAD, .latency = 1}, + .uop[5] = { .type = UOP_LOAD, .latency = 1}, + .uop[6] = { .type = UOP_LOAD, .latency = 1}, + .uop[7] = { .type = UOP_LOAD, .latency = 1} }; -static const macro_op_t popf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 6}, - .uop[2] = {.type = UOP_ALUP0, .latency = 10} +static const macro_op_t popf_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1 }, + .uop[1] = { .type = UOP_ALU, .latency = 6 }, + .uop[2] = { .type = UOP_ALUP0, .latency = 10} }; -static const macro_op_t 
pushf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1} +static const macro_op_t pushf_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1} }; -static const macro_op_t ret_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t ret_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t retf_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 3}, - .uop[2] = {.type = UOP_BRANCH, .latency = 1} +static const macro_op_t retf_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 3}, + .uop[2] = { .type = UOP_BRANCH, .latency = 1} }; -static const macro_op_t scas_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t scas_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t scasb_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t scasb_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t 
setcc_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_FSTORED, .latency = 1}, - .uop[3] = {.type = UOP_FSTOREA, .latency = 1} +static const macro_op_t setcc_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_FSTORED, .latency = 1}, + .uop[3] = { .type = UOP_FSTOREA, .latency = 1} }; -static const macro_op_t setcc_reg_op = -{ - .nr_uops = 3, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALUP0, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1}, - .uop[2] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t setcc_reg_op = { + .nr_uops = 3, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALUP0, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1}, + .uop[2] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_mem_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALU, .latency = 1} +static const macro_op_t test_mem_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t test_mem_b_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_ALUP0, .latency = 1} +static const macro_op_t test_mem_b_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_ALUP0, .latency = 1} }; -static const macro_op_t xchg_mem_op = -{ - .nr_uops = 4, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_LOAD, .latency = 1}, - .uop[1] = {.type = UOP_STORED, .latency = 1}, - .uop[2] = {.type = UOP_STOREA, .latency = 1}, - .uop[3] = {.type 
= UOP_ALU, .latency = 1} +static const macro_op_t xchg_mem_op = { + .nr_uops = 4, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_LOAD, .latency = 1}, + .uop[1] = { .type = UOP_STORED, .latency = 1}, + .uop[2] = { .type = UOP_STOREA, .latency = 1}, + .uop[3] = { .type = UOP_ALU, .latency = 1} }; -static const macro_op_t xlat_op = -{ - .nr_uops = 2, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 1}, - .uop[1] = {.type = UOP_LOAD, .latency = 1} +static const macro_op_t xlat_op = { + .nr_uops = 2, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 1}, + .uop[1] = { .type = UOP_LOAD, .latency = 1} }; -static const macro_op_t wbinvd_op = -{ - .nr_uops = 1, - .decode_type = DECODE_COMPLEX, - .uop[0] = {.type = UOP_ALU, .latency = 10000} +static const macro_op_t wbinvd_op = { + .nr_uops = 1, + .decode_type = DECODE_COMPLEX, + .uop[0] = {.type = UOP_ALU, .latency = 10000} }; #define INVALID NULL -static const macro_op_t *opcode_timings[256] = -{ +static const macro_op_t *opcode_timings[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alup0_store_op, &alu_store_op, &load_alup0_op, &load_alu_op, /* ADD ADD PUSH ES POP ES*/ @@ -1036,10 +919,11 @@ static const macro_op_t *opcode_timings[256] = &complex_alu1_op, &complex_alu1_op, &cli_sti_op, &cli_sti_op, /* CLD STD INCDEC*/ &complex_alu1_op, &complex_alu1_op, &alup0_store_op, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_mod3[256] = -{ +static const macro_op_t *opcode_timings_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ &alup0_op, &alu_op, &alup0_op, &alu_op, /* ADD ADD PUSH ES POP ES*/ @@ -1173,10 +1057,11 @@ static const macro_op_t *opcode_timings_mod3[256] = &complex_alu1_op, &complex_alu1_op, &cli_sti_op, &cli_sti_op, /* CLD STD INCDEC*/ &complex_alu1_op, &complex_alu1_op, &complex_alup0_1_op, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_0f[256] = -{ +static const macro_op_t 
*opcode_timings_0f[256] = { + // clang-format off /*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, INVALID, &alu6_op, &alu6_op, INVALID, &invd_op, &wbinvd_op, INVALID, INVALID, @@ -1256,9 +1141,10 @@ static const macro_op_t *opcode_timings_0f[256] = INVALID, &pmul_mem_op, INVALID, INVALID, &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID, + // clang-format on }; -static const macro_op_t *opcode_timings_0f_mod3[256] = -{ +static const macro_op_t *opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ &alu6_op, &alu6_op, &alu6_op, &alu6_op, INVALID, &alu6_op, &alu6_op, INVALID, &invd_op, &wbinvd_op, INVALID, INVALID, @@ -1342,113 +1228,131 @@ static const macro_op_t *opcode_timings_0f_mod3[256] = static const macro_op_t *opcode_timings_shift[8] = { + // clang-format off &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op + // clang-format on }; -static const macro_op_t *opcode_timings_shift_b[8] = -{ +static const macro_op_t *opcode_timings_shift_b[8] = { + // clang-format off &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op + // clang-format on }; -static const macro_op_t *opcode_timings_shift_mod3[8] = -{ +static const macro_op_t *opcode_timings_shift_mod3[8] = { + // clang-format off &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &alu_op, &alu_op, &alu_op, &alu_op + // clang-format on }; -static const macro_op_t *opcode_timings_shift_b_mod3[8] = -{ +static const macro_op_t *opcode_timings_shift_b_mod3[8] = { + // clang-format off &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, &complex_alup0_1_op, &alup0_op, &alup0_op, &alup0_op, &alup0_op + // clang-format on }; -static const macro_op_t *opcode_timings_80[8] = -{ +static const macro_op_t *opcode_timings_80[8] = { + // clang-format off &alup0_store_op, &alup0_store_op, 
&alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, &alup0_store_op, + // clang-format on }; -static const macro_op_t *opcode_timings_80_mod3[8] = -{ +static const macro_op_t *opcode_timings_80_mod3[8] = { + // clang-format off &alup0_op, &alup0_op, &alup0_store_op, &alup0_store_op, &alup0_op, &alup0_op, &alup0_op, &alup0_op, + // clang-format on }; -static const macro_op_t *opcode_timings_8x[8] = -{ +static const macro_op_t *opcode_timings_8x[8] = { + // clang-format off &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op, + // clang-format on }; -static const macro_op_t *opcode_timings_8x_mod3[8] = -{ +static const macro_op_t *opcode_timings_8x_mod3[8] = { + // clang-format off &alu_op, &alu_op, &alu_store_op, &alu_store_op, &alu_op, &alu_op, &alu_op, &alu_op, + // clang-format on }; -static const macro_op_t *opcode_timings_f6[8] = -{ +static const macro_op_t *opcode_timings_f6[8] = { + // clang-format off /* TST NOT NEG*/ &test_mem_imm_b_op, INVALID, &alup0_store_op, &alup0_store_op, /* MUL IMUL DIV IDIV*/ &mul_mem_op, &mul_mem_op, &div16_mem_op, &div16_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_f6_mod3[8] = -{ +static const macro_op_t *opcode_timings_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ &test_reg_b_op, INVALID, &alup0_op, &alup0_op, /* MUL IMUL DIV IDIV*/ &mul_op, &mul_op, &div16_op, &div16_op, + // clang-format on }; -static const macro_op_t *opcode_timings_f7[8] = -{ +static const macro_op_t *opcode_timings_f7[8] = { + // clang-format off /* TST NOT NEG*/ &test_mem_imm_op, INVALID, &alu_store_op, &alu_store_op, /* MUL IMUL DIV IDIV*/ &mul64_mem_op, &mul64_mem_op, &div32_mem_op, &div32_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_f7_mod3[8] = -{ +static const macro_op_t *opcode_timings_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ &test_reg_op, INVALID, &alu_op, &alu_op, /* MUL 
IMUL DIV IDIV*/ &mul64_op, &mul64_op, &div32_op, &div32_op, + // clang-format on }; -static const macro_op_t *opcode_timings_ff[8] = -{ +static const macro_op_t *opcode_timings_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ &alu_store_op, &alu_store_op, &store_op, &call_far_op, /* JMP JMP far PUSH*/ &branch_op, &jmp_far_op, &push_mem_op, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_ff_mod3[8] = -{ +static const macro_op_t *opcode_timings_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ &complex_alu1_op, &complex_alu1_op, &store_op, &call_far_op, /* JMP JMP far PUSH*/ &branch_op, &jmp_far_op, &complex_push_mem_op, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_d8[8] = -{ +static const macro_op_t *opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBs FSUBRs FDIVs FDIVRs*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_d8_mod3[8] = -{ +static const macro_op_t *opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ &fadd_op, &fmul_op, &float_op, &float_op, /* FSUB FSUBR FDIV FDIVR*/ &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on }; -static const macro_op_t *opcode_timings_d9[8] = -{ +static const macro_op_t *opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDENV FLDCW FSTENV FSTCW*/ &complex_float_l_op, &fldcw_op, &complex_float_l_op, &complex_float_op + // clang-format on }; -static const macro_op_t *opcode_timings_d9_mod3[64] = -{ +static const macro_op_t *opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, &float_op, @@ -1477,31 +1381,35 @@ static const macro_op_t *opcode_timings_d9_mod3[64] = &fdiv_op, INVALID, &fsqrt_op, 
&fsin_op, /* opFRNDINT opFSCALE opFSIN opFCOS*/ &float_op, &fdiv_op, &fsin_op, &fsin_op + // clang-format on }; -static const macro_op_t *opcode_timings_da[8] = -{ +static const macro_op_t *opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FISUBl FISUBRl FIDIVl FIDIVRl*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_da_mod3[8] = -{ +static const macro_op_t *opcode_timings_da_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ INVALID, &float_op, INVALID, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_db[8] = -{ +static const macro_op_t *opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FLDe FSTPe*/ INVALID, &flde_op, INVALID, &fste_op + // clang-format on }; -static const macro_op_t *opcode_timings_db_mod3[64] = -{ +static const macro_op_t *opcode_timings_db_mod3[64] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -1527,153 +1435,152 @@ static const macro_op_t *opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // clang-format on }; -static const macro_op_t *opcode_timings_dc[8] = -{ +static const macro_op_t *opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ &load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op, /* FSUBd FSUBRd FDIVd FDIVRd*/ &load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op, + // clang-format on }; -static const macro_op_t *opcode_timings_dc_mod3[8] = -{ +static const macro_op_t *opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ &fadd_op, &fmul_op, INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ &float_op, &float_op, &fdiv_op, &fdiv_op + // clang-format on }; -static const 
macro_op_t *opcode_timings_dd[8] = -{ +static const macro_op_t *opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FRSTOR FSAVE FSTSW*/ &complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op + // clang-format on }; -static const macro_op_t *opcode_timings_dd_mod3[8] = -{ +static const macro_op_t *opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ &float_op, INVALID, &float_op, &float_op, /* FUCOM FUCOMP*/ &float_op, &float_op, INVALID, INVALID + // clang-format on }; -static const macro_op_t *opcode_timings_de[8] = -{ +static const macro_op_t *opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, /* FISUBw FISUBRw FIDIVw FIDIVRw*/ &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op, + // clang-format on }; -static const macro_op_t *opcode_timings_de_mod3[8] = -{ +static const macro_op_t *opcode_timings_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ &fadd_op, &fmul_op, INVALID, &float_op, /* FSUBP FSUBRP FDIVP FDIVRP*/ &float_op, &float_op, &fdiv_op, &fdiv_op, + // clang-format on }; -static const macro_op_t *opcode_timings_df[8] = -{ +static const macro_op_t *opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ &load_float_op, INVALID, &fstore_op, &fstore_op, /* FILDiq FBSTP FISTPiq*/ INVALID, &load_float_op, &complex_float_l_op, &fstore_op, + // clang-format on }; -static const macro_op_t *opcode_timings_df_mod3[8] = -{ +static const macro_op_t *opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ &float_op, INVALID, INVALID, INVALID + // clang-format on }; - static uint8_t last_prefix; -static int prefixes; +static int prefixes; static int decode_timestamp; static int last_complete_timestamp; -typedef struct p6_unit_t -{ - uint32_t uop_mask; - double first_available_cycle; 
+typedef struct p6_unit_t { + uint32_t uop_mask; + double first_available_cycle; } p6_unit_t; -static int nr_units; +static int nr_units; static p6_unit_t *units; /*Pentium Pro has no MMX*/ -static p6_unit_t ppro_units[] = -{ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT)}, /*Port 0*/ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH)}, /*Port 1*/ - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD)}, /*Port 2*/ - {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED)}, /*Port 3*/ - {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA)}, /*Port 4*/ +static p6_unit_t ppro_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT) }, /*Port 0*/ + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) }, /*Port 1*/ + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) }, /*Port 2*/ + { .uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) }, /*Port 3*/ + { .uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) }, /*Port 4*/ }; #define NR_PPRO_UNITS (sizeof(ppro_units) / sizeof(p6_unit_t)) /*Pentium II/Celeron assigns the multiplier to port 0, the shifter to port 1, and shares the MMX ALU*/ -static p6_unit_t p2_units[] = -{ - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT) | /*Port 0*/ - (1 << UOP_MMX) | (1 << UOP_MMX_MUL)}, - {.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Port 1*/ - (1 << UOP_MMX) | (1 << UOP_MMX_SHIFT)}, - {.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Port 2*/ - {.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED)}, /*Port 3*/ - {.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA)}, /*Port 4*/ +static p6_unit_t p2_units[] = { + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUP0) | (1 << UOP_FLOAT) | /*Port 0*/ + (1 << UOP_MMX) | (1 << UOP_MMX_MUL) }, + { .uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Port 1*/ + (1 << UOP_MMX) | (1 << UOP_MMX_SHIFT) }, + { .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Port 2*/ + { 
.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED) }, /*Port 3*/ + { .uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA) }, /*Port 4*/ }; #define NR_P2_UNITS (sizeof(p2_units) / sizeof(p6_unit_t)) -static int uop_run(const p6_uop_t *uop, int decode_time) +static int +uop_run(const p6_uop_t *uop, int decode_time) { - int c; - p6_unit_t *best_unit = NULL; - int best_start_cycle = 99999; + p6_unit_t *best_unit = NULL; + int best_start_cycle = 99999; - /*UOP_FXCH does not require execution*/ - if (uop->type == UOP_FXCH) - return decode_time; + /*UOP_FXCH does not require execution*/ + if (uop->type == UOP_FXCH) + return decode_time; - /*Find execution unit for this uOP*/ - for (c = 0; c < nr_units; c++) - { - if (units[c].uop_mask & (1 << uop->type)) - { - if (units[c].first_available_cycle < best_start_cycle) - { - best_unit = &units[c]; - best_start_cycle = units[c].first_available_cycle; - } - } + /*Find execution unit for this uOP*/ + for (int c = 0; c < nr_units; c++) { + if (units[c].uop_mask & (1 << uop->type)) { + if (units[c].first_available_cycle < best_start_cycle) { + best_unit = &units[c]; + best_start_cycle = units[c].first_available_cycle; + } } - if (!best_unit) - fatal("uop_run: can not find execution unit\n"); + } + if (!best_unit) + fatal("uop_run: can not find execution unit\n"); - if (best_start_cycle < decode_time) - best_start_cycle = decode_time; - best_unit->first_available_cycle = best_start_cycle + uop->latency; + if (best_start_cycle < decode_time) + best_start_cycle = decode_time; + best_unit->first_available_cycle = best_start_cycle + uop->latency; - - - return best_start_cycle + uop->latency; + return best_start_cycle + uop->latency; } /*The P6 decoders can decode, per clock : - 1 to 3 'simple' instructions, each up to 1 uOP and 7 bytes long - 1 'complex' instruction, up to 4 uOPs or 3 per cycle for instructions longer than 4 uOPs */ -static struct -{ - int nr_uops; - const p6_uop_t *uops[6]; - 
/*Earliest time a uop can start. If the timestamp is -1, then the uop is - part of a dependency chain and the start time is the completion time of - the previous uop*/ - int earliest_start[6]; +static struct { + int nr_uops; + const p6_uop_t *uops[6]; + /*Earliest time a uop can start. If the timestamp is -1, then the uop is + part of a dependency chain and the start time is the completion time of + the previous uop*/ + int earliest_start[6]; } decode_buffer; #define NR_OPSEQS 3 @@ -1691,421 +1598,400 @@ static int fpu_st_timestamp[8]; dependent uop chains*/ static int last_uop_timestamp = 0; -void decode_flush_p6(void) +void +decode_flush_p6(void) { - int c; - int start_timestamp, uop_timestamp = 0; + int start_timestamp; + int uop_timestamp = 0; - /*Decoded opseq can not be submitted if there are no free spaces in the - opseq buffer*/ - if (decode_timestamp < opseq_completion_timestamp[next_opseq]) - decode_timestamp = opseq_completion_timestamp[next_opseq]; + /*Decoded opseq can not be submitted if there are no free spaces in the + opseq buffer*/ + if (decode_timestamp < opseq_completion_timestamp[next_opseq]) + decode_timestamp = opseq_completion_timestamp[next_opseq]; - /*Ensure that uops can not be submitted before they have been decoded*/ - if (decode_timestamp > last_uop_timestamp) - last_uop_timestamp = decode_timestamp; + /*Ensure that uops can not be submitted before they have been decoded*/ + if (decode_timestamp > last_uop_timestamp) + last_uop_timestamp = decode_timestamp; - /*Submit uops to execution units, and determine the latest completion time*/ - for (c = 0; c < (decode_buffer.nr_uops); c++) - { - if (decode_buffer.earliest_start[c] == -1) - start_timestamp = last_uop_timestamp; - else - start_timestamp = decode_buffer.earliest_start[c]; - - last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); - if (last_uop_timestamp > uop_timestamp) - uop_timestamp = last_uop_timestamp; - } - - /*Calculate opseq completion time. 
Since opseqs complete in order, it - must be after the last completion.*/ - if (uop_timestamp <= last_complete_timestamp) - last_complete_timestamp = last_complete_timestamp + 1; + /*Submit uops to execution units, and determine the latest completion time*/ + for (int c = 0; c < (decode_buffer.nr_uops); c++) { + if (decode_buffer.earliest_start[c] == -1) + start_timestamp = last_uop_timestamp; else - last_complete_timestamp = uop_timestamp; + start_timestamp = decode_buffer.earliest_start[c]; - /*Advance to next opseq in buffer*/ - opseq_completion_timestamp[next_opseq] = last_complete_timestamp; - next_opseq++; - if (next_opseq == NR_OPSEQS) - next_opseq = 0; + last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp); + if (last_uop_timestamp > uop_timestamp) + uop_timestamp = last_uop_timestamp; + } - decode_timestamp++; - decode_buffer.nr_uops = 0; + /*Calculate opseq completion time. Since opseqs complete in order, it + must be after the last completion.*/ + if (uop_timestamp <= last_complete_timestamp) + last_complete_timestamp = last_complete_timestamp + 1; + else + last_complete_timestamp = uop_timestamp; + + /*Advance to next opseq in buffer*/ + opseq_completion_timestamp[next_opseq] = last_complete_timestamp; + next_opseq++; + if (next_opseq == NR_OPSEQS) + next_opseq = 0; + + decode_timestamp++; + decode_buffer.nr_uops = 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the limit on P6 simple decoding*/ -static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) +static int +codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32) { - int len = prefixes + 1; /*Opcode*/ - if (deps & MODRM) - { - len++; /*ModR/M*/ - if (deps & HAS_IMM8) - len++; - if (deps & HAS_IMM1632) - len += (op_32 & 0x100) ? 4 : 2; + int len = prefixes + 1; /*Opcode*/ + if (deps & MODRM) { + len++; /*ModR/M*/ + if (deps & HAS_IMM8) + len++; + if (deps & HAS_IMM1632) + len += (op_32 & 0x100) ? 
4 : 2; - if (op_32 & 0x200) - { - if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) - { - /* Has SIB*/ - len++; - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0x700) == 0x500) - len += 4; - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0xc7) == 0x05) - len += 4; - } - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 2; - else if ((fetchdat & 0xc7) == 0x06) - len += 2; - } + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; } + } - return len; + return len; } -static void decode_instruction(const macro_op_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) +static void +decode_instruction(const macro_op_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8) { - uint32_t regmask_required; - uint32_t regmask_modified; - int c; - int d = 0; /*Complex decoder uOPs*/ - int earliest_start = 0; - decode_type_t decode_type = ins->decode_type; - int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); + uint32_t regmask_required; + uint32_t regmask_modified; + int c; + int d = 0; /*Complex decoder uOPs*/ + int earliest_start = 0; + decode_type_t decode_type = ins->decode_type; + int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32); - /*Generate input register mask, and determine the earliest time this - instruction can start. 
This is not accurate, as this is calculated per - x86 instruction when it should be handled per uop*/ - regmask_required = get_dstdep_mask(deps, fetchdat, bit8); - regmask_required |= get_addr_regmask(deps, fetchdat, op_32); - for (c = 0; c < 8; c++) - { - if (regmask_required & (1 << c)) - { - if (reg_available_timestamp[c] > decode_timestamp) - earliest_start = reg_available_timestamp[c]; - } + /*Generate input register mask, and determine the earliest time this + instruction can start. This is not accurate, as this is calculated per + x86 instruction when it should be handled per uop*/ + regmask_required = get_dstdep_mask(deps, fetchdat, bit8); + regmask_required |= get_addr_regmask(deps, fetchdat, op_32); + for (c = 0; c < 8; c++) { + if (regmask_required & (1 << c)) { + if (reg_available_timestamp[c] > decode_timestamp) + earliest_start = reg_available_timestamp[c]; } - if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) - earliest_start = fpu_st_timestamp[0]; - if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) - earliest_start = fpu_st_timestamp[1]; - if ((deps & FPU_RW_STREG)) - { - int reg = fetchdat & 7; + } + if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp) + earliest_start = fpu_st_timestamp[0]; + if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp) + earliest_start = fpu_st_timestamp[1]; + if (deps & FPU_RW_STREG) { + int reg = fetchdat & 7; - if (fpu_st_timestamp[reg] > decode_timestamp) - earliest_start = fpu_st_timestamp[reg]; - } + if (fpu_st_timestamp[reg] > decode_timestamp) + earliest_start = fpu_st_timestamp[reg]; + } - /*Simple decoders are limited to 7 bytes & 1 uOP*/ - if ((decode_type == DECODE_SIMPLE && instr_length > 7) || (decode_type == DECODE_SIMPLE && ins->nr_uops > 1)) - decode_type = DECODE_COMPLEX; + /*Simple decoders are limited to 7 bytes & 1 uOP*/ + if ((decode_type == DECODE_SIMPLE && instr_length > 7) || (decode_type == DECODE_SIMPLE && ins->nr_uops > 1)) + 
decode_type = DECODE_COMPLEX; - switch (decode_type) - { - case DECODE_SIMPLE: - if (decode_buffer.nr_uops - d == 2) - { - decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; - decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - decode_buffer.nr_uops = 3; - decode_flush_p6(); - } - else if (decode_buffer.nr_uops - d == 1) - { - decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; - decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - decode_buffer.nr_uops = 2+d; - if (d) - decode_flush_p6(); - } - else if (decode_buffer.nr_uops) - { - decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; - decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; - decode_buffer.nr_uops = 1+d; - } - else - { - decode_buffer.nr_uops = 1; - decode_buffer.uops[0] = &ins->uop[0]; - decode_buffer.earliest_start[0] = earliest_start; - } - break; - - case DECODE_COMPLEX: - if (decode_buffer.nr_uops) - decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. can't be decoded after a simple one*/ - - d = 0; - - for (c = 0; c < ins->nr_uops; c++) - { - decode_buffer.uops[d] = &ins->uop[c]; - if (c == 0) - decode_buffer.earliest_start[d] = earliest_start; - else - decode_buffer.earliest_start[d] = -1; - d++; - - if ((d == 3) && (ins->nr_uops > 4)) /*Ins. 
with >4 uOPs require the use of special units only present on 3 translate PLAs*/ - { - d = 0; - decode_buffer.nr_uops = 3; - decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ - } - } + switch (decode_type) { + case DECODE_SIMPLE: + if (decode_buffer.nr_uops - d == 2) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 3; + decode_flush_p6(); + } else if (decode_buffer.nr_uops - d == 1) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 2 + d; if (d) - { - decode_buffer.nr_uops = d; + decode_flush_p6(); + } else if (decode_buffer.nr_uops) { + decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0]; + decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start; + decode_buffer.nr_uops = 1 + d; + } else { + decode_buffer.nr_uops = 1; + decode_buffer.uops[0] = &ins->uop[0]; + decode_buffer.earliest_start[0] = earliest_start; + } + break; + + case DECODE_COMPLEX: + if (decode_buffer.nr_uops) + decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. can't be decoded after a simple one*/ + + d = 0; + + for (c = 0; c < ins->nr_uops; c++) { + decode_buffer.uops[d] = &ins->uop[c]; + if (c == 0) + decode_buffer.earliest_start[d] = earliest_start; + else + decode_buffer.earliest_start[d] = -1; + d++; + + if ((d == 3) && (ins->nr_uops > 4)) { /*Ins. 
with >4 uOPs require the use of special units only present on 3 translate PLAs*/ + d = 0; + decode_buffer.nr_uops = 3; + decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/ } - break; - } + } + if (d) { + decode_buffer.nr_uops = d; + } + break; + } - /*Update write timestamps for any output registers*/ - regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); - for (c = 0; c < 8; c++) - { - if (regmask_modified & (1 << c)) - reg_available_timestamp[c] = last_complete_timestamp; - } + /*Update write timestamps for any output registers*/ + regmask_modified = get_dstdep_mask(deps, fetchdat, bit8); + for (c = 0; c < 8; c++) { + if (regmask_modified & (1 << c)) + reg_available_timestamp[c] = last_complete_timestamp; + } + if (deps & FPU_POP) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 1]; + fpu_st_timestamp[7] = 0; + } + if (deps & FPU_POP2) { + for (c = 0; c < 6; c++) + fpu_st_timestamp[c] = fpu_st_timestamp[c + 2]; + fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; + } + if (deps & FPU_PUSH) { + for (c = 0; c < 7; c++) + fpu_st_timestamp[c + 1] = fpu_st_timestamp[c]; + fpu_st_timestamp[0] = 0; + } + if (deps & FPU_WRITE_ST0) + fpu_st_timestamp[0] = last_complete_timestamp; + if (deps & FPU_WRITE_ST1) + fpu_st_timestamp[1] = last_complete_timestamp; + if (deps & FPU_WRITE_STREG) { + int reg = fetchdat & 7; if (deps & FPU_POP) - { - for (c = 0; c < 7; c++) - fpu_st_timestamp[c] = fpu_st_timestamp[c+1]; - fpu_st_timestamp[7] = 0; - } - if (deps & FPU_POP2) - { - for (c = 0; c < 6; c++) - fpu_st_timestamp[c] = fpu_st_timestamp[c+2]; - fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0; - } - if (deps & FPU_PUSH) - { - for (c = 0; c < 7; c++) - fpu_st_timestamp[c+1] = fpu_st_timestamp[c]; - fpu_st_timestamp[0] = 0; - } - if (deps & FPU_WRITE_ST0) - fpu_st_timestamp[0] = last_complete_timestamp; - if (deps & FPU_WRITE_ST1) - fpu_st_timestamp[1] = last_complete_timestamp; - if (deps & FPU_WRITE_STREG) - { - int reg = 
fetchdat & 7; - if (deps & FPU_POP) - reg--; - if (reg >= 0 && - !(reg == 0 && (deps & FPU_WRITE_ST0)) && - !(reg == 1 && (deps & FPU_WRITE_ST1))) - fpu_st_timestamp[reg] = last_complete_timestamp; - } + reg--; + if (reg >= 0 && !(reg == 0 && (deps & FPU_WRITE_ST0)) && !(reg == 1 && (deps & FPU_WRITE_ST1))) + fpu_st_timestamp[reg] = last_complete_timestamp; + } } -void codegen_timing_p6_block_start(void) +void +codegen_timing_p6_block_start(void) { - int c; + int c; - for (c = 0; c < nr_units; c++) - units[c].first_available_cycle = 0; + for (c = 0; c < nr_units; c++) + units[c].first_available_cycle = 0; - decode_timestamp = 0; - last_complete_timestamp = 0; + decode_timestamp = 0; + last_complete_timestamp = 0; - for (c = 0; c < NR_OPSEQS; c++) - opseq_completion_timestamp[c] = 0; - next_opseq = 0; + for (c = 0; c < NR_OPSEQS; c++) + opseq_completion_timestamp[c] = 0; + next_opseq = 0; - for (c = 0; c < NR_REGS; c++) - reg_available_timestamp[c] = 0; - for (c = 0; c < 8; c++) - fpu_st_timestamp[c] = 0; + for (c = 0; c < NR_REGS; c++) + reg_available_timestamp[c] = 0; + for (c = 0; c < 8; c++) + fpu_st_timestamp[c] = 0; } -void codegen_timing_p6_start(void) +void +codegen_timing_p6_start(void) { - if (cpu_s->cpu_type == CPU_PENTIUMPRO) - { - units = ppro_units; - nr_units = NR_PPRO_UNITS; - } - else - { - units = p2_units; - nr_units = NR_P2_UNITS; - } - last_prefix = 0; - prefixes = 0; + if (cpu_s->cpu_type == CPU_PENTIUMPRO) { + units = ppro_units; + nr_units = NR_PPRO_UNITS; + } else { + units = p2_units; + nr_units = NR_P2_UNITS; + } + last_prefix = 0; + prefixes = 0; } -void codegen_timing_p6_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_p6_prefix(uint8_t prefix, uint32_t fetchdat) { - if (prefix != 0x0f) - decode_timestamp++; + if (prefix != 0x0f) + decode_timestamp++; - last_prefix = prefix; - prefixes++; + last_prefix = prefix; + prefixes++; } -void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) 
+void +codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - const macro_op_t **ins_table; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int old_last_complete_timestamp = last_complete_timestamp; - int bit8 = !(opcode & 1); + const macro_op_t **ins_table; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int old_last_complete_timestamp = last_complete_timestamp; + int bit8 = !(opcode & 1); - switch (last_prefix) - { - case 0x0f: - ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? 
opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + case 0x83: + ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? 
opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xd0: + case 0xd2: + ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc1: + case 0xd1: + case 0xd3: + ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: - ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: case 0x83: - ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; + ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xd0: case 0xd2: - ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b; - deps = mod3 ? 
opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (ins_table[opcode]) + decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); + else + decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8); + codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); +} - case 0xc1: case 0xd1: case 0xd3: - ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; - - case 0xf6: - ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; - - default: - ins_table = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? 
opcode_deps_mod3 : opcode_deps; - break; - } - } - - if (ins_table[opcode]) - decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8); - else - decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8); +void +codegen_timing_p6_block_end(void) +{ + if (decode_buffer.nr_uops) { + int old_last_complete_timestamp = last_complete_timestamp; + decode_flush_p6(); codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); + } } -void codegen_timing_p6_block_end(void) +int +codegen_timing_p6_jump_cycles(void) { - if (decode_buffer.nr_uops) - { - int old_last_complete_timestamp = last_complete_timestamp; - decode_flush_p6(); - codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp); - } + if (decode_buffer.nr_uops) + return 1; + return 0; } -int codegen_timing_p6_jump_cycles(void) -{ - if (decode_buffer.nr_uops) - return 1; - return 0; -} - -codegen_timing_t codegen_timing_p6 = -{ - codegen_timing_p6_start, - codegen_timing_p6_prefix, - codegen_timing_p6_opcode, - codegen_timing_p6_block_start, - codegen_timing_p6_block_end, - codegen_timing_p6_jump_cycles +codegen_timing_t codegen_timing_p6 = { + codegen_timing_p6_start, + codegen_timing_p6_prefix, + codegen_timing_p6_opcode, + codegen_timing_p6_block_start, + codegen_timing_p6_block_end, + codegen_timing_p6_jump_cycles }; diff --git a/src/cpu/codegen_timing_pentium.c b/src/cpu/codegen_timing_pentium.c index 232455f6d..58ec5b454 100644 --- a/src/cpu/codegen_timing_pentium.c +++ b/src/cpu/codegen_timing_pentium.c @@ -14,8 +14,10 @@ #include #include #include <86box/86box.h> -#include <86box/mem.h> #include "cpu.h" +#include <86box/mem.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" @@ -23,46 +25,45 @@ #include "codegen_ops.h" #include "codegen_timing_common.h" - /*Instruction has different execution time for 16 and 32 bit data. 
Does not pair */ -#define CYCLES_HAS_MULTI (1 << 28) +#define CYCLES_HAS_MULTI (1 << 28) #define CYCLES_MULTI(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) /*Instruction lasts given number of cycles. Does not pair*/ #define CYCLES(c) (c | PAIR_NP) - -static int pair_timings[4][4] = -{ -/* Reg RM RMW Branch*/ -/*Reg*/ {1, 2, 3, 2}, -/*RM*/ {2, 2, 3, 3}, -/*RMW*/ {3, 4, 5, 4}, -/*Branch*/ {-1, -1, -1, -1} +static int pair_timings[4][4] = { + /* Reg RM RMW Branch*/ + /*Reg*/ {1, 2, 3, 2 }, + /*RM*/ + { 2, 2, 3, 3 }, + /*RMW*/ + { 3, 4, 5, 4 }, + /*Branch*/ + { -1, -1, -1, -1} }; /*Instruction follows either register timing, read-modify, or read-modify-write. May be pairable*/ -#define CYCLES_REG (0ull << 0) -#define CYCLES_RM (1ull << 0) -#define CYCLES_RMW (2ull << 0) +#define CYCLES_REG (0ull << 0) +#define CYCLES_RM (1ull << 0) +#define CYCLES_RMW (2ull << 0) #define CYCLES_BRANCH (3ull << 0) /*Instruction has immediate data. Can only be used with PAIR_U/PAIR_V/PAIR_UV*/ -#define CYCLES_HASIMM (3ull << 2) +#define CYCLES_HASIMM (3ull << 2) #define CYCLES_IMM8 (1ull << 2) #define CYCLES_IMM1632 (2ull << 2) -#define CYCLES_MASK ((1ull << 7) - 1) - +#define CYCLES_MASK ((1ull << 7) - 1) /*Instruction does not pair*/ #define PAIR_NP (0ull << 29) /*Instruction pairs in U pipe only*/ -#define PAIR_U (1ull << 29) +#define PAIR_U (1ull << 29) /*Instruction pairs in V pipe only*/ -#define PAIR_V (2ull << 29) +#define PAIR_V (2ull << 29) /*Instruction pairs in both U and V pipes*/ #define PAIR_UV (3ull << 29) /*Instruction pairs in U pipe only and only with FXCH*/ @@ -70,36 +71,34 @@ static int pair_timings[4][4] = /*Instruction is FXCH and only pairs in V pipe with FX pairable instruction*/ #define PAIR_FXCH (6ull << 29) -#define PAIR_FPU (4ull << 29) +#define PAIR_FPU (4ull << 29) #define PAIR_MASK (7ull << 29) - /*comp_time = cycles until instruction complete i_overlap = cycles that overlap with integer f_overlap = cycles that overlap with subsequent FPU*/ -#define 
FPU_CYCLES(comp_time, i_overlap, f_overlap) ((uint64_t)comp_time) | ((uint64_t)i_overlap << 41) | ((uint64_t)f_overlap << 49) | PAIR_FPU +#define FPU_CYCLES(comp_time, i_overlap, f_overlap) ((uint64_t) comp_time) | ((uint64_t) i_overlap << 41) | ((uint64_t) f_overlap << 49) | PAIR_FPU -#define FPU_COMP_TIME(timing) (timing & 0xff) -#define FPU_I_OVERLAP(timing) ((timing >> 41) & 0xff) -#define FPU_F_OVERLAP(timing) ((timing >> 49) & 0xff) +#define FPU_COMP_TIME(timing) (timing & 0xff) +#define FPU_I_OVERLAP(timing) ((timing >> 41) & 0xff) +#define FPU_F_OVERLAP(timing) ((timing >> 49) & 0xff) -#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) +#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) -#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) +#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) -#define FPU_RESULT_LATENCY(timing) ((timing >> 41) & 0xff) +#define FPU_RESULT_LATENCY(timing) ((timing >> 41) & 0xff) +#define INVALID 0 -#define INVALID 0 - -static int u_pipe_full; -static uint32_t u_pipe_opcode; +static int u_pipe_full; +static uint32_t u_pipe_opcode; static uint64_t *u_pipe_timings; -static uint32_t u_pipe_op_32; -static uint32_t u_pipe_regmask; -static uint32_t u_pipe_fetchdat; -static int u_pipe_decode_delay_offset; +static uint32_t u_pipe_op_32; +static uint32_t u_pipe_regmask; +static uint32_t u_pipe_fetchdat; +static int u_pipe_decode_delay_offset; static uint64_t *u_pipe_deps; static uint32_t regmask_modified; @@ -109,8 +108,8 @@ static uint32_t addr_regmask; static int fpu_latency; static int fpu_st_latency[8]; -static uint64_t opcode_timings[256] = -{ +static uint64_t opcode_timings[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, /* ADD ADD PUSH ES POP ES*/ @@ -243,10 +242,11 @@ static uint64_t opcode_timings[256] = PAIR_NP | CYCLES(2), 
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), /* CLD STD INCDEC*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_RMW, INVALID + // clang-format on }; -static uint64_t opcode_timings_mod3[256] = -{ +static uint64_t opcode_timings_mod3[256] = { + // clang-format off /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* ADD ADD PUSH ES POP ES*/ @@ -380,10 +380,11 @@ static uint64_t opcode_timings_mod3[256] = PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), /* CLD STD INCDEC*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_REG, INVALID + // clang-format on }; -static uint64_t opcode_timings_0f[256] = -{ +static uint64_t opcode_timings_0f[256] = { + // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, @@ -463,9 +464,10 @@ static uint64_t opcode_timings_0f[256] = INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, + // clang-format on }; -static uint64_t opcode_timings_0f_mod3[256] = -{ +static uint64_t opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, @@ -545,86 +547,98 @@ static uint64_t opcode_timings_0f_mod3[256] = INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, + // clang-format on }; -static uint64_t opcode_timings_shift[8] = -{ +static 
uint64_t opcode_timings_shift[8] = { + // clang-format off PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, + // clang-format on }; -static uint64_t opcode_timings_shift_mod3[8] = -{ +static uint64_t opcode_timings_shift_mod3[8] = { + // clang-format off PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, + // clang-format on }; -static uint64_t opcode_timings_f6[8] = -{ +static uint64_t opcode_timings_f6[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) + // clang-format on }; -static uint64_t opcode_timings_f6_mod3[8] = -{ +static uint64_t opcode_timings_f6_mod3[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) + // clang-format on }; -static uint64_t opcode_timings_f7[8] = -{ +static uint64_t opcode_timings_f7[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) + // clang-format on }; -static uint64_t opcode_timings_f7_mod3[8] = -{ +static uint64_t opcode_timings_f7_mod3[8] = { + // clang-format off /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) + // clang-format on }; -static uint64_t 
opcode_timings_ff[8] = -{ +static uint64_t opcode_timings_ff[8] = { + // clang-format off /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), /* JMP JMP far PUSH*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID + // clang-format on }; -static uint64_t opcode_timings_ff_mod3[8] = -{ +static uint64_t opcode_timings_ff_mod3[8] = { + // clang-format off /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), /* JMP JMP far PUSH*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID + // clang-format on }; -static uint64_t opcode_timings_d8[8] = -{ +static uint64_t opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /* FSUBs FSUBRs FDIVs FDIVRs*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_d8_mod3[8] = -{ +static uint64_t opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /* FSUB FSUBR FDIV FDIVR*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_d9[8] = -{ +static uint64_t opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), /* FLDENV FLDCW FSTENV FSTCW*/ PAIR_NP | FPU_CYCLES(32,0,0), PAIR_NP | FPU_CYCLES(8,0,0), PAIR_NP | FPU_CYCLES(48,0,0), PAIR_NP | FPU_CYCLES(2,0,0) + // clang-format on }; -static uint64_t opcode_timings_d9_mod3[64] = -{ +static uint64_t 
opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), @@ -653,32 +667,35 @@ static uint64_t opcode_timings_d9_mod3[64] = PAIR_NP | FPU_CYCLES(64,2,2), INVALID, PAIR_NP | FPU_CYCLES(70,69,2), PAIR_NP | FPU_CYCLES(89,2,2), /* opFRNDINT opFSCALE opFSIN opFCOS*/ PAIR_NP | FPU_CYCLES(9,0,0), PAIR_NP | FPU_CYCLES(20,5,0), PAIR_NP | FPU_CYCLES(65,2,2), PAIR_NP | FPU_CYCLES(65,2,2) + // clang-format on }; -static uint64_t opcode_timings_da[8] = -{ +static uint64_t opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), /* FISUBl FISUBRl FIDIVl FIDIVRl*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) + // clang-format on }; -static uint64_t opcode_timings_da_mod3[8] = -{ +static uint64_t opcode_timings_da_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ INVALID, PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID + // clang-format on }; - -static uint64_t opcode_timings_db[8] = -{ +static uint64_t opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), /* FLDe FSTPe*/ INVALID, PAIR_NP | FPU_CYCLES(3,0,0), INVALID, PAIR_NP | FPU_CYCLES(3,0,0) + // clang-format on }; -static uint64_t opcode_timings_db_mod3[64] = -{ +static uint64_t opcode_timings_db_mod3[64] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -704,616 +721,606 @@ static uint64_t opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // 
clang-format on }; -static uint64_t opcode_timings_dc[8] = -{ +static uint64_t opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /* FSUBd FSUBRd FDIVd FDIVRd*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_dc_mod3[8] = -{ +static uint64_t opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_dd[8] = -{ +static uint64_t opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), /* FRSTOR FSAVE FSTSW*/ PAIR_NP | FPU_CYCLES(70,0,0), INVALID, PAIR_NP | FPU_CYCLES(127,0,0), PAIR_NP | FPU_CYCLES(6,0,0) + // clang-format on }; -static uint64_t opcode_timings_dd_mod3[8] = -{ +static uint64_t opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ PAIR_NP | FPU_CYCLES(2,0,0), INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), /* FUCOM FUCOMP*/ PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID + // clang-format on }; -static uint64_t opcode_timings_de[8] = -{ +static uint64_t opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), /* FISUBw FISUBRw FIDIVw FIDIVRw*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) + // clang-format on }; 
-static uint64_t opcode_timings_de_mod3[8] = -{ +static uint64_t opcode_timings_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, PAIR_FX | FPU_CYCLES(1,0,0), /* FSUBP FSUBRP FDIVP FDIVRP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) + // clang-format on }; -static uint64_t opcode_timings_df[8] = -{ +static uint64_t opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), /* FILDiq FBSTP FISTPiq*/ INVALID, PAIR_NP | FPU_CYCLES(3,2,2), PAIR_NP | FPU_CYCLES(148,0,0), PAIR_NP | FPU_CYCLES(6,0,0) + // clang-format on }; -static uint64_t opcode_timings_df_mod3[8] = -{ +static uint64_t opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ PAIR_NP | FPU_CYCLES(6,0,0), INVALID, INVALID, INVALID + // clang-format on }; -static uint64_t opcode_timings_81[8] = -{ +static uint64_t opcode_timings_81[8] = { + // clang-format off PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RM | CYCLES_IMM1632 + // clang-format on }; -static uint64_t opcode_timings_81_mod3[8] = -{ +static uint64_t opcode_timings_81_mod3[8] = { + // clang-format off PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG + // clang-format on }; -static uint64_t opcode_timings_8x[8] = -{ +static uint64_t opcode_timings_8x[8] = { + // clang-format off PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | 
CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RM | CYCLES_IMM8 + // clang-format on }; -static uint64_t opcode_timings_8x_mod3[8] = -{ +static uint64_t opcode_timings_8x_mod3[8] = { + // clang-format off PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG + // clang-format on }; -static int decode_delay, decode_delay_offset; +static int decode_delay; +static int decode_delay_offset; static uint8_t last_prefix; -static int prefixes; +static int prefixes; -static inline int COUNT(uint64_t timings, uint64_t deps, int op_32) +static inline int +COUNT(uint64_t timings, uint64_t deps, int op_32) { - if ((timings & PAIR_FPU) && !(deps & FPU_FXCH)) - return FPU_I_LATENCY(timings); - if (timings & CYCLES_HAS_MULTI) - { - if (op_32 & 0x100) - return ((uintptr_t)timings >> 8) & 0xff; - return (uintptr_t)timings & 0xff; - } - if (!(timings & PAIR_MASK)) - return timings & 0xffff; - if ((timings & PAIR_MASK) == PAIR_FX) - return timings & 0xffff; - if ((timings & PAIR_MASK) == PAIR_FXCH) - return timings & 0xffff; - if ((timings & PAIR_UV) && !(timings & PAIR_FPU)) - timings &= 3; - switch (timings & CYCLES_MASK) - { - case CYCLES_REG: + if ((timings & PAIR_FPU) && !(deps & FPU_FXCH)) + return FPU_I_LATENCY(timings); + if (timings & CYCLES_HAS_MULTI) { + if (op_32 & 0x100) + return ((uintptr_t) timings >> 8) & 0xff; + return (uintptr_t) timings & 0xff; + } + if (!(timings & PAIR_MASK)) + return timings & 0xffff; + if ((timings & PAIR_MASK) == PAIR_FX) + return timings & 0xffff; + if ((timings & PAIR_MASK) == PAIR_FXCH) + return timings & 0xffff; + if ((timings & PAIR_UV) && !(timings & PAIR_FPU)) + timings &= 3; + switch (timings & CYCLES_MASK) { + case CYCLES_REG: + return 1; + case CYCLES_RM: + return 2; + case CYCLES_RMW: + 
return 3; + case CYCLES_BRANCH: + return cpu_has_feature(CPU_FEATURE_MMX) ? 1 : 2; + } + + fatal("Illegal COUNT %016llx\n", timings); + + return timings; +} + +static int +codegen_fpu_latencies(uint64_t deps, int reg) +{ + int latency = fpu_latency; + + if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) + latency = fpu_st_latency[0]; + if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) + latency = fpu_st_latency[1]; + if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) + latency = fpu_st_latency[reg]; + + return latency; +} + +#define SUB_AND_CLAMP(latency, count) \ + latency -= count; \ + if (latency < 0) \ + latency = 0 + +static void +codegen_fpu_latency_clock(int count) +{ + SUB_AND_CLAMP(fpu_latency, count); + SUB_AND_CLAMP(fpu_st_latency[0], count); + SUB_AND_CLAMP(fpu_st_latency[1], count); + SUB_AND_CLAMP(fpu_st_latency[2], count); + SUB_AND_CLAMP(fpu_st_latency[3], count); + SUB_AND_CLAMP(fpu_st_latency[4], count); + SUB_AND_CLAMP(fpu_st_latency[5], count); + SUB_AND_CLAMP(fpu_st_latency[6], count); + SUB_AND_CLAMP(fpu_st_latency[7], count); +} + +static inline int +codegen_timing_has_displacement(uint32_t fetchdat, int op_32) +{ + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /*Has SIB*/ + if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0x700) == 0x500) + return 1; + } else { + if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x05) return 1; - case CYCLES_RM: - return 2; - case CYCLES_RMW: - return 3; - case CYCLES_BRANCH: - return cpu_has_feature(CPU_FEATURE_MMX) ? 
1 : 2; } - - fatal("Illegal COUNT %016llx\n", timings); - - return timings; -} - -static int codegen_fpu_latencies(uint64_t deps, int reg) -{ - int latency = fpu_latency; - - if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) - latency = fpu_st_latency[0]; - if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) - latency = fpu_st_latency[1]; - if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) - latency = fpu_st_latency[reg]; - - return latency; -} - -#define SUB_AND_CLAMP(latency, count) \ - latency -= count; \ - if (latency < 0) \ - latency = 0 - -static void codegen_fpu_latency_clock(int count) -{ - SUB_AND_CLAMP(fpu_latency, count); - SUB_AND_CLAMP(fpu_st_latency[0], count); - SUB_AND_CLAMP(fpu_st_latency[1], count); - SUB_AND_CLAMP(fpu_st_latency[2], count); - SUB_AND_CLAMP(fpu_st_latency[3], count); - SUB_AND_CLAMP(fpu_st_latency[4], count); - SUB_AND_CLAMP(fpu_st_latency[5], count); - SUB_AND_CLAMP(fpu_st_latency[6], count); - SUB_AND_CLAMP(fpu_st_latency[7], count); -} - -static inline int codegen_timing_has_displacement(uint32_t fetchdat, int op_32) -{ - if (op_32 & 0x200) - { - if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) - { - /*Has SIB*/ - if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0x700) == 0x500) - return 1; - } - else - { - if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x05) - return 1; - } - } - else - { - if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x06) - return 1; - } - return 0; + } else { + if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x06) + return 1; + } + return 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the limit on Pentium MMX parallel decoding*/ -static inline int codegen_timing_instr_length(uint64_t timing, uint32_t fetchdat, int op_32) +static inline int 
+codegen_timing_instr_length(uint64_t timing, uint32_t fetchdat, int op_32) { - int len = prefixes; - if ((timing & CYCLES_MASK) == CYCLES_RM || (timing & CYCLES_MASK) == CYCLES_RMW) - { - len += 2; /*Opcode + ModR/M*/ - if ((timing & CYCLES_HASIMM) == CYCLES_IMM8) - len++; - if ((timing & CYCLES_HASIMM) == CYCLES_IMM1632) - len += (op_32 & 0x100) ? 4 : 2; + int len = prefixes; + if ((timing & CYCLES_MASK) == CYCLES_RM || (timing & CYCLES_MASK) == CYCLES_RMW) { + len += 2; /*Opcode + ModR/M*/ + if ((timing & CYCLES_HASIMM) == CYCLES_IMM8) + len++; + if ((timing & CYCLES_HASIMM) == CYCLES_IMM1632) + len += (op_32 & 0x100) ? 4 : 2; - if (op_32 & 0x200) - { - if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) - { - /* Has SIB*/ - len++; - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0x700) == 0x500) - len += 4; - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 4; - else if ((fetchdat & 0xc7) == 0x05) - len += 4; - } - } - else - { - if ((fetchdat & 0xc0) == 0x40) - len++; - else if ((fetchdat & 0xc0) == 0x80) - len += 2; - else if ((fetchdat & 0xc7) == 0x06) - len += 2; - } + if (op_32 & 0x200) { + if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { + /* Has SIB*/ + len++; + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0x700) == 0x500) + len += 4; + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 4; + else if ((fetchdat & 0xc7) == 0x05) + len += 4; + } + } else { + if ((fetchdat & 0xc0) == 0x40) + len++; + else if ((fetchdat & 0xc0) == 0x80) + len += 2; + else if ((fetchdat & 0xc7) == 0x06) + len += 2; } + } - return len; + return len; } -void codegen_timing_pentium_block_start(void) +void +codegen_timing_pentium_block_start(void) { - u_pipe_full = decode_delay = decode_delay_offset = 0; + u_pipe_full = decode_delay = 
decode_delay_offset = 0; } -void codegen_timing_pentium_start(void) +void +codegen_timing_pentium_start(void) { - last_prefix = 0; - prefixes = 0; + last_prefix = 0; + prefixes = 0; } -void codegen_timing_pentium_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_pentium_prefix(uint8_t prefix, uint32_t fetchdat) { - prefixes++; - if ((prefix & 0xf8) == 0xd8) - { - last_prefix = prefix; - return; - } - if (cpu_has_feature(CPU_FEATURE_MMX) && prefix == 0x0f) - { - /*On Pentium MMX 0fh prefix is 'free'*/ - last_prefix = prefix; - return; - } - if (cpu_has_feature(CPU_FEATURE_MMX) && (prefix == 0x66 || prefix == 0x67)) - { - /*On Pentium MMX 66h and 67h prefixes take 2 clocks*/ - decode_delay_offset += 2; - last_prefix = prefix; - return; - } - if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) - { - /*On Pentium 0fh prefix is 'free' when used on conditional jumps*/ - last_prefix = prefix; - return; - } - /*On Pentium all prefixes take 1 cycle to decode. Decode may be shadowed - by execution of previous instructions*/ - decode_delay_offset++; + prefixes++; + if ((prefix & 0xf8) == 0xd8) { last_prefix = prefix; + return; + } + if (cpu_has_feature(CPU_FEATURE_MMX) && prefix == 0x0f) { + /*On Pentium MMX 0fh prefix is 'free'*/ + last_prefix = prefix; + return; + } + if (cpu_has_feature(CPU_FEATURE_MMX) && (prefix == 0x66 || prefix == 0x67)) { + /*On Pentium MMX 66h and 67h prefixes take 2 clocks*/ + decode_delay_offset += 2; + last_prefix = prefix; + return; + } + if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) { + /*On Pentium 0fh prefix is 'free' when used on conditional jumps*/ + last_prefix = prefix; + return; + } + /*On Pentium all prefixes take 1 cycle to decode. 
Decode may be shadowed + by execution of previous instructions*/ + decode_delay_offset++; + last_prefix = prefix; } -static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) +static int +check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) { - uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); + uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); - /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not - cause AGIs with each other, but do with instructions that use it explicitly*/ - if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) - addr_regmask |= (1 << REG_ESP); + /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not + cause AGIs with each other, but do with instructions that use it explicitly*/ + if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) + addr_regmask |= (1 << REG_ESP); - return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; + return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; } -static void codegen_instruction(uint64_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) +static void +codegen_instruction(uint64_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) { - int instr_cycles, latency = 0; + int instr_cycles; + int latency = 0; - if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) - instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); - else - { -/* if (timings[opcode] & FPU_WRITE_ST0) - fatal("FPU_WRITE_ST0\n"); - if (timings[opcode] & FPU_WRITE_ST1) - fatal("FPU_WRITE_ST1\n"); - if (timings[opcode] & FPU_WRITE_STREG) - fatal("FPU_WRITE_STREG\n");*/ - instr_cycles = 0; - } + if ((timings[opcode] & PAIR_FPU) && 
!(deps[opcode] & FPU_FXCH)) + instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); + else { +#if 0 + if (timings[opcode] & FPU_WRITE_ST0) + fatal("FPU_WRITE_ST0\n"); + if (timings[opcode] & FPU_WRITE_ST1) + fatal("FPU_WRITE_ST1\n"); + if (timings[opcode] & FPU_WRITE_STREG) + fatal("FPU_WRITE_STREG\n");*/ +#endif + instr_cycles = 0; + } - if ((decode_delay + decode_delay_offset) > 0) - codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); - else - codegen_fpu_latency_clock(instr_cycles); - instr_cycles += COUNT(timings[opcode], deps[opcode], op_32); - instr_cycles += exec_delay; - if ((decode_delay + decode_delay_offset) > 0) - codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; - else - codegen_block_cycles += instr_cycles; + if ((decode_delay + decode_delay_offset) > 0) + codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); + else + codegen_fpu_latency_clock(instr_cycles); + instr_cycles += COUNT(timings[opcode], deps[opcode], op_32); + instr_cycles += exec_delay; + if ((decode_delay + decode_delay_offset) > 0) + codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; + else + codegen_block_cycles += instr_cycles; - decode_delay = (-instr_cycles) + 1; + decode_delay = (-instr_cycles) + 1; + if (deps[opcode] & FPU_POP) { + for (uint8_t c = 0; c < 7; c++) + fpu_st_latency[c] = fpu_st_latency[c + 1]; + fpu_st_latency[7] = 0; + } + if (deps[opcode] & FPU_POP2) { + for (uint8_t c = 0; c < 6; c++) + fpu_st_latency[c] = fpu_st_latency[c + 2]; + fpu_st_latency[6] = fpu_st_latency[7] = 0; + } + if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) { + fpu_latency = FPU_F_LATENCY(timings[opcode]); + } + + if (deps[opcode] & FPU_PUSH) { + for (uint8_t c = 0; c < 7; c++) + fpu_st_latency[c + 1] = fpu_st_latency[c]; + fpu_st_latency[0] = 0; + } + if (deps[opcode] & FPU_WRITE_ST0) { +#if 0 + if (fpu_st_latency[0]) + fatal("Bad latency ST0\n");*/ +#endif + 
fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_ST1) { +#if 0 + if (fpu_st_latency[1]) + fatal("Bad latency ST1\n");*/ +#endif + fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_STREG) { + int reg = fetchdat & 7; if (deps[opcode] & FPU_POP) - { - int c; - - for (c = 0; c < 7; c++) - fpu_st_latency[c] = fpu_st_latency[c+1]; - fpu_st_latency[7] = 0; - } - if (deps[opcode] & FPU_POP2) - { - int c; - - for (c = 0; c < 6; c++) - fpu_st_latency[c] = fpu_st_latency[c+2]; - fpu_st_latency[6] = fpu_st_latency[7] = 0; - } - if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) - { - fpu_latency = FPU_F_LATENCY(timings[opcode]); - } - - if (deps[opcode] & FPU_PUSH) - { - int c; - - for (c = 0; c < 7; c++) - fpu_st_latency[c+1] = fpu_st_latency[c]; - fpu_st_latency[0] = 0; - } - if (deps[opcode] & FPU_WRITE_ST0) - { -/* if (fpu_st_latency[0]) - fatal("Bad latency ST0\n");*/ - fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); - } - if (deps[opcode] & FPU_WRITE_ST1) - { -/* if (fpu_st_latency[1]) - fatal("Bad latency ST1\n");*/ - fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); - } - if (deps[opcode] & FPU_WRITE_STREG) - { - int reg = fetchdat & 7; - if (deps[opcode] & FPU_POP) - reg--; - if (reg >= 0 && - !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && - !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) - { - fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); - } + reg--; + if (reg >= 0 && !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) { + fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); } + } } -void codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +void +codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - uint64_t *timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 
1); - int agi_stall = 0; + uint64_t *timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); + int agi_stall = 0; - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? 
opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + case 0xd0: + case 0xd1: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xd2: + case 0xd3: + timings = mod3 ? 
opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: case 0xd0: case 0xd1: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (u_pipe_full) { + uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); - case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; - opcode = (fetchdat >> 3) & 7; - break; + if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && (timings[opcode] & PAIR_MASK) != PAIR_FXCH) + goto nopair; - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? 
opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; + if ((timings[opcode] & PAIR_MASK) == PAIR_FXCH && (u_pipe_timings[u_pipe_opcode] & PAIR_MASK) != PAIR_FX) + goto nopair; - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? opcode_deps_mod3 : opcode_deps; - break; - } - } + if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && (timings[opcode] & PAIR_MASK) == PAIR_FXCH) { + int temp; - if (u_pipe_full) - { - uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); - - if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && - (timings[opcode] & PAIR_MASK) != PAIR_FXCH) - goto nopair; - - if ((timings[opcode] & PAIR_MASK) == PAIR_FXCH && - (u_pipe_timings[u_pipe_opcode] & PAIR_MASK) != PAIR_FX) - goto nopair; - - if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && - (timings[opcode] & PAIR_MASK) == PAIR_FXCH) - { - int temp; - - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - - codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); - - temp = fpu_st_latency[fetchdat & 7]; - fpu_st_latency[fetchdat & 7] = fpu_st_latency[0]; - fpu_st_latency[0] = temp; - - u_pipe_full = 0; - decode_delay_offset = 0; - regmask_modified = u_pipe_regmask; - addr_regmask = 0; - return; - } - - if ((timings[opcode] & PAIR_V) && !(u_pipe_regmask & regmask) && (decode_delay+decode_delay_offset+u_pipe_decode_delay_offset) <= 0) - { - int has_displacement; - - if (timings[opcode] & CYCLES_HASIMM) - has_displacement = 
codegen_timing_has_displacement(fetchdat, op_32); - else - has_displacement = 0; - - if (!has_displacement && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) - { - int t1 = u_pipe_timings[u_pipe_opcode] & CYCLES_MASK; - int t2 = timings[opcode] & CYCLES_MASK; - int t_pair; - uint64_t temp_timing; - uint64_t temp_deps = 0; - - if (!(u_pipe_timings[u_pipe_opcode] & PAIR_FPU)) - t1 &= 3; - if (!(timings[opcode] & PAIR_FPU)) - t2 &= 3; - - if (t1 < 0 || t2 < 0 || t1 > CYCLES_BRANCH || t2 > CYCLES_BRANCH) - fatal("Pair out of range\n"); - - t_pair = pair_timings[t1][t2]; - if (t_pair < 1) - fatal("Illegal pair timings : t1=%i t2=%i u_opcode=%02x v_opcode=%02x\n", t1, t2, u_pipe_opcode, opcode); - - /*Instruction can pair with previous*/ - temp_timing = t_pair; - if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - codegen_instruction(&temp_timing, &temp_deps, 0, 0, 0, 0, agi_stall); - u_pipe_full = 0; - decode_delay_offset = 0; - - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; - addr_regmask = 0; - return; - } - } -nopair: - /*Instruction can not pair with previous*/ - /*Run previous now*/ - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); - u_pipe_full = 0; - regmask_modified = u_pipe_regmask; - addr_regmask = 0; - } - - if ((timings[opcode] & PAIR_U) && (decode_delay + decode_delay_offset) <= 0) - { - int has_displacement; - - if (timings[opcode] & CYCLES_HASIMM) - has_displacement = codegen_timing_has_displacement(fetchdat, op_32); - else - has_displacement = 0; - - if ((!has_displacement || cpu_has_feature(CPU_FEATURE_MMX)) && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], 
fetchdat, op_32) <= 7)) - { - /*Instruction might pair with next*/ - u_pipe_full = 1; - u_pipe_opcode = opcode; - u_pipe_timings = timings; - u_pipe_op_32 = op_32; - u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); - u_pipe_fetchdat = fetchdat; - u_pipe_decode_delay_offset = decode_delay_offset; - u_pipe_deps = deps; - decode_delay_offset = 0; - return; - } - } - /*Instruction can not pair and must run now*/ - if (check_agi(deps, opcode, fetchdat, op_32)) + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) agi_stall = 1; - codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); - decode_delay_offset = 0; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); - addr_regmask = 0; -} -void codegen_timing_pentium_block_end(void) -{ - if (u_pipe_full) - { - /*Run previous now*/ - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - codegen_block_cycles++; - codegen_block_cycles += COUNT(u_pipe_timings[u_pipe_opcode], u_pipe_deps[u_pipe_opcode], u_pipe_op_32) + decode_delay + decode_delay_offset; - u_pipe_full = 0; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + + temp = fpu_st_latency[fetchdat & 7]; + fpu_st_latency[fetchdat & 7] = fpu_st_latency[0]; + fpu_st_latency[0] = temp; + + u_pipe_full = 0; + decode_delay_offset = 0; + regmask_modified = u_pipe_regmask; + addr_regmask = 0; + return; } + + if ((timings[opcode] & PAIR_V) && !(u_pipe_regmask & regmask) && (decode_delay + decode_delay_offset + u_pipe_decode_delay_offset) <= 0) { + int has_displacement; + + if (timings[opcode] & CYCLES_HASIMM) + has_displacement = codegen_timing_has_displacement(fetchdat, op_32); + else + has_displacement = 0; + + if (!has_displacement && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) { + int t1 = 
u_pipe_timings[u_pipe_opcode] & CYCLES_MASK; + int t2 = timings[opcode] & CYCLES_MASK; + int t_pair; + uint64_t temp_timing; + uint64_t temp_deps = 0; + + if (!(u_pipe_timings[u_pipe_opcode] & PAIR_FPU)) + t1 &= 3; + if (!(timings[opcode] & PAIR_FPU)) + t2 &= 3; + + if (t1 < 0 || t2 < 0 || t1 > CYCLES_BRANCH || t2 > CYCLES_BRANCH) + fatal("Pair out of range\n"); + + t_pair = pair_timings[t1][t2]; + if (t_pair < 1) + fatal("Illegal pair timings : t1=%i t2=%i u_opcode=%02x v_opcode=%02x\n", t1, t2, u_pipe_opcode, opcode); + + /*Instruction can pair with previous*/ + temp_timing = t_pair; + if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(&temp_timing, &temp_deps, 0, 0, 0, 0, agi_stall); + u_pipe_full = 0; + decode_delay_offset = 0; + + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; + addr_regmask = 0; + return; + } + } +nopair: + /*Instruction can not pair with previous*/ + /*Run previous now*/ + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + u_pipe_full = 0; + regmask_modified = u_pipe_regmask; + addr_regmask = 0; + } + + if ((timings[opcode] & PAIR_U) && (decode_delay + decode_delay_offset) <= 0) { + int has_displacement; + + if (timings[opcode] & CYCLES_HASIMM) + has_displacement = codegen_timing_has_displacement(fetchdat, op_32); + else + has_displacement = 0; + + if ((!has_displacement || cpu_has_feature(CPU_FEATURE_MMX)) && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) { + /*Instruction might pair with next*/ + u_pipe_full = 1; + u_pipe_opcode = opcode; + u_pipe_timings = timings; + u_pipe_op_32 = op_32; + u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); + 
u_pipe_fetchdat = fetchdat; + u_pipe_decode_delay_offset = decode_delay_offset; + u_pipe_deps = deps; + decode_delay_offset = 0; + return; + } + } + /*Instruction can not pair and must run now*/ + if (check_agi(deps, opcode, fetchdat, op_32)) + agi_stall = 1; + codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); + decode_delay_offset = 0; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + addr_regmask = 0; } -codegen_timing_t codegen_timing_pentium = +void +codegen_timing_pentium_block_end(void) { - codegen_timing_pentium_start, - codegen_timing_pentium_prefix, - codegen_timing_pentium_opcode, - codegen_timing_pentium_block_start, - codegen_timing_pentium_block_end, - NULL + if (u_pipe_full) { + /*Run previous now*/ + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + codegen_block_cycles++; + codegen_block_cycles += COUNT(u_pipe_timings[u_pipe_opcode], u_pipe_deps[u_pipe_opcode], u_pipe_op_32) + decode_delay + decode_delay_offset; + u_pipe_full = 0; + } +} + +codegen_timing_t codegen_timing_pentium = { + codegen_timing_pentium_start, + codegen_timing_pentium_prefix, + codegen_timing_pentium_opcode, + codegen_timing_pentium_block_start, + codegen_timing_pentium_block_end, + NULL }; diff --git a/src/cpu/codegen_timing_winchip.c b/src/cpu/codegen_timing_winchip.c index a1ee02b63..11dd912b4 100644 --- a/src/cpu/codegen_timing_winchip.c +++ b/src/cpu/codegen_timing_winchip.c @@ -4,19 +4,21 @@ #include #include <86box/86box.h> #include "cpu.h" +#include <86box/mem.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_ops.h" #include "x87.h" -#include <86box/mem.h> #include "codegen.h" #include "codegen_ops.h" #include "codegen_timing_common.h" -#define CYCLES(c) (int *)c -#define CYCLES2(c16, c32) (int *)((-1 & ~0xffff) | c16 | (c32 << 8)) +#define CYCLES(c) (int *) c +#define CYCLES2(c16, c32) (int *) ((-1 & ~0xffff) | c16 | (c32 << 8)) -static int *opcode_timings[256] = 
-{ +static int *opcode_timings[256] = { + // clang-format off /*00*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), /*20*/ &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_mr, &timing_mr, &timing_rm, &timing_rm, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), @@ -36,10 +38,11 @@ static int *opcode_timings[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL + // clang-format on }; -static int *opcode_timings_mod3[256] = -{ +static int *opcode_timings_mod3[256] = { + // clang-format off /*00*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), NULL, /*10*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(2), CYCLES(3), /*20*/ &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, &timing_rr, CYCLES(4), CYCLES(3), @@ -59,10 +62,11 @@ static int *opcode_timings_mod3[256] = /*d0*/ CYCLES(4), CYCLES(4), 
CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), NULL, NULL, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), NULL + // clang-format on }; -static int *opcode_timings_0f[256] = -{ +static int *opcode_timings_0f[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -82,9 +86,10 @@ static int *opcode_timings_0f[256] = /*d0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, /*e0*/ NULL, &timing_rm, &timing_rm, NULL, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, /*f0*/ NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, NULL, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, &timing_rm, &timing_rm, &timing_rm, NULL, + // clang-format on }; -static int *opcode_timings_0f_mod3[256] = -{ +static int *opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), NULL, CYCLES(195), CYCLES(7), NULL, CYCLES(1000), CYCLES(10000), NULL, NULL, NULL, NULL, NULL, NULL, /*10*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), 
CYCLES(6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -104,60 +109,72 @@ static int *opcode_timings_0f_mod3[256] = /*d0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, /*e0*/ NULL, &timing_rr, &timing_rr, NULL, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, /*f0*/ NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, NULL, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, &timing_rr, &timing_rr, &timing_rr, NULL, + // clang-format on }; -static int *opcode_timings_shift[8] = -{ +static int *opcode_timings_shift[8] = { + // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) + // clang-format on }; -static int *opcode_timings_shift_mod3[8] = -{ +static int *opcode_timings_shift_mod3[8] = { + // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) + // clang-format on }; -static int *opcode_timings_f6[8] = -{ +static int *opcode_timings_f6[8] = { + // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static int *opcode_timings_f6_mod3[8] = -{ +static int *opcode_timings_f6_mod3[8] = { + // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static int *opcode_timings_f7[8] = -{ +static int *opcode_timings_f7[8] = { + // clang-format off &timing_rm, NULL, &timing_mm, &timing_mm, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static int *opcode_timings_f7_mod3[8] = -{ +static int *opcode_timings_f7_mod3[8] = { + // clang-format off &timing_rr, NULL, &timing_rr, &timing_rr, CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static int 
*opcode_timings_ff[8] = -{ +static int *opcode_timings_ff[8] = { + // clang-format off &timing_mm, &timing_mm, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL + // clang-format on }; -static int *opcode_timings_ff_mod3[8] = -{ +static int *opcode_timings_ff_mod3[8] = { + // clang-format off &timing_rr, &timing_rr, CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), NULL + // clang-format on }; -static int *opcode_timings_d8[8] = -{ +static int *opcode_timings_d8[8] = { + // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) + // clang-format on }; -static int *opcode_timings_d8_mod3[8] = -{ +static int *opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP FSUB FSUBR FDIV FDIVR*/ CYCLES(4), CYCLES(6), CYCLES(3), CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) + // clang-format on }; -static int *opcode_timings_d9[8] = -{ +static int *opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs FLDENV FLDCW FSTENV FSTCW*/ CYCLES(2), NULL, CYCLES(7), CYCLES(7), CYCLES(34), CYCLES(4), CYCLES(67), CYCLES(3) + // clang-format on }; -static int *opcode_timings_d9_mod3[64] = -{ +static int *opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), /*FXCH*/ @@ -174,26 +191,29 @@ static int *opcode_timings_d9_mod3[64] = CYCLES(300), CYCLES(58), CYCLES(676), CYCLES(355), NULL, NULL, CYCLES(3), CYCLES(3), /* opFPREM opFSQRT opFSINCOS opFRNDINT opFSCALE opFSIN opFCOS*/ CYCLES(70), NULL, CYCLES(72), CYCLES(292), CYCLES(21), CYCLES(30), CYCLES(474), CYCLES(474) + // clang-format on }; -static int *opcode_timings_da[8] = -{ +static int *opcode_timings_da[8] = { + // clang-format off /* FADDil FMULil FCOMil FCOMPil FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), 
CYCLES(78), CYCLES(78) + // clang-format on }; -static int *opcode_timings_da_mod3[8] = -{ +static int *opcode_timings_da_mod3[8] = { + // clang-format off NULL, NULL, NULL, NULL, NULL, CYCLES(5), NULL, NULL + // clang-format on }; - -static int *opcode_timings_db[8] = -{ +static int *opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil FLDe FSTPe*/ CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), NULL, CYCLES(8) + // clang-format on }; -static int *opcode_timings_db_mod3[64] = -{ +static int *opcode_timings_db_mod3[64] = { + // clang-format off NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -203,54 +223,63 @@ static int *opcode_timings_db_mod3[64] = NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + // clang-format on }; -static int *opcode_timings_dc[8] = -{ +static int *opcode_timings_dc[8] = { + // clang-format off /* opFADDd_a16 opFMULd_a16 opFCOMd_a16 opFCOMPd_a16 opFSUBd_a16 opFSUBRd_a16 opFDIVd_a16 opFDIVRd_a16*/ CYCLES(6), CYCLES(8), CYCLES(5), CYCLES(5), CYCLES(6), CYCLES(6), CYCLES(74), CYCLES(74) + // clang-format on }; -static int *opcode_timings_dc_mod3[8] = -{ +static int *opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr opFSUBRr opFSUBr opFDIVRr opFDIVr*/ CYCLES(4), CYCLES(6), NULL, NULL, CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) + // clang-format on }; -static int *opcode_timings_dd[8] = -{ +static int *opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd FRSTOR FSAVE FSTSW*/ CYCLES(2), NULL, CYCLES(8), CYCLES(8), CYCLES(131), NULL, CYCLES(154), CYCLES(5) + // clang-format on }; -static int *opcode_timings_dd_mod3[8] = -{ +static int *opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL + // 
clang-format on }; -static int *opcode_timings_de[8] = -{ +static int *opcode_timings_de[8] = { + // clang-format off /* FADDiw FMULiw FCOMiw FCOMPiw FSUBil FSUBRil FDIVil FDIVRil*/ CYCLES(10), CYCLES(12), CYCLES(9), CYCLES(9), CYCLES(10), CYCLES(10), CYCLES(78), CYCLES(78) + // clang-format on }; -static int *opcode_timings_de_mod3[8] = -{ +static int *opcode_timings_de_mod3[8] = { + // clang-format off /* FADD FMUL FCOMPP FSUB FSUBR FDIV FDIVR*/ CYCLES(4), CYCLES(6), NULL, CYCLES(3), CYCLES(4), CYCLES(4), CYCLES(72), CYCLES(72) + // clang-format on }; -static int *opcode_timings_df[8] = -{ +static int *opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw FILDiq FBSTP FISTPiq*/ CYCLES(6), NULL, CYCLES(7), CYCLES(7), NULL, CYCLES(8), CYCLES(172), CYCLES(8) + // clang-format on }; -static int *opcode_timings_df_mod3[8] = -{ +static int *opcode_timings_df_mod3[8] = { + // clang-format off /* FFREE FST FSTP FUCOM FUCOMP*/ CYCLES(3), NULL, CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(3), NULL, NULL + // clang-format on }; -static int *opcode_timings_8x[8] = -{ +static int *opcode_timings_8x[8] = { + // clang-format off &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm }; static int *opcode_timings_8x_mod3[8] = @@ -264,158 +293,169 @@ static int *opcode_timings_81[8] = static int *opcode_timings_81_mod3[8] = { &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_mr, &timing_rm + // clang-format on }; -static int timing_count; -static uint8_t last_prefix; +static int timing_count; +static uint8_t last_prefix; static uint32_t regmask_modified; -static inline int COUNT(int *c, int op_32) +static inline int +COUNT(int *c, int op_32) { - if ((uintptr_t)c <= 10000) - return (int)(uintptr_t)c; - if (((uintptr_t)c & ~0xffff) == (-1 & ~0xffff)) - { - if (op_32 & 0x100) - return ((uintptr_t)c >> 8) & 0xff; - return (uintptr_t)c & 0xff; - } - return *c; + if ((uintptr_t) c <= 10000) + return 
(int) (uintptr_t) c; + if (((uintptr_t) c & ~0xffff) == (-1 & ~0xffff)) { + if (op_32 & 0x100) + return ((uintptr_t) c >> 8) & 0xff; + return (uintptr_t) c & 0xff; + } + return *c; } -void codegen_timing_winchip_block_start(void) +void +codegen_timing_winchip_block_start(void) { - regmask_modified = 0; + regmask_modified = 0; } -void codegen_timing_winchip_start(void) +void +codegen_timing_winchip_start(void) { - timing_count = 0; - last_prefix = 0; + timing_count = 0; + last_prefix = 0; } -void codegen_timing_winchip_prefix(uint8_t prefix, uint32_t fetchdat) +void +codegen_timing_winchip_prefix(uint8_t prefix, uint32_t fetchdat) { - timing_count += COUNT(opcode_timings[prefix], 0); - last_prefix = prefix; + timing_count += COUNT(opcode_timings[prefix], 0); + last_prefix = prefix; } -void codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +void +codegen_timing_winchip_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - int **timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 1); + int **timings; + const uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? 
opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? 
opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + case 0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? 
opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + timing_count += COUNT(timings[opcode], op_32); + if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) + timing_count++; /*AGI stall*/ + codegen_block_cycles += timing_count; - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; - - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? 
opcode_deps_mod3 : opcode_deps; - break; - } - } - - timing_count += COUNT(timings[opcode], op_32); - if (regmask_modified & get_addr_regmask(deps[opcode], fetchdat, op_32)) - timing_count++; /*AGI stall*/ - codegen_block_cycles += timing_count; - - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); } -void codegen_timing_winchip_block_end(void) +void +codegen_timing_winchip_block_end(void) { + // } -codegen_timing_t codegen_timing_winchip = -{ - codegen_timing_winchip_start, - codegen_timing_winchip_prefix, - codegen_timing_winchip_opcode, - codegen_timing_winchip_block_start, - codegen_timing_winchip_block_end, - NULL +codegen_timing_t codegen_timing_winchip = { + codegen_timing_winchip_start, + codegen_timing_winchip_prefix, + codegen_timing_winchip_opcode, + codegen_timing_winchip_block_start, + codegen_timing_winchip_block_end, + NULL }; diff --git a/src/cpu/codegen_timing_winchip2.c b/src/cpu/codegen_timing_winchip2.c index f96304072..d4e32611e 100644 --- a/src/cpu/codegen_timing_winchip2.c +++ b/src/cpu/codegen_timing_winchip2.c @@ -14,6 +14,7 @@ #include <86box/86box.h> #include "cpu.h" #include <86box/mem.h> +#include <86box/plat_unused.h> #include "x86.h" #include "x86_ops.h" @@ -23,46 +24,46 @@ #include "codegen_timing_common.h" /*Instruction has different execution time for 16 and 32 bit data. 
Does not pair */ -#define CYCLES_HAS_MULTI (1 << 31) +#define CYCLES_HAS_MULTI (1 << 31) -#define CYCLES_FPU (1 << 30) +#define CYCLES_FPU (1 << 30) #define CYCLES_IS_MMX_MUL (1 << 29) #define CYCLES_IS_MMX_SHIFT (1 << 28) #define CYCLES_IS_MMX_ANY (1 << 27) #define CYCLES_IS_3DNOW (1 << 26) -#define CYCLES_MMX_MUL(c) (CYCLES_IS_MMX_MUL | c) +#define CYCLES_MMX_MUL(c) (CYCLES_IS_MMX_MUL | c) #define CYCLES_MMX_SHIFT(c) (CYCLES_IS_MMX_SHIFT | c) -#define CYCLES_MMX_ANY(c) (CYCLES_IS_MMX_ANY | c) -#define CYCLES_3DNOW(c) (CYCLES_IS_3DNOW | c) +#define CYCLES_MMX_ANY(c) (CYCLES_IS_MMX_ANY | c) +#define CYCLES_3DNOW(c) (CYCLES_IS_3DNOW | c) -#define CYCLES_IS_MMX (CYCLES_IS_MMX_MUL | CYCLES_IS_MMX_SHIFT | CYCLES_IS_MMX_ANY | CYCLES_IS_3DNOW) +#define CYCLES_IS_MMX (CYCLES_IS_MMX_MUL | CYCLES_IS_MMX_SHIFT | CYCLES_IS_MMX_ANY | CYCLES_IS_3DNOW) -#define GET_CYCLES(c) (c & ~(CYCLES_HAS_MULTI | CYCLES_FPU | CYCLES_IS_MMX)) +#define GET_CYCLES(c) (c & ~(CYCLES_HAS_MULTI | CYCLES_FPU | CYCLES_IS_MMX)) -#define CYCLES(c) c -#define CYCLES2(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) +#define CYCLES(c) c +#define CYCLES2(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) /*comp_time = cycles until instruction complete i_overlap = cycles that overlap with integer f_overlap = cycles that overlap with subsequent FPU*/ #define FPU_CYCLES(comp_time, i_overlap, f_overlap) (comp_time) | (i_overlap << 8) | (f_overlap << 16) | CYCLES_FPU -#define FPU_COMP_TIME(timing) (timing & 0xff) -#define FPU_I_OVERLAP(timing) ((timing >> 8) & 0xff) -#define FPU_F_OVERLAP(timing) ((timing >> 16) & 0xff) +#define FPU_COMP_TIME(timing) (timing & 0xff) +#define FPU_I_OVERLAP(timing) ((timing >> 8) & 0xff) +#define FPU_F_OVERLAP(timing) ((timing >> 16) & 0xff) -#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) +#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) -#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) +#define 
FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) -#define FPU_RESULT_LATENCY(timing) ((timing >> 8) & 0xff) +#define FPU_RESULT_LATENCY(timing) ((timing >> 8) & 0xff) -#define INVALID 0 +#define INVALID 0 -static uint32_t opcode_timings[256] = -{ +static uint32_t opcode_timings[256] = { + // clang-format off /*00*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, /*10*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), /*20*/ CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), @@ -82,10 +83,11 @@ static uint32_t opcode_timings[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), INVALID, INVALID, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), INVALID + // clang-format on }; -static uint32_t opcode_timings_mod3[256] = -{ +static uint32_t opcode_timings_mod3[256] = { + // clang-format off /*00*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), INVALID, /*10*/ CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(2), CYCLES(3), /*20*/ CYCLES(1), 
CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(1), CYCLES(3), @@ -105,10 +107,11 @@ static uint32_t opcode_timings_mod3[256] = /*d0*/ CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(4), CYCLES(15), CYCLES(14), CYCLES(2), CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*e0*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(5), CYCLES(14), CYCLES(14), CYCLES(16), CYCLES(16), CYCLES(3), CYCLES(3), CYCLES(17), CYCLES(3), CYCLES(14), CYCLES(14), CYCLES(14), CYCLES(14), /*f0*/ CYCLES(4), CYCLES(0), CYCLES(0), CYCLES(0), CYCLES(4), CYCLES(2), INVALID, INVALID, CYCLES(2), CYCLES(2), CYCLES(3), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(3), INVALID, + // clang-format on }; -static uint32_t opcode_timings_0f[256] = -{ +static uint32_t opcode_timings_0f[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -128,9 +131,10 @@ static uint32_t opcode_timings_0f[256] = /*d0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), /*e0*/ INVALID, CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), /*f0*/ INVALID, 
CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), CYCLES_MMX_SHIFT(2), INVALID, CYCLES_MMX_MUL(2), INVALID, INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), CYCLES_MMX_ANY(2), INVALID, + // clang-format on }; -static uint32_t opcode_timings_0f_mod3[256] = -{ +static uint32_t opcode_timings_0f_mod3[256] = { + // clang-format off /*00*/ CYCLES(20), CYCLES(11), CYCLES(11), CYCLES(10), INVALID, CYCLES(195), CYCLES(7), INVALID, CYCLES(1000), CYCLES(10000), INVALID, INVALID, INVALID, CYCLES_3DNOW(1), CYCLES(1), CYCLES_3DNOW(1), /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*20*/ CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -150,66 +154,78 @@ static uint32_t opcode_timings_0f_mod3[256] = /*d0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), /*e0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), /*f0*/ INVALID, CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), CYCLES_MMX_SHIFT(1), INVALID, CYCLES_MMX_MUL(1), INVALID, INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), CYCLES_MMX_ANY(1), INVALID, + // clang-format on }; -static uint32_t opcode_timings_shift[8] = -{ +static uint32_t opcode_timings_shift[8] = { + // clang-format off CYCLES(7), CYCLES(7), CYCLES(10), CYCLES(10), CYCLES(7), CYCLES(7), CYCLES(7), CYCLES(7) + // clang-format on }; -static uint32_t 
opcode_timings_shift_mod3[8] = -{ +static uint32_t opcode_timings_shift_mod3[8] = { + // clang-format off CYCLES(3), CYCLES(3), CYCLES(9), CYCLES(9), CYCLES(3), CYCLES(3), CYCLES(3), CYCLES(3) + // clang-format on }; -static uint32_t opcode_timings_f6[8] = -{ +static uint32_t opcode_timings_f6[8] = { + // clang-format off CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static uint32_t opcode_timings_f6_mod3[8] = -{ +static uint32_t opcode_timings_f6_mod3[8] = { + // clang-format off CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(13), CYCLES(14), CYCLES(16), CYCLES(19) + // clang-format on }; -static uint32_t opcode_timings_f7[8] = -{ +static uint32_t opcode_timings_f7[8] = { + // clang-format off CYCLES(2), INVALID, CYCLES(2), CYCLES(2), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static uint32_t opcode_timings_f7_mod3[8] = -{ +static uint32_t opcode_timings_f7_mod3[8] = { + // clang-format off CYCLES(1), INVALID, CYCLES(1), CYCLES(1), CYCLES(21), CYCLES2(22,38), CYCLES2(24,40), CYCLES2(27,43) + // clang-format on }; -static uint32_t opcode_timings_ff[8] = -{ +static uint32_t opcode_timings_ff[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID + // clang-format on }; -static uint32_t opcode_timings_ff_mod3[8] = -{ +static uint32_t opcode_timings_ff_mod3[8] = { + // clang-format off CYCLES(1), CYCLES(1), CYCLES(5), CYCLES(0), CYCLES(5), CYCLES(0), CYCLES(5), INVALID + // clang-format on }; -static uint32_t opcode_timings_d8[8] = -{ +static uint32_t opcode_timings_d8[8] = { + // clang-format off /* FADDs FMULs FCOMs FCOMPs*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), /* FSUBs FSUBRs FDIVs FDIVRs*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_d8_mod3[8] = -{ +static uint32_t 
opcode_timings_d8_mod3[8] = { + // clang-format off /* FADD FMUL FCOM FCOMP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), /* FSUB FSUBR FDIV FDIVR*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_d9[8] = -{ +static uint32_t opcode_timings_d9[8] = { + // clang-format off /* FLDs FSTs FSTPs*/ FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), /* FLDENV FLDCW FSTENV FSTCW*/ FPU_CYCLES(32,0,0), FPU_CYCLES(8,0,0), FPU_CYCLES(48,0,0), FPU_CYCLES(2,0,0) + // clang-format on }; -static uint32_t opcode_timings_d9_mod3[64] = -{ +static uint32_t opcode_timings_d9_mod3[64] = { + // clang-format off /*FLD*/ FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), @@ -238,32 +254,35 @@ static uint32_t opcode_timings_d9_mod3[64] = FPU_CYCLES(64,2,2), INVALID, FPU_CYCLES(70,69,2),FPU_CYCLES(89,2,2), /* opFRNDINT opFSCALE opFSIN opFCOS*/ FPU_CYCLES(9,0,0), FPU_CYCLES(20,5,0), FPU_CYCLES(65,2,2), FPU_CYCLES(65,2,2) + // clang-format on }; -static uint32_t opcode_timings_da[8] = -{ +static uint32_t opcode_timings_da[8] = { + // clang-format off /* FIADDl FIMULl FICOMl FICOMPl*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), /* FISUBl FISUBRl FIDIVl FIDIVRl*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) + // clang-format on }; -static uint32_t opcode_timings_da_mod3[8] = -{ +static uint32_t opcode_timings_da_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ INVALID, FPU_CYCLES(1,0,0), INVALID, INVALID + // clang-format on }; - -static uint32_t opcode_timings_db[8] = -{ +static uint32_t opcode_timings_db[8] = { + // clang-format off /* FLDil FSTil FSTPil*/ FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), /* FLDe FSTPe*/ INVALID, FPU_CYCLES(3,0,0), 
INVALID, FPU_CYCLES(3,0,0) + // clang-format on }; -static uint32_t opcode_timings_db_mod3[64] = -{ +static uint32_t opcode_timings_db_mod3[64] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, @@ -289,455 +308,469 @@ static uint32_t opcode_timings_db_mod3[64] = INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, + // clang-format on }; -static uint32_t opcode_timings_dc[8] = -{ +static uint32_t opcode_timings_dc[8] = { + // clang-format off /* FADDd FMULd FCOMd FCOMPd*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), /* FSUBd FSUBRd FDIVd FDIVRd*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_dc_mod3[8] = -{ +static uint32_t opcode_timings_dc_mod3[8] = { + // clang-format off /* opFADDr opFMULr*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2),FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_dd[8] = -{ +static uint32_t opcode_timings_dd[8] = { + // clang-format off /* FLDd FSTd FSTPd*/ FPU_CYCLES(1,0,0), INVALID, FPU_CYCLES(2,0,0), FPU_CYCLES(2,0,0), /* FRSTOR FSAVE FSTSW*/ FPU_CYCLES(70,0,0), INVALID, FPU_CYCLES(127,0,0), FPU_CYCLES(6,0,0) + // clang-format on }; -static uint32_t opcode_timings_dd_mod3[8] = -{ +static uint32_t opcode_timings_dd_mod3[8] = { + // clang-format off /* FFFREE FST FSTP*/ FPU_CYCLES(2,0,0), INVALID, FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0), /* FUCOM FUCOMP*/ FPU_CYCLES(1,0,0), FPU_CYCLES(1,0,0),INVALID, INVALID + // clang-format on }; -static uint32_t opcode_timings_de[8] = -{ +static uint32_t opcode_timings_de[8] = { + // clang-format off /* FIADDw FIMULw FICOMw FICOMPw*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), FPU_CYCLES(4,0,0), FPU_CYCLES(4,0,0), /* FISUBw FISUBRw FIDIVw FIDIVRw*/ FPU_CYCLES(6,2,2), FPU_CYCLES(6,2,2), 
FPU_CYCLES(42,38,2), FPU_CYCLES(42,38,2) + // clang-format on }; -static uint32_t opcode_timings_de_mod3[8] = -{ +static uint32_t opcode_timings_de_mod3[8] = { + // clang-format off /* FADDP FMULP FCOMPP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(1,0,0), /* FSUBP FSUBRP FDIVP FDIVRP*/ FPU_CYCLES(3,2,2), FPU_CYCLES(3,2,2), FPU_CYCLES(39,38,2), FPU_CYCLES(39,38,2) + // clang-format on }; -static uint32_t opcode_timings_df[8] = -{ +static uint32_t opcode_timings_df[8] = { + // clang-format off /* FILDiw FISTiw FISTPiw*/ FPU_CYCLES(3,2,2), INVALID, FPU_CYCLES(6,0,0), FPU_CYCLES(6,0,0), /* FILDiq FBSTP FISTPiq*/ INVALID, FPU_CYCLES(3,2,2), FPU_CYCLES(148,0,0), FPU_CYCLES(6,0,0) + // clang-format on }; -static uint32_t opcode_timings_df_mod3[8] = -{ +static uint32_t opcode_timings_df_mod3[8] = { + // clang-format off INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ FPU_CYCLES(6,0,0), INVALID, INVALID, INVALID + // clang-format on }; -static uint32_t opcode_timings_8x[8] = -{ +static uint32_t opcode_timings_8x[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_8x_mod3[8] = -{ +static uint32_t opcode_timings_8x_mod3[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_81[8] = -{ +static uint32_t opcode_timings_81[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) + // clang-format on }; -static uint32_t opcode_timings_81_mod3[8] = -{ +static uint32_t opcode_timings_81_mod3[8] = { + // clang-format off CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2), CYCLES(2) + // clang-format on }; -static int timing_count; -static uint8_t last_prefix; +static int timing_count; +static uint8_t last_prefix; static uint32_t regmask_modified; 
-static int decode_delay, decode_delay_offset; -static int fpu_latency; -static int fpu_st_latency[8]; +static int decode_delay; +static int decode_delay_offset; +static int fpu_latency; +static int fpu_st_latency[8]; -static int u_pipe_full; -static uint32_t u_pipe_opcode; +static int u_pipe_full; +static uint32_t u_pipe_opcode; static uint32_t *u_pipe_timings; -static uint32_t u_pipe_op_32; -static uint32_t u_pipe_regmask; -static uint32_t u_pipe_fetchdat; -static int u_pipe_decode_delay_offset; +static uint32_t u_pipe_op_32; +static uint32_t u_pipe_regmask; +static uint32_t u_pipe_fetchdat; +static int u_pipe_decode_delay_offset; static uint64_t *u_pipe_deps; -int can_pair(uint32_t timing_a, uint32_t timing_b, uint8_t regmask_b) +int +can_pair(uint32_t timing_a, uint32_t timing_b, uint8_t regmask_b) { - /*Only MMX/3DNow instructions can pair*/ - if (!(timing_b & CYCLES_IS_MMX)) - return 0; - /*Only one MMX multiply per cycle*/ - if ((timing_a & CYCLES_IS_MMX_MUL) && (timing_b & CYCLES_IS_MMX_MUL)) - return 0; - /*Only one MMX shift/pack per cycle*/ - if ((timing_a & CYCLES_IS_MMX_SHIFT) && (timing_b & CYCLES_IS_MMX_SHIFT)) - return 0; - /*Second instruction can not access registers written by first*/ - if (u_pipe_regmask & regmask_b) - return 0; - /*Must have had enough time to decode prefixes*/ - if ((decode_delay+decode_delay_offset+u_pipe_decode_delay_offset) > 0) - return 0; + /*Only MMX/3DNow instructions can pair*/ + if (!(timing_b & CYCLES_IS_MMX)) + return 0; + /*Only one MMX multiply per cycle*/ + if ((timing_a & CYCLES_IS_MMX_MUL) && (timing_b & CYCLES_IS_MMX_MUL)) + return 0; + /*Only one MMX shift/pack per cycle*/ + if ((timing_a & CYCLES_IS_MMX_SHIFT) && (timing_b & CYCLES_IS_MMX_SHIFT)) + return 0; + /*Second instruction can not access registers written by first*/ + if (u_pipe_regmask & regmask_b) + return 0; + /*Must have had enough time to decode prefixes*/ + if ((decode_delay + decode_delay_offset + u_pipe_decode_delay_offset) > 0) + return 0; - 
return 1; + return 1; } -static inline int COUNT(uint32_t c, int op_32) +static inline int +COUNT(uint32_t c, int op_32) { - if (c & CYCLES_FPU) - return FPU_I_LATENCY(c); - if (c & CYCLES_HAS_MULTI) - { - if (op_32 & 0x100) - return (c >> 8) & 0xff; - return c & 0xff; - } - return GET_CYCLES(c); + if (c & CYCLES_FPU) + return FPU_I_LATENCY(c); + if (c & CYCLES_HAS_MULTI) { + if (op_32 & 0x100) + return (c >> 8) & 0xff; + return c & 0xff; + } + return GET_CYCLES(c); } -static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) +static int +check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) { - uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); + uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); - /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not - cause AGIs with each other, but do with instructions that use it explicitly*/ - if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) - addr_regmask |= (1 << REG_ESP); + /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not + cause AGIs with each other, but do with instructions that use it explicitly*/ + if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) + addr_regmask |= (1 << REG_ESP); - return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; + return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; } -static int codegen_fpu_latencies(uint64_t deps, int reg) +static int +codegen_fpu_latencies(uint64_t deps, int reg) { - int latency = fpu_latency; + int latency = fpu_latency; - if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) - latency = fpu_st_latency[0]; - if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) - latency = fpu_st_latency[1]; - if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && 
fpu_st_latency[reg] > latency) - latency = fpu_st_latency[reg]; + if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) + latency = fpu_st_latency[0]; + if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) + latency = fpu_st_latency[1]; + if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) + latency = fpu_st_latency[reg]; - return latency; + return latency; } -#define SUB_AND_CLAMP(latency, count) \ - latency -= count; \ - if (latency < 0) \ - latency = 0 +#define SUB_AND_CLAMP(latency, count) \ + latency -= count; \ + if (latency < 0) \ + latency = 0 -static void codegen_fpu_latency_clock(int count) +static void +codegen_fpu_latency_clock(int count) { - SUB_AND_CLAMP(fpu_latency, count); - SUB_AND_CLAMP(fpu_st_latency[0], count); - SUB_AND_CLAMP(fpu_st_latency[1], count); - SUB_AND_CLAMP(fpu_st_latency[2], count); - SUB_AND_CLAMP(fpu_st_latency[3], count); - SUB_AND_CLAMP(fpu_st_latency[4], count); - SUB_AND_CLAMP(fpu_st_latency[5], count); - SUB_AND_CLAMP(fpu_st_latency[6], count); - SUB_AND_CLAMP(fpu_st_latency[7], count); + SUB_AND_CLAMP(fpu_latency, count); + SUB_AND_CLAMP(fpu_st_latency[0], count); + SUB_AND_CLAMP(fpu_st_latency[1], count); + SUB_AND_CLAMP(fpu_st_latency[2], count); + SUB_AND_CLAMP(fpu_st_latency[3], count); + SUB_AND_CLAMP(fpu_st_latency[4], count); + SUB_AND_CLAMP(fpu_st_latency[5], count); + SUB_AND_CLAMP(fpu_st_latency[6], count); + SUB_AND_CLAMP(fpu_st_latency[7], count); } -static void codegen_instruction(uint32_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) +static void +codegen_instruction(uint32_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) { - int instr_cycles, latency = 0; + int instr_cycles; + int latency = 0; - if ((timings[opcode] & CYCLES_FPU) && !(deps[opcode] & FPU_FXCH)) - instr_cycles = latency = 
codegen_fpu_latencies(deps[opcode], fetchdat & 7); - else - instr_cycles = 0; + if ((timings[opcode] & CYCLES_FPU) && !(deps[opcode] & FPU_FXCH)) + instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); + else + instr_cycles = 0; - if ((decode_delay + decode_delay_offset) > 0) - codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); - else - codegen_fpu_latency_clock(instr_cycles); - instr_cycles += COUNT(timings[opcode], op_32); - instr_cycles += exec_delay; - if ((decode_delay + decode_delay_offset) > 0) - codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; - else - codegen_block_cycles += instr_cycles; - decode_delay = (-instr_cycles) + 1; + if ((decode_delay + decode_delay_offset) > 0) + codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); + else + codegen_fpu_latency_clock(instr_cycles); + instr_cycles += COUNT(timings[opcode], op_32); + instr_cycles += exec_delay; + if ((decode_delay + decode_delay_offset) > 0) + codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; + else + codegen_block_cycles += instr_cycles; + decode_delay = (-instr_cycles) + 1; + if (deps[opcode] & FPU_POP) { + for (uint8_t c = 0; c < 7; c++) + fpu_st_latency[c] = fpu_st_latency[c + 1]; + fpu_st_latency[7] = 0; + } + if (deps[opcode] & FPU_POP2) { + for (uint8_t c = 0; c < 6; c++) + fpu_st_latency[c] = fpu_st_latency[c + 2]; + fpu_st_latency[6] = fpu_st_latency[7] = 0; + } + if (timings[opcode] & CYCLES_FPU) { +#if 0 + if (fpu_latency) + fatal("Bad latency FPU\n");*/ +#endif + fpu_latency = FPU_F_LATENCY(timings[opcode]); + } + + if (deps[opcode] & FPU_PUSH) { + for (uint8_t c = 0; c < 7; c++) + fpu_st_latency[c + 1] = fpu_st_latency[c]; + fpu_st_latency[0] = 0; + } + if (deps[opcode] & FPU_WRITE_ST0) { +#if 0 + if (fpu_st_latency[0]) + fatal("Bad latency ST0\n");*/ +#endif + fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_ST1) { 
+#if 0 + if (fpu_st_latency[1]) + fatal("Bad latency ST1\n");*/ +#endif + fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); + } + if (deps[opcode] & FPU_WRITE_STREG) { + int reg = fetchdat & 7; if (deps[opcode] & FPU_POP) - { - int c; - - for (c = 0; c < 7; c++) - fpu_st_latency[c] = fpu_st_latency[c+1]; - fpu_st_latency[7] = 0; - } - if (deps[opcode] & FPU_POP2) - { - int c; - - for (c = 0; c < 6; c++) - fpu_st_latency[c] = fpu_st_latency[c+2]; - fpu_st_latency[6] = fpu_st_latency[7] = 0; - } - if (timings[opcode] & CYCLES_FPU) - { - /* if (fpu_latency) - fatal("Bad latency FPU\n");*/ - fpu_latency = FPU_F_LATENCY(timings[opcode]); - } - - if (deps[opcode] & FPU_PUSH) - { - int c; - - for (c = 0; c < 7; c++) - fpu_st_latency[c+1] = fpu_st_latency[c]; - fpu_st_latency[0] = 0; - } - if (deps[opcode] & FPU_WRITE_ST0) - { -/* if (fpu_st_latency[0]) - fatal("Bad latency ST0\n");*/ - fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); - } - if (deps[opcode] & FPU_WRITE_ST1) - { -/* if (fpu_st_latency[1]) - fatal("Bad latency ST1\n");*/ - fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); - } - if (deps[opcode] & FPU_WRITE_STREG) - { - int reg = fetchdat & 7; - if (deps[opcode] & FPU_POP) - reg--; - if (reg >= 0 && - !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && - !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) - { -/* if (fpu_st_latency[reg]) - fatal("Bad latency STREG %i %08x %i %016llx %02x\n",fpu_st_latency[reg], fetchdat, reg, timings[opcode], opcode);*/ - fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); - } + reg--; + if (reg >= 0 && !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) { +#if 0 + if (fpu_st_latency[reg]) + fatal("Bad latency STREG %i %08x %i %016llx %02x\n",fpu_st_latency[reg], fetchdat, reg, timings[opcode], opcode);*/ +#endif + fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); } + } } -static void codegen_timing_winchip2_block_start(void) +static void 
+codegen_timing_winchip2_block_start(void) { - regmask_modified = 0; - decode_delay = decode_delay_offset = 0; - u_pipe_full = 0; + regmask_modified = 0; + decode_delay = decode_delay_offset = 0; + u_pipe_full = 0; } -static void codegen_timing_winchip2_start(void) +static void +codegen_timing_winchip2_start(void) { - timing_count = 0; - last_prefix = 0; + timing_count = 0; + last_prefix = 0; } -static void codegen_timing_winchip2_prefix(uint8_t prefix, uint32_t fetchdat) +static void +codegen_timing_winchip2_prefix(uint8_t prefix, uint32_t fetchdat) { - if (prefix == 0x0f) - { - /*0fh prefix is 'free'*/ - last_prefix = prefix; - return; - } - /*On WinChip all prefixes take 1 cycle to decode. Decode may be shadowed - by execution of previous instructions*/ - decode_delay_offset++; + if (prefix == 0x0f) { + /*0fh prefix is 'free'*/ last_prefix = prefix; + return; + } + /*On WinChip all prefixes take 1 cycle to decode. Decode may be shadowed + by execution of previous instructions*/ + decode_delay_offset++; + last_prefix = prefix; } -static void codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) +static void +codegen_timing_winchip2_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc)) { - uint32_t *timings; - uint64_t *deps; - int mod3 = ((fetchdat & 0xc0) == 0xc0); - int bit8 = !(opcode & 1); - int agi_stall = 0; + uint32_t *timings; + uint64_t *deps; + int mod3 = ((fetchdat & 0xc0) == 0xc0); + int bit8 = !(opcode & 1); + int agi_stall = 0; - switch (last_prefix) - { - case 0x0f: - timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; - deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; - break; + switch (last_prefix) { + case 0x0f: + timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; + deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; + break; - case 0xd8: - timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; - deps = mod3 ? 
opcode_deps_d8_mod3 : opcode_deps_d8; - opcode = (opcode >> 3) & 7; - break; - case 0xd9: - timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; - deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xda: - timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; - deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; - opcode = (opcode >> 3) & 7; - break; - case 0xdb: - timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; - deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; - opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; - break; - case 0xdc: - timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; - deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; - opcode = (opcode >> 3) & 7; - break; - case 0xdd: - timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; - deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; - opcode = (opcode >> 3) & 7; - break; - case 0xde: - timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; - deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; - opcode = (opcode >> 3) & 7; - break; - case 0xdf: - timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; - deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; - opcode = (opcode >> 3) & 7; - break; + case 0xd8: + timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; + deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; + opcode = (opcode >> 3) & 7; + break; + case 0xd9: + timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; + deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; + opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xda: + timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; + deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; + opcode = (opcode >> 3) & 7; + break; + case 0xdb: + timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; + deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; + opcode = mod3 ? 
opcode & 0x3f : (opcode >> 3) & 7; + break; + case 0xdc: + timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; + deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; + opcode = (opcode >> 3) & 7; + break; + case 0xdd: + timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; + deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; + opcode = (opcode >> 3) & 7; + break; + case 0xde: + timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; + deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; + opcode = (opcode >> 3) & 7; + break; + case 0xdf: + timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; + deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; + opcode = (opcode >> 3) & 7; + break; + + default: + switch (opcode) { + case 0x80: + case 0x82: + case 0x83: + timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; + deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; + opcode = (fetchdat >> 3) & 7; + break; + case 0x81: + timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; + deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xc0: + case 0xc1: + case 0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; + deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; + opcode = (fetchdat >> 3) & 7; + break; + + case 0xf6: + timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; + deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; + opcode = (fetchdat >> 3) & 7; + break; + case 0xf7: + timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; + deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; + opcode = (fetchdat >> 3) & 7; + break; + case 0xff: + timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; + deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; + opcode = (fetchdat >> 3) & 7; + break; default: - switch (opcode) - { - case 0x80: case 0x82: case 0x83: - timings = mod3 ? 
opcode_timings_8x_mod3 : opcode_timings_8x; - deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; - opcode = (fetchdat >> 3) & 7; - break; - case 0x81: - timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; - deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; - opcode = (fetchdat >> 3) & 7; - break; + timings = mod3 ? opcode_timings_mod3 : opcode_timings; + deps = mod3 ? opcode_deps_mod3 : opcode_deps; + break; + } + } - case 0xc0: case 0xc1: case 0xd0: case 0xd1: case 0xd2: case 0xd3: - timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; - deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; - opcode = (fetchdat >> 3) & 7; - break; + if (u_pipe_full) { + uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); - case 0xf6: - timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; - deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; - opcode = (fetchdat >> 3) & 7; - break; - case 0xf7: - timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; - deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; - opcode = (fetchdat >> 3) & 7; - break; - case 0xff: - timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; - deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; - opcode = (fetchdat >> 3) & 7; - break; + if (can_pair(u_pipe_timings[u_pipe_opcode], timings[opcode], regmask)) { + int cycles_a = u_pipe_timings[u_pipe_opcode] & 0xff; + int cycles_b = timings[opcode] & 0xff; + uint32_t timing = (cycles_a > cycles_b) ? u_pipe_timings[u_pipe_opcode] : timings[opcode]; + uint64_t temp_deps = 0; - default: - timings = mod3 ? opcode_timings_mod3 : opcode_timings; - deps = mod3 ? 
opcode_deps_mod3 : opcode_deps; - break; - } - } - - if (u_pipe_full) - { - uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); - - if (can_pair(u_pipe_timings[u_pipe_opcode], timings[opcode], regmask)) - { - int cycles_a = u_pipe_timings[u_pipe_opcode] & 0xff; - int cycles_b = timings[opcode] & 0xff; - uint32_t timing = (cycles_a > cycles_b) ? u_pipe_timings[u_pipe_opcode] : timings[opcode]; - uint64_t temp_deps = 0; - - if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - - codegen_instruction(&timing, &temp_deps, 0, 0, 0, 0, agi_stall); - u_pipe_full = 0; - decode_delay_offset = 0; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; - return; - } - else - { - /*No pairing, run first instruction now*/ - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); - u_pipe_full = 0; - regmask_modified = u_pipe_regmask; - } - } - if (timings[opcode] & CYCLES_IS_MMX) - { - /*Might pair with next instruction*/ - u_pipe_full = 1; - u_pipe_opcode = opcode; - u_pipe_timings = timings; - u_pipe_op_32 = op_32; - u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); - u_pipe_fetchdat = fetchdat; - u_pipe_decode_delay_offset = decode_delay_offset; - u_pipe_deps = deps; - decode_delay_offset = 0; - return; - } - - if (check_agi(deps, opcode, fetchdat, op_32)) + if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) agi_stall = 1; - codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); - decode_delay_offset = 0; - regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); -} -static void codegen_timing_winchip2_block_end(void) -{ - if (u_pipe_full) - { - 
int agi_stall = 0; - - if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) - agi_stall = 1; - codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); - u_pipe_full = 0; + codegen_instruction(&timing, &temp_deps, 0, 0, 0, 0, agi_stall); + u_pipe_full = 0; + decode_delay_offset = 0; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; + return; + } else { + /*No pairing, run first instruction now*/ + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + u_pipe_full = 0; + regmask_modified = u_pipe_regmask; } + } + if (timings[opcode] & CYCLES_IS_MMX) { + /*Might pair with next instruction*/ + u_pipe_full = 1; + u_pipe_opcode = opcode; + u_pipe_timings = timings; + u_pipe_op_32 = op_32; + u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); + u_pipe_fetchdat = fetchdat; + u_pipe_decode_delay_offset = decode_delay_offset; + u_pipe_deps = deps; + decode_delay_offset = 0; + return; + } + + if (check_agi(deps, opcode, fetchdat, op_32)) + agi_stall = 1; + codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); + decode_delay_offset = 0; + regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); } -codegen_timing_t codegen_timing_winchip2 = +static void +codegen_timing_winchip2_block_end(void) { - codegen_timing_winchip2_start, - codegen_timing_winchip2_prefix, - codegen_timing_winchip2_opcode, - codegen_timing_winchip2_block_start, - codegen_timing_winchip2_block_end, - NULL + if (u_pipe_full) { + int agi_stall = 0; + + if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) + agi_stall = 1; + codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, 
u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); + u_pipe_full = 0; + } +} + +codegen_timing_t codegen_timing_winchip2 = { + codegen_timing_winchip2_start, + codegen_timing_winchip2_prefix, + codegen_timing_winchip2_opcode, + codegen_timing_winchip2_block_start, + codegen_timing_winchip2_block_end, + NULL }; diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c index d045d8aaa..c5ed8a310 100644 --- a/src/cpu/cpu.c +++ b/src/cpu/cpu.c @@ -38,6 +38,9 @@ #include <86box/pic.h> #include <86box/pci.h> #include <86box/gdbstub.h> +#include <86box/plat_fallthrough.h> +#include <86box/plat_unused.h> + #ifdef USE_DYNAREC # include "codegen.h" #endif @@ -903,7 +906,7 @@ cpu_set(void) #endif x86_setopcodes_2386(ops_2386_386, ops_2386_ibm486_0f); cpu_features = CPU_FEATURE_MSR; - /* FALLTHROUGH */ + fallthrough; case CPU_386SX: case CPU_386DX: /* In case we get Deskpro 386 emulation */ @@ -1126,7 +1129,7 @@ cpu_set(void) case CPU_i486DX_SLENH: cpu_features = CPU_FEATURE_CR4 | CPU_FEATURE_VME; cpu_CR4_mask = CR4_VME | CR4_PVI | CR4_VME; - /* FALLTHROUGH */ + fallthrough; case CPU_RAPIDCAD: case CPU_i486SX: case CPU_i486DX: @@ -1439,7 +1442,9 @@ cpu_set(void) x86_setopcodes(ops_386, ops_pentium_0f, dynarec_ops_386, dynarec_ops_pentium_0f); else x86_setopcodes(ops_386, ops_c6x86mx_0f, dynarec_ops_386, dynarec_ops_c6x86mx_0f); - // x86_setopcodes(ops_386, ops_c6x86_0f, dynarec_ops_386, dynarec_ops_c6x86_0f); +#if 0 + x86_setopcodes(ops_386, ops_c6x86_0f, dynarec_ops_386, dynarec_ops_c6x86_0f); +#endif # else if (cpu_s->cpu_type == CPU_Cx6x86MX) x86_setopcodes(ops_386, ops_c6x86mx_0f); @@ -1447,7 +1452,9 @@ cpu_set(void) x86_setopcodes(ops_386, ops_pentium_0f); else x86_setopcodes(ops_386, ops_c6x86mx_0f); - // x86_setopcodes(ops_386, ops_c6x86_0f); +#if 0 + x86_setopcodes(ops_386, ops_c6x86_0f); +#endif # endif timing_rr = 1; /* register dest - register src */ @@ -2465,7 +2472,7 @@ cpu_ven_reset(void) case CPU_K6_3: case CPU_K6_2C: msr.amd_psor = (cpu_s->cpu_type >= CPU_K6_3) ? 
0x008cULL : 0x018cULL; - /* FALLTHROUGH */ + fallthrough; case CPU_K6_2: #if defined(DEV_BRANCH) && defined(USE_AMD_K5) case CPU_K5: @@ -2479,7 +2486,6 @@ cpu_ven_reset(void) case CPU_PENTIUM2: case CPU_PENTIUM2D: msr.mtrr_cap = 0x00000508ULL; - /* FALLTHROUGH */ break; } } @@ -3252,7 +3258,9 @@ amd_k_invalid_wrmsr: break; case 0x1b: cpu_log("APIC_BASE write: %08X%08X\n", EDX, EAX); - // msr.apic_base = EAX | ((uint64_t) EDX << 32); +#if 0 + msr.apic_base = EAX | ((uint64_t) EDX << 32); +#endif break; case 0x2a: break; @@ -3421,7 +3429,7 @@ i686_invalid_wrmsr: } static void -cpu_write(uint16_t addr, uint8_t val, void *priv) +cpu_write(uint16_t addr, uint8_t val, UNUSED(void *priv)) { if (addr == 0xf0) { /* Writes to F0 clear FPU error and deassert the interrupt. */ @@ -3503,7 +3511,7 @@ cpu_write(uint16_t addr, uint8_t val, void *priv) } static uint8_t -cpu_read(uint16_t addr, void *priv) +cpu_read(uint16_t addr, UNUSED(void *priv)) { if (addr == 0xf007) return 0x7f; diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h index 6c01f2469..cbd1c1129 100644 --- a/src/cpu/cpu.h +++ b/src/cpu/cpu.h @@ -432,15 +432,15 @@ typedef struct { } cpu_state_t; typedef struct { - uint16_t cwd; - uint16_t swd; - uint16_t tag; - uint16_t foo; - uint32_t fip; - uint32_t fdp; - uint16_t fcs; - uint16_t fds; - floatx80 st_space[8]; + uint16_t cwd; + uint16_t swd; + uint16_t tag; + uint16_t foo; + uint32_t fip; + uint32_t fdp; + uint16_t fcs; + uint16_t fds; + floatx80 st_space[8]; unsigned char tos; unsigned char align1; unsigned char align2; diff --git a/src/cpu/x86.c b/src/cpu/x86.c index 76101c344..182431773 100644 --- a/src/cpu/x86.c +++ b/src/cpu/x86.c @@ -59,10 +59,12 @@ uint32_t rmdat; uint64_t xt_cpu_multi; /* Variables for handling the non-maskable interrupts. */ -int nmi = 0, nmi_auto_clear = 0; +int nmi = 0; +int nmi_auto_clear = 0; /* Was the CPU ever reset? */ -int x86_was_reset = 0, soft_reset_pci = 0; +int x86_was_reset = 0; +int soft_reset_pci = 0; /* Is the TRAP flag on? 
*/ int trap = 0; @@ -71,7 +73,8 @@ int trap = 0; uint32_t easeg; /* This is for the OPTI 283 special reset handling mode. */ -int reset_on_hlt, hlt_reset_pending; +int reset_on_hlt; +int hlt_reset_pending; #ifdef ENABLE_X86_LOG void dumpregs(int); @@ -171,7 +174,10 @@ makemod1table(void) static void makeznptable(void) { - int c, d, e; + int c; + int d; + int e; + for (c = 0; c < 256; c++) { d = 0; for (e = 0; e < 8; e++) { diff --git a/src/cpu/x86.h b/src/cpu/x86.h index 337619fa4..77d9329fe 100644 --- a/src/cpu/x86.h +++ b/src/cpu/x86.h @@ -9,30 +9,44 @@ that we don't end up with an unnecessarily short block*/ #define ABRT_EXPECTED 0x80 -extern uint8_t opcode, opcode2; +extern uint8_t opcode; +extern uint8_t opcode2; extern uint8_t flags_p; extern uint8_t znptable8[256]; -extern uint16_t zero, oldcs; -extern uint16_t lastcs, lastpc; +extern uint16_t zero; +extern uint16_t oldcs; +extern uint16_t lastcs; +extern uint16_t lastpc; extern uint16_t *mod1add[2][8]; extern uint16_t znptable16[65536]; -extern int x86_was_reset, trap; -extern int codegen_flat_ss, codegen_flat_ds; -extern int timetolive, keyboardtimer, trap; -extern int optype, stack32; -extern int oldcpl, cgate32, cpl_override; +extern int x86_was_reset; +extern int trap; +extern int codegen_flat_ss; +extern int codegen_flat_ds; +extern int timetolive; +extern int keyboardtimer; +extern int trap; +extern int optype; +extern int stack32; +extern int oldcpl; +extern int cgate32; +extern int cpl_override; extern int nmi_enable; -extern int oddeven, inttype; +extern int oddeven; +extern int inttype; extern uint32_t use32; -extern uint32_t rmdat, easeg; -extern uint32_t oxpc, flags_zn; +extern uint32_t rmdat; +extern uint32_t easeg; +extern uint32_t oxpc; +extern uint32_t flags_zn; extern uint32_t abrt_error; extern uint32_t backupregs[16]; extern uint32_t *mod1seg[8]; -extern uint32_t *eal_r, *eal_w; +extern uint32_t *eal_r; +extern uint32_t *eal_w; #define fetchdat rmdat @@ -68,13 +82,13 @@ extern uint32_t 
*eal_r, *eal_w; enum { ABRT_NONE = 0, - ABRT_GEN, - ABRT_TS = 0xA, - ABRT_NP = 0xB, - ABRT_SS = 0xC, - ABRT_GPF = 0xD, - ABRT_PF = 0xE, - ABRT_DE = 0x40 /* INT 0, but we have to distinguish it from ABRT_NONE. */ + ABRT_GEN = 1, + ABRT_TS = 0xA, + ABRT_NP = 0xB, + ABRT_SS = 0xC, + ABRT_GPF = 0xD, + ABRT_PF = 0xE, + ABRT_DE = 0x40 /* INT 0, but we have to distinguish it from ABRT_NONE. */ }; extern void x86_doabrt(int x86_abrt); diff --git a/src/cpu/x86_ops_3dnow.h b/src/cpu/x86_ops_3dnow.h index ff657d708..e9826a7e3 100644 --- a/src/cpu/x86_ops_3dnow.h +++ b/src/cpu/x86_ops_3dnow.h @@ -139,7 +139,8 @@ opPSWAPD(uint32_t fetchdat) { MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); - float tempf, tempf2; + float tempf; + float tempf2; MMX_GETSRC(); diff --git a/src/cpu/x86_ops_arith.h b/src/cpu/x86_ops_arith.h index 41c655d09..09d64bb87 100644 --- a/src/cpu/x86_ops_arith.h +++ b/src/cpu/x86_ops_arith.h @@ -340,6 +340,7 @@ static int opCMP_b_rmw_a16(uint32_t fetchdat) { uint8_t dst; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -359,6 +360,7 @@ static int opCMP_b_rmw_a32(uint32_t fetchdat) { uint8_t dst; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -379,6 +381,7 @@ static int opCMP_w_rmw_a16(uint32_t fetchdat) { uint16_t dst; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -398,6 +401,7 @@ static int opCMP_w_rmw_a32(uint32_t fetchdat) { uint16_t dst; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -418,6 +422,7 @@ static int opCMP_l_rmw_a16(uint32_t fetchdat) { uint32_t dst; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -437,6 +442,7 @@ static int opCMP_l_rmw_a32(uint32_t fetchdat) { uint32_t dst; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -457,6 +463,7 @@ static int opCMP_b_rm_a16(uint32_t fetchdat) { uint8_t src; + fetch_ea_16(fetchdat); if (cpu_mod != 3) 
SEG_CHECK_READ(cpu_state.ea_seg); @@ -472,6 +479,7 @@ static int opCMP_b_rm_a32(uint32_t fetchdat) { uint8_t src; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -488,6 +496,7 @@ static int opCMP_w_rm_a16(uint32_t fetchdat) { uint16_t src; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -503,6 +512,7 @@ static int opCMP_w_rm_a32(uint32_t fetchdat) { uint16_t src; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -519,6 +529,7 @@ static int opCMP_l_rm_a16(uint32_t fetchdat) { uint32_t src; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -534,6 +545,7 @@ static int opCMP_l_rm_a32(uint32_t fetchdat) { uint32_t src; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -550,6 +562,7 @@ static int opCMP_AL_imm(uint32_t fetchdat) { uint8_t src = getbytef(); + setsub8(AL, src); CLOCK_CYCLES(timing_rr); PREFETCH_RUN(timing_rr, 2, -1, 0, 0, 0, 0, 0); @@ -560,6 +573,7 @@ static int opCMP_AX_imm(uint32_t fetchdat) { uint16_t src = getwordf(); + setsub16(AX, src); CLOCK_CYCLES(timing_rr); PREFETCH_RUN(timing_rr, 3, -1, 0, 0, 0, 0, 0); @@ -570,6 +584,7 @@ static int opCMP_EAX_imm(uint32_t fetchdat) { uint32_t src = getlong(); + if (cpu_state.abrt) return 1; setsub32(EAX, src); @@ -581,7 +596,9 @@ opCMP_EAX_imm(uint32_t fetchdat) static int opTEST_b_a16(uint32_t fetchdat) { - uint8_t temp, temp2; + uint8_t temp; + uint8_t temp2; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -601,7 +618,9 @@ opTEST_b_a16(uint32_t fetchdat) static int opTEST_b_a32(uint32_t fetchdat) { - uint8_t temp, temp2; + uint8_t temp; + uint8_t temp2; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -622,7 +641,9 @@ opTEST_b_a32(uint32_t fetchdat) static int opTEST_w_a16(uint32_t fetchdat) { - uint16_t temp, temp2; + uint16_t temp; + uint16_t temp2; + fetch_ea_16(fetchdat); if (cpu_mod != 3) 
SEG_CHECK_READ(cpu_state.ea_seg); @@ -642,7 +663,9 @@ opTEST_w_a16(uint32_t fetchdat) static int opTEST_w_a32(uint32_t fetchdat) { - uint16_t temp, temp2; + uint16_t temp; + uint16_t temp2; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -663,7 +686,9 @@ opTEST_w_a32(uint32_t fetchdat) static int opTEST_l_a16(uint32_t fetchdat) { - uint32_t temp, temp2; + uint32_t temp; + uint32_t temp2; + fetch_ea_16(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -683,7 +708,9 @@ opTEST_l_a16(uint32_t fetchdat) static int opTEST_l_a32(uint32_t fetchdat) { - uint32_t temp, temp2; + uint32_t temp; + uint32_t temp2; + fetch_ea_32(fetchdat); if (cpu_mod != 3) SEG_CHECK_READ(cpu_state.ea_seg); @@ -803,7 +830,8 @@ opTEST_EAX(uint32_t fetchdat) static int op80_a16(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -823,7 +851,8 @@ op80_a16(uint32_t fetchdat) static int op80_a32(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -843,7 +872,8 @@ op80_a32(uint32_t fetchdat) static int op81_w_a16(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -863,7 +893,8 @@ op81_w_a16(uint32_t fetchdat) static int op81_w_a32(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -883,7 +914,8 @@ op81_w_a32(uint32_t fetchdat) static int op81_l_a16(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -903,7 +935,8 @@ op81_l_a16(uint32_t fetchdat) static int op81_l_a32(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -924,7 +957,8 @@ op81_l_a32(uint32_t fetchdat) static int op83_w_a16(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst;
fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -946,7 +980,8 @@ op83_w_a16(uint32_t fetchdat) static int op83_w_a32(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -969,7 +1004,8 @@ op83_w_a32(uint32_t fetchdat) static int op83_l_a16(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -991,7 +1027,8 @@ op83_l_a16(uint32_t fetchdat) static int op83_l_a32(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) diff --git a/src/cpu/x86_ops_atomic.h b/src/cpu/x86_ops_atomic.h index 4f3439973..f0cab34e1 100644 --- a/src/cpu/x86_ops_atomic.h +++ b/src/cpu/x86_ops_atomic.h @@ -1,7 +1,9 @@ static int opCMPXCHG_b_a16(uint32_t fetchdat) { - uint8_t temp, temp2 = AL; + uint8_t temp; + uint8_t temp2 = AL; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteab(); @@ -20,7 +22,9 @@ opCMPXCHG_b_a16(uint32_t fetchdat) static int opCMPXCHG_b_a32(uint32_t fetchdat) { - uint8_t temp, temp2 = AL; + uint8_t temp; + uint8_t temp2 = AL; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteab(); @@ -40,7 +44,9 @@ opCMPXCHG_b_a32(uint32_t fetchdat) static int opCMPXCHG_w_a16(uint32_t fetchdat) { - uint16_t temp, temp2 = AX; + uint16_t temp; + uint16_t temp2 = AX; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteaw(); @@ -59,7 +65,9 @@ opCMPXCHG_w_a16(uint32_t fetchdat) static int opCMPXCHG_w_a32(uint32_t fetchdat) { - uint16_t temp, temp2 = AX; + uint16_t temp; + uint16_t temp2 = AX; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteaw(); @@ -79,7 +87,9 @@ opCMPXCHG_w_a32(uint32_t fetchdat) static int opCMPXCHG_l_a16(uint32_t fetchdat) { - uint32_t temp, temp2 = EAX; + uint32_t temp; + uint32_t temp2 = EAX; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteal(); @@ -98,7 +108,9 @@
opCMPXCHG_l_a16(uint32_t fetchdat) static int opCMPXCHG_l_a32(uint32_t fetchdat) { - uint32_t temp, temp2 = EAX; + uint32_t temp; + uint32_t temp2 = EAX; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteal(); @@ -119,7 +131,11 @@ opCMPXCHG_l_a32(uint32_t fetchdat) static int opCMPXCHG8B_a16(uint32_t fetchdat) { - uint32_t temp, temp_hi, temp2 = EAX, temp2_hi = EDX; + uint32_t temp; + uint32_t temp_hi; + uint32_t temp2 = EAX; + uint32_t temp2_hi = EDX; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteal(); @@ -146,7 +162,11 @@ opCMPXCHG8B_a16(uint32_t fetchdat) static int opCMPXCHG8B_a32(uint32_t fetchdat) { - uint32_t temp, temp_hi, temp2 = EAX, temp2_hi = EDX; + uint32_t temp; + uint32_t temp_hi; + uint32_t temp2 = EAX; + uint32_t temp2_hi = EDX; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); temp = geteal(); @@ -177,7 +197,9 @@ static int opXADD_b_a16(uint32_t fetchdat) { uint8_t temp; - uint8_t src, dest; + uint8_t src; + uint8_t dest; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = getr8(cpu_reg); @@ -197,7 +219,9 @@ static int opXADD_b_a32(uint32_t fetchdat) { uint8_t temp; - uint8_t src, dest; + uint8_t src; + uint8_t dest; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = getr8(cpu_reg); @@ -218,7 +242,9 @@ static int opXADD_w_a16(uint32_t fetchdat) { uint16_t temp; - uint16_t src, dest; + uint16_t src; + uint16_t dest; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = cpu_state.regs[cpu_reg].w; @@ -238,7 +264,9 @@ static int opXADD_w_a32(uint32_t fetchdat) { uint16_t temp; - uint16_t src, dest; + uint16_t src; + uint16_t dest; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = cpu_state.regs[cpu_reg].w; @@ -259,7 +287,9 @@ static int opXADD_l_a16(uint32_t fetchdat) { uint32_t temp; - uint32_t src, dest; + uint32_t src; + uint32_t dest; + fetch_ea_16(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = cpu_state.regs[cpu_reg].l; @@ -279,7 
+309,9 @@ static int opXADD_l_a32(uint32_t fetchdat) { uint32_t temp; - uint32_t src, dest; + uint32_t src; + uint32_t dest; + fetch_ea_32(fetchdat); SEG_CHECK_WRITE(cpu_state.ea_seg); src = cpu_state.regs[cpu_reg].l; diff --git a/src/cpu/x86_ops_bcd.h b/src/cpu/x86_ops_bcd.h index d3ff97ead..b4779ab3e 100644 --- a/src/cpu/x86_ops_bcd.h +++ b/src/cpu/x86_ops_bcd.h @@ -19,6 +19,7 @@ static int opAAD(uint32_t fetchdat) { int base = getbytef(); + if (!cpu_isintel) base = 10; AL = (AH * base) + AL; @@ -33,6 +34,7 @@ static int opAAM(uint32_t fetchdat) { int base = getbytef(); + if (!base || !cpu_isintel) base = 10; AH = AL / base; @@ -63,7 +65,9 @@ opAAS(uint32_t fetchdat) static int opDAA(uint32_t fetchdat) { - uint16_t tempw, old_AL, old_CF; + uint16_t tempw; + uint16_t old_AL; + uint16_t old_CF; flags_rebuild(); old_AL = AL; @@ -98,7 +102,9 @@ opDAA(uint32_t fetchdat) static int opDAS(uint32_t fetchdat) { - uint16_t tempw, old_AL, old_CF; + uint16_t tempw; + uint16_t old_AL; + uint16_t old_CF; flags_rebuild(); old_AL = AL; diff --git a/src/cpu/x86_ops_bit.h b/src/cpu/x86_ops_bit.h index 8514e8e1c..b72142b5a 100644 --- a/src/cpu/x86_ops_bit.h +++ b/src/cpu/x86_ops_bit.h @@ -201,13 +201,17 @@ opBT_l_r_a32(uint32_t fetchdat) return 0; \ } +// clang-format off opBT(C, ^=) - opBT(R, &= ~) - opBT(S, |=) +opBT(R, &= ~) +opBT(S, |=) + // clang-format on - static int opBA_w_a16(uint32_t fetchdat) +static int +opBA_w_a16(uint32_t fetchdat) { - int tempc, count; + int tempc; + int count; uint16_t temp; fetch_ea_16(fetchdat); @@ -258,7 +262,8 @@ opBT(C, ^=) static int opBA_w_a32(uint32_t fetchdat) { - int tempc, count; + int tempc; + int count; uint16_t temp; fetch_ea_32(fetchdat); @@ -310,7 +315,8 @@ opBA_w_a32(uint32_t fetchdat) static int opBA_l_a16(uint32_t fetchdat) { - int tempc, count; + int tempc; + int count; uint32_t temp; fetch_ea_16(fetchdat); @@ -361,7 +367,8 @@ opBA_l_a16(uint32_t fetchdat) static int opBA_l_a32(uint32_t fetchdat) { - int tempc, count; + int 
tempc; + int count; uint32_t temp; fetch_ea_32(fetchdat); diff --git a/src/cpu/x86_ops_call.h b/src/cpu/x86_ops_call.h index 88899cef8..731f58ec8 100644 --- a/src/cpu/x86_ops_call.h +++ b/src/cpu/x86_ops_call.h @@ -189,8 +189,10 @@ static int opCALL_far_w(uint32_t fetchdat) { - uint32_t old_cs, old_pc; - uint16_t new_cs, new_pc; + uint32_t old_cs; + uint32_t old_pc; + uint16_t new_cs; + uint16_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -209,8 +211,10 @@ opCALL_far_w(uint32_t fetchdat) static int opCALL_far_l(uint32_t fetchdat) { - uint32_t old_cs, old_pc; - uint32_t new_cs, new_pc; + uint32_t old_cs; + uint32_t old_pc; + uint32_t new_cs; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -230,8 +234,10 @@ opCALL_far_l(uint32_t fetchdat) static int opFF_w_a16(uint32_t fetchdat) { - uint16_t old_cs, new_cs; - uint32_t old_pc, new_pc; + uint16_t old_cs; + uint16_t new_cs; + uint32_t old_pc; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -392,8 +398,10 @@ opFF_w_a16(uint32_t fetchdat) static int opFF_w_a32(uint32_t fetchdat) { - uint16_t old_cs, new_cs; - uint32_t old_pc, new_pc; + uint16_t old_cs; + uint16_t new_cs; + uint32_t old_pc; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -555,8 +563,10 @@ opFF_w_a32(uint32_t fetchdat) static int opFF_l_a16(uint32_t fetchdat) { - uint16_t old_cs, new_cs; - uint32_t old_pc, new_pc; + uint16_t old_cs; + uint16_t new_cs; + uint32_t old_pc; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); @@ -717,8 +727,10 @@ opFF_l_a16(uint32_t fetchdat) static int opFF_l_a32(uint32_t fetchdat) { - uint16_t old_cs, new_cs; - uint32_t old_pc, new_pc; + uint16_t old_cs; + uint16_t new_cs; + uint32_t old_pc; + uint32_t new_pc; int cycles_old = cycles; UN_USED(cycles_old); diff --git a/src/cpu/x86_ops_fpu.h b/src/cpu/x86_ops_fpu.h index 29e999941..849e24e3d 100644 --- a/src/cpu/x86_ops_fpu.h +++ b/src/cpu/x86_ops_fpu.h @@ -97,7 +97,9 @@ opWAIT(uint32_t 
fetchdat) return 1; } - // if (!cpu_use_dynarec && fpu_softfloat) { +#if 0 + if (!cpu_use_dynarec && fpu_softfloat) { +#endif if (fpu_softfloat) { if (fpu_state.swd & FPU_SW_Summary) { if (cr0 & 0x20) { diff --git a/src/cpu/x86_ops_i686.h b/src/cpu/x86_ops_i686.h index f2b07a1c4..f11bca945 100644 --- a/src/cpu/x86_ops_i686.h +++ b/src/cpu/x86_ops_i686.h @@ -178,11 +178,13 @@ fx_save_stor_common(uint32_t fetchdat, int bits) uint8_t ftwb = 0; uint16_t rec_ftw = 0; uint16_t fpus = 0; - int i, mmx_tags = 0; + int i; + int mmx_tags = 0; uint16_t exp = 0x0000; uint64_t mant = 0x0000000000000000ULL; uint64_t fraction; - uint8_t jm, valid; + uint8_t jm; + uint8_t valid; /* Exp_all_1 Exp_all_0 Frac_all_0 J M FTW_Valid | Ent ----------------------------------------------+------ */ uint8_t ftw_table_idx; diff --git a/src/cpu/x86_ops_jump.h b/src/cpu/x86_ops_jump.h index a1503a75e..33e1ed4f0 100644 --- a/src/cpu/x86_ops_jump.h +++ b/src/cpu/x86_ops_jump.h @@ -88,7 +88,8 @@ opJ(LE) opJ(NLE) // clang-format on - static int opLOOPNE_w(uint32_t fetchdat) +static int +opLOOPNE_w(uint32_t fetchdat) { int8_t offset = (int8_t) getbytef(); CX--; @@ -271,8 +272,10 @@ opJMP_r32(uint32_t fetchdat) static int opJMP_far_a16(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; uint32_t old_pc; + addr = getwordf(); seg = getword(); if (cpu_state.abrt) @@ -289,7 +292,9 @@ static int opJMP_far_a32(uint32_t fetchdat) { uint16_t seg; - uint32_t addr, old_pc; + uint32_t addr; + uint32_t old_pc; + addr = getlong(); seg = getword(); if (cpu_state.abrt) @@ -307,6 +312,7 @@ static int opCALL_r16(uint32_t fetchdat) { int16_t addr = (int16_t) getwordf(); + PUSH_W(cpu_state.pc); cpu_state.pc += addr; cpu_state.pc &= 0xffff; @@ -320,6 +326,7 @@ static int opCALL_r32(uint32_t fetchdat) { int32_t addr = getlong(); + if (cpu_state.abrt) return 1; PUSH_L(cpu_state.pc); diff --git a/src/cpu/x86_ops_misc.h b/src/cpu/x86_ops_misc.h index 60ed873e4..170457caa 100644 --- 
a/src/cpu/x86_ops_misc.h +++ b/src/cpu/x86_ops_misc.h @@ -55,7 +55,8 @@ opF6_a16(uint32_t fetchdat) int tempws2 = 0; uint16_t tempw = 0; uint16_t src16; - uint8_t src, dst; + uint8_t src; + uint8_t dst; int8_t temps; fetch_ea_16(fetchdat); @@ -173,7 +174,8 @@ opF6_a32(uint32_t fetchdat) int tempws2 = 0; uint16_t tempw = 0; uint16_t src16; - uint8_t src, dst; + uint8_t src; + uint8_t dst; int8_t temps; fetch_ea_32(fetchdat); @@ -404,7 +406,8 @@ opF7_w_a32(uint32_t fetchdat) int tempws; int tempws2 = 1; int16_t temps16; - uint16_t src, dst; + uint16_t src; + uint16_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -514,7 +517,8 @@ static int opF7_l_a16(uint32_t fetchdat) { uint64_t temp64; - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -606,7 +610,8 @@ static int opF7_l_a32(uint32_t fetchdat) { uint64_t temp64; - uint32_t src, dst; + uint32_t src; + uint32_t dst; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -739,7 +744,8 @@ opLOCK(uint32_t fetchdat) static int opBOUND_w_a16(uint32_t fetchdat) { - int16_t low, high; + int16_t low; + int16_t high; fetch_ea_16(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -761,7 +767,8 @@ opBOUND_w_a16(uint32_t fetchdat) static int opBOUND_w_a32(uint32_t fetchdat) { - int16_t low, high; + int16_t low; + int16_t high; fetch_ea_32(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -784,7 +791,8 @@ opBOUND_w_a32(uint32_t fetchdat) static int opBOUND_l_a16(uint32_t fetchdat) { - int32_t low, high; + int32_t low; + int32_t high; fetch_ea_16(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -806,7 +814,8 @@ opBOUND_l_a16(uint32_t fetchdat) static int opBOUND_l_a32(uint32_t fetchdat) { - int32_t low, high; + int32_t low; + int32_t high; fetch_ea_32(fetchdat); ILLEGAL_ON(cpu_mod == 3); diff --git a/src/cpu/x86_ops_mmx_mov.h b/src/cpu/x86_ops_mmx_mov.h index 65bbb0c01..f04e271ef 100644 --- a/src/cpu/x86_ops_mmx_mov.h +++ b/src/cpu/x86_ops_mmx_mov.h @@ -115,7 +115,7 @@ opMOVD_mm_l_a32(uint32_t fetchdat) static int 
opMOVD_mm_l_a16_cx(uint32_t fetchdat) { - MMX_REG *op; + const MMX_REG *op; if (in_smm) return opSMINT(fetchdat); @@ -144,7 +144,7 @@ opMOVD_mm_l_a16_cx(uint32_t fetchdat) static int opMOVD_mm_l_a32_cx(uint32_t fetchdat) { - MMX_REG *op; + const MMX_REG *op; if (in_smm) return opSMINT(fetchdat); diff --git a/src/cpu/x86_ops_mov_ctrl.h b/src/cpu/x86_ops_mov_ctrl.h index d95116c93..b0c841f83 100644 --- a/src/cpu/x86_ops_mov_ctrl.h +++ b/src/cpu/x86_ops_mov_ctrl.h @@ -251,12 +251,17 @@ opMOV_DRx_r_a32(uint32_t fetchdat) static void opMOV_r_TRx(void) { - // uint32_t base; +#if 0 + uint32_t base; + + base = _tr[4] & 0xfffff800; +#endif - // base = _tr[4] & 0xfffff800; switch (cpu_reg) { case 3: - // pclog("[R] %08X cache = %08X\n", base + cache_index, _tr[3]); +#if 0 + pclog("[R] %08X cache = %08X\n", base + cache_index, _tr[3]); +#endif _tr[3] = *(uint32_t *) &(_cache[cache_index]); cache_index = (cache_index + 4) & 0xf; break; @@ -293,42 +298,57 @@ static void opMOV_TRx_r(void) { uint32_t base; - int i, ctl; + int i; + int ctl; _tr[cpu_reg] = cpu_state.regs[cpu_rm].l; base = _tr[4] & 0xfffff800; ctl = _tr[5] & 3; switch (cpu_reg) { case 3: - // pclog("[W] %08X cache = %08X\n", base + cache_index, _tr[3]); +#if 0 + pclog("[W] %08X cache = %08X\n", base + cache_index, _tr[3]); +#endif *(uint32_t *) &(_cache[cache_index]) = _tr[3]; cache_index = (cache_index + 4) & 0xf; break; case 4: - // if (!(cr0 & 1) && !(_tr[5] & (1 << 19))) - // pclog("TAG = %08X, DEST = %08X\n", base, base + cache_index - 16); +#if 0 + if (!(cr0 & 1) && !(_tr[5] & (1 << 19))) + pclog("TAG = %08X, DEST = %08X\n", base, base + cache_index - 16); +#endif break; case 5: - // pclog("[16] EXT = %i (%i), SET = %04X\n", !!(_tr[5] & (1 << 19)), _tr[5] & 0x03, _tr[5] & 0x7f0); +#if 0 + pclog("[16] EXT = %i (%i), SET = %04X\n", !!(_tr[5] & (1 << 19)), _tr[5] & 0x03, _tr[5] & 0x7f0); +#endif if (!(_tr[5] & (1 << 19))) { switch (ctl) { case 0: - // pclog(" Cache fill or read...\n", base); +#if 0 + pclog(" 
Cache fill or read...\n", base); +#endif break; case 1: base += (_tr[5] & 0x7f0); - // pclog(" Writing 16 bytes to %08X...\n", base); +#if 0 + pclog(" Writing 16 bytes to %08X...\n", base); +#endif for (i = 0; i < 16; i += 4) mem_writel_phys(base + i, *(uint32_t *) &(_cache[i])); break; case 2: base += (_tr[5] & 0x7f0); - // pclog(" Reading 16 bytes from %08X...\n", base); +#if 0 + pclog(" Reading 16 bytes from %08X...\n", base); +#endif for (i = 0; i < 16; i += 4) *(uint32_t *) &(_cache[i]) = mem_readl_phys(base + i); break; case 3: - // pclog(" Cache invalidate/flush...\n", base); +#if 0 + pclog(" Cache invalidate/flush...\n", base); +#endif break; } } diff --git a/src/cpu/x86_ops_mov_seg.h b/src/cpu/x86_ops_mov_seg.h index c6bfd9933..66e77d585 100644 --- a/src/cpu/x86_ops_mov_seg.h +++ b/src/cpu/x86_ops_mov_seg.h @@ -258,7 +258,8 @@ opMOV_seg_w_a32(uint32_t fetchdat) static int opLDS_w_a16(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; fetch_ea_16(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -280,7 +281,8 @@ opLDS_w_a16(uint32_t fetchdat) static int opLDS_w_a32(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; fetch_ea_32(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -349,7 +351,8 @@ opLDS_l_a32(uint32_t fetchdat) static int opLSS_w_a16(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; fetch_ea_16(fetchdat); ILLEGAL_ON(cpu_mod == 3); @@ -371,7 +374,8 @@ opLSS_w_a16(uint32_t fetchdat) static int opLSS_w_a32(uint32_t fetchdat) { - uint16_t addr, seg; + uint16_t addr; + uint16_t seg; fetch_ea_32(fetchdat); ILLEGAL_ON(cpu_mod == 3); diff --git a/src/cpu/x86_ops_mul.h b/src/cpu/x86_ops_mul.h index 552a9973a..aa7526e75 100644 --- a/src/cpu/x86_ops_mul.h +++ b/src/cpu/x86_ops_mul.h @@ -2,7 +2,8 @@ static int opIMUL_w_iw_a16(uint32_t fetchdat) { int32_t templ; - int16_t tempw, tempw2; + int16_t tempw; + int16_t tempw2; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -31,7 +32,8 @@ static int 
opIMUL_w_iw_a32(uint32_t fetchdat) { int32_t templ; - int16_t tempw, tempw2; + int16_t tempw; + int16_t tempw2; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -61,7 +63,8 @@ static int opIMUL_l_il_a16(uint32_t fetchdat) { int64_t temp64; - int32_t templ, templ2; + int32_t templ; + int32_t templ2; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -90,7 +93,8 @@ static int opIMUL_l_il_a32(uint32_t fetchdat) { int64_t temp64; - int32_t templ, templ2; + int32_t templ; + int32_t templ2; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -120,7 +124,8 @@ static int opIMUL_w_ib_a16(uint32_t fetchdat) { int32_t templ; - int16_t tempw, tempw2; + int16_t tempw; + int16_t tempw2; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -151,7 +156,8 @@ static int opIMUL_w_ib_a32(uint32_t fetchdat) { int32_t templ; - int16_t tempw, tempw2; + int16_t tempw; + int16_t tempw2; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -183,7 +189,8 @@ static int opIMUL_l_ib_a16(uint32_t fetchdat) { int64_t temp64; - int32_t templ, templ2; + int32_t templ; + int32_t templ2; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -214,7 +221,8 @@ static int opIMUL_l_ib_a32(uint32_t fetchdat) { int64_t temp64; - int32_t templ, templ2; + int32_t templ; + int32_t templ2; fetch_ea_32(fetchdat); if (cpu_mod != 3) diff --git a/src/cpu/x86_ops_pmode.h b/src/cpu/x86_ops_pmode.h index c8e146450..1254d7289 100644 --- a/src/cpu/x86_ops_pmode.h +++ b/src/cpu/x86_ops_pmode.h @@ -179,10 +179,16 @@ opLAR(w_a16, fetch_ea_16, 0, 0) static int op0F00_common(uint32_t fetchdat, int ea32) { - int dpl, valid, granularity; - uint32_t addr, base, limit; - uint16_t desc, sel; - uint8_t access, ar_high; + int dpl; + int valid; + int granularity; + uint32_t addr; + uint32_t base; + uint32_t limit; + uint16_t desc; + uint16_t sel; + uint8_t access; + uint8_t ar_high; switch (rmdat & 0x38) { case 0x00: /*SLDT*/ @@ -356,7 +362,9 @@ static int op0F01_common(uint32_t fetchdat, int is32, int is286, int ea32) { uint32_t base; - uint16_t limit, tempw; + uint16_t limit; + 
uint16_t tempw; + switch (rmdat & 0x38) { case 0x00: /*SGDT*/ if (cpu_mod != 3) diff --git a/src/cpu/x86_ops_prefix.h b/src/cpu/x86_ops_prefix.h index eba59c17a..1918e8acd 100644 --- a/src/cpu/x86_ops_prefix.h +++ b/src/cpu/x86_ops_prefix.h @@ -90,7 +90,8 @@ op_seg(GS_REPNE, cpu_state.seg_gs, x86_opcodes_REPNE, x86_opcodes) op_seg(SS_REPNE, cpu_state.seg_ss, x86_opcodes_REPNE, x86_opcodes) // clang-format on - static int op_66(uint32_t fetchdat) /*Data size select*/ +static int +op_66(uint32_t fetchdat) /*Data size select*/ { fetchdat = fastreadl(cs + cpu_state.pc); if (cpu_state.abrt) diff --git a/src/cpu/x86_ops_ret.h b/src/cpu/x86_ops_ret.h index ec200f2d7..64da566d3 100644 --- a/src/cpu/x86_ops_ret.h +++ b/src/cpu/x86_ops_ret.h @@ -190,7 +190,9 @@ opIRET(uint32_t fetchdat) if ((cr0 & 1) && (cpu_state.eflags & VM_FLAG) && (IOPL != 3)) { if (cr4 & CR4_VME) { - uint16_t new_pc, new_cs, new_flags; + uint16_t new_pc; + uint16_t new_cs; + uint16_t new_flags; new_pc = readmemw(ss, SP); new_cs = readmemw(ss, ((SP + 2) & 0xffff)); diff --git a/src/cpu/x86_ops_shift.h b/src/cpu/x86_ops_shift.h index 9c11a32f0..22c9aa8b6 100644 --- a/src/cpu/x86_ops_shift.h +++ b/src/cpu/x86_ops_shift.h @@ -605,7 +605,8 @@ opC0_a16(uint32_t fetchdat) { int c; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -624,7 +625,8 @@ opC0_a32(uint32_t fetchdat) { int c; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -643,7 +645,8 @@ opC1_w_a16(uint32_t fetchdat) { int c; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -662,7 +665,8 @@ opC1_w_a32(uint32_t fetchdat) { int c; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -681,7 +685,8 @@ opC1_l_a16(uint32_t fetchdat) { int c; int tempc; - uint32_t temp, temp2 = 0; 
+ uint32_t temp; + uint32_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -700,7 +705,8 @@ opC1_l_a32(uint32_t fetchdat) { int c; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -720,7 +726,8 @@ opD0_a16(uint32_t fetchdat) { int c = 1; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -736,7 +743,8 @@ opD0_a32(uint32_t fetchdat) { int c = 1; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -752,7 +760,8 @@ opD1_w_a16(uint32_t fetchdat) { int c = 1; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -768,7 +777,8 @@ opD1_w_a32(uint32_t fetchdat) { int c = 1; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -784,7 +794,8 @@ opD1_l_a16(uint32_t fetchdat) { int c = 1; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -800,7 +811,8 @@ opD1_l_a32(uint32_t fetchdat) { int c = 1; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -817,7 +829,8 @@ opD2_a16(uint32_t fetchdat) { int c; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -834,7 +847,8 @@ opD2_a32(uint32_t fetchdat) { int c; int tempc; - uint8_t temp, temp2 = 0; + uint8_t temp; + uint8_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -851,7 +865,8 @@ opD3_w_a16(uint32_t fetchdat) { int c; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -868,7 +883,8 @@ opD3_w_a32(uint32_t fetchdat) { int c; int tempc; - uint16_t temp, temp2 = 0; + uint16_t temp; + uint16_t temp2 = 
0; fetch_ea_32(fetchdat); if (cpu_mod != 3) @@ -885,7 +901,8 @@ opD3_l_a16(uint32_t fetchdat) { int c; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_16(fetchdat); if (cpu_mod != 3) @@ -902,7 +919,8 @@ opD3_l_a32(uint32_t fetchdat) { int c; int tempc; - uint32_t temp, temp2 = 0; + uint32_t temp; + uint32_t temp2 = 0; fetch_ea_32(fetchdat); if (cpu_mod != 3) diff --git a/src/cpu/x86_ops_stack.h b/src/cpu/x86_ops_stack.h index 8217a9e5a..aa6d4f31f 100644 --- a/src/cpu/x86_ops_stack.h +++ b/src/cpu/x86_ops_stack.h @@ -379,9 +379,13 @@ opENTER_w(uint32_t fetchdat) { uint16_t offset; int count; - uint32_t tempEBP, tempESP, frame_ptr; + uint32_t tempEBP; + uint32_t tempESP; + uint32_t frame_ptr; #ifndef IS_DYNAREC - int reads = 0, writes = 1, instr_cycles = 0; + int reads = 0; + int writes = 1; + int instr_cycles = 0; #endif uint16_t tempw; @@ -448,9 +452,13 @@ opENTER_l(uint32_t fetchdat) { uint16_t offset; int count; - uint32_t tempEBP, tempESP, frame_ptr; + uint32_t tempEBP; + uint32_t tempESP; + uint32_t frame_ptr; #ifndef IS_DYNAREC - int reads = 0, writes = 1, instr_cycles = 0; + int reads = 0; + int writes = 1; + int instr_cycles = 0; #endif uint32_t templ; diff --git a/src/cpu/x86_ops_string.h b/src/cpu/x86_ops_string.h index c9ba94760..619386fcb 100644 --- a/src/cpu/x86_ops_string.h +++ b/src/cpu/x86_ops_string.h @@ -219,7 +219,8 @@ opMOVSL_a32(uint32_t fetchdat) static int opCMPSB_a16(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; addr64 = addr64_2 = 0x00000000; @@ -259,7 +260,8 @@ opCMPSB_a16(uint32_t fetchdat) static int opCMPSB_a32(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; addr64 = addr64_2 = 0x00000000; @@ -300,7 +302,8 @@ opCMPSB_a32(uint32_t fetchdat) static int opCMPSW_a16(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; addr64a[0] = addr64a[1] = 0x00000000; addr64a_2[0] = addr64a_2[1] = 0x00000000; @@ -341,7 +344,8 @@ 
opCMPSW_a16(uint32_t fetchdat) static int opCMPSW_a32(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; addr64a[0] = addr64a[1] = 0x00000000; addr64a_2[0] = addr64a_2[1] = 0x00000000; @@ -383,7 +387,8 @@ opCMPSW_a32(uint32_t fetchdat) static int opCMPSL_a16(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000; addr64a_2[0] = addr64a_2[1] = addr64a_2[2] = addr64a_2[3] = 0x00000000; @@ -424,7 +429,8 @@ opCMPSL_a16(uint32_t fetchdat) static int opCMPSL_a32(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000; addr64a_2[0] = addr64a_2[1] = addr64a_2[2] = addr64a_2[3] = 0x00000000; diff --git a/src/cpu/x86_ops_string_2386.h b/src/cpu/x86_ops_string_2386.h index 29ca0d9a4..98875e54f 100644 --- a/src/cpu/x86_ops_string_2386.h +++ b/src/cpu/x86_ops_string_2386.h @@ -219,7 +219,8 @@ opMOVSL_a32(uint32_t fetchdat) static int opCMPSB_a16(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; addr64 = addr64_2 = 0x00000000; @@ -255,7 +256,8 @@ opCMPSB_a16(uint32_t fetchdat) static int opCMPSB_a32(uint32_t fetchdat) { - uint8_t src, dst; + uint8_t src; + uint8_t dst; addr64 = addr64_2 = 0x00000000; @@ -292,7 +294,8 @@ opCMPSB_a32(uint32_t fetchdat) static int opCMPSW_a16(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; addr64a[0] = addr64a[1] = 0x00000000; addr64a_2[0] = addr64a_2[1] = 0x00000000; @@ -329,7 +332,8 @@ opCMPSW_a16(uint32_t fetchdat) static int opCMPSW_a32(uint32_t fetchdat) { - uint16_t src, dst; + uint16_t src; + uint16_t dst; addr64a[0] = addr64a[1] = 0x00000000; addr64a_2[0] = addr64a_2[1] = 0x00000000; @@ -367,7 +371,8 @@ opCMPSW_a32(uint32_t fetchdat) static int opCMPSL_a16(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000; addr64a_2[0] = 
addr64a_2[1] = addr64a_2[2] = addr64a_2[3] = 0x00000000; @@ -404,7 +409,8 @@ opCMPSL_a16(uint32_t fetchdat) static int opCMPSL_a32(uint32_t fetchdat) { - uint32_t src, dst; + uint32_t src; + uint32_t dst; addr64a[0] = addr64a[1] = addr64a[2] = addr64a[3] = 0x00000000; addr64a_2[0] = addr64a_2[1] = addr64a_2[2] = addr64a_2[3] = 0x00000000; diff --git a/src/cpu/x86seg.c b/src/cpu/x86seg.c index 7f630275c..3c4847a36 100644 --- a/src/cpu/x86seg.c +++ b/src/cpu/x86seg.c @@ -31,13 +31,17 @@ #include <86box/machine.h> #include <86box/mem.h> #include <86box/nvr.h> +#include <86box/plat_fallthrough.h> +#include <86box/plat_unused.h> + #include "x86.h" #include "x86_flags.h" #include "386_common.h" uint8_t opcode2; -int cgate16, cgate32; +int cgate16; +int cgate32; int intgatesize; void taskswitch286(uint16_t seg, uint16_t *segdat, int is32); @@ -157,7 +161,7 @@ x86_doabrt(int x86_abrt) } void -x86de(char *s, uint16_t error) +x86de(UNUSED(char *s), UNUSED(uint16_t error)) { #ifdef BAD_CODE cpu_state.abrt = ABRT_DE; @@ -168,35 +172,35 @@ x86de(char *s, uint16_t error) } void -x86gpf(char *s, uint16_t error) +x86gpf(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_GPF; abrt_error = error; } void -x86gpf_expected(char *s, uint16_t error) +x86gpf_expected(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_GPF | ABRT_EXPECTED; abrt_error = error; } void -x86ss(char *s, uint16_t error) +x86ss(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_SS; abrt_error = error; } void -x86ts(char *s, uint16_t error) +x86ts(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_TS; abrt_error = error; } void -x86np(char *s, uint16_t error) +x86np(UNUSED(char *s), uint16_t error) { cpu_state.abrt = ABRT_NP; abrt_error = error; @@ -272,9 +276,9 @@ do_seg_v86_init(x86seg *s) static void check_seg_valid(x86seg *s) { - int dpl = (s->access >> 5) & 3; - int valid = 1; - x86seg *dt = (s->seg & 0x0004) ? 
&ldt : &gdt; + int dpl = (s->access >> 5) & 3; + int valid = 1; + const x86seg *dt = (s->seg & 0x0004) ? &ldt : &gdt; if (((s->seg & 0xfff8UL) + 7UL) > dt->limit) valid = 0; @@ -334,10 +338,11 @@ void #endif loadseg(uint16_t seg, x86seg *s) { - uint16_t segdat[4]; - uint32_t addr, *segdat32 = (uint32_t *) segdat; - int dpl; - x86seg *dt; + uint16_t segdat[4]; + uint32_t addr; + uint32_t *segdat32 = (uint32_t *) segdat; + int dpl; + const x86seg *dt; if ((msw & 1) && !(cpu_state.eflags & VM_FLAG)) { if (!(seg & 0xfffc)) { @@ -531,9 +536,10 @@ loadseg(uint16_t seg, x86seg *s) void loadcs(uint16_t seg) { - uint16_t segdat[4]; - uint32_t addr, *segdat32 = (uint32_t *) segdat; - x86seg *dt; + uint16_t segdat[4]; + uint32_t addr; + uint32_t *segdat32 = (uint32_t *) segdat; + const x86seg *dt; x86seg_log("Load CS %04X\n", seg); @@ -619,11 +625,13 @@ loadcs(uint16_t seg) void loadcsjmp(uint16_t seg, uint32_t old_pc) { - uint16_t type, seg2; - uint16_t segdat[4]; - uint32_t addr, newpc; - uint32_t *segdat32 = (uint32_t *) segdat; - x86seg *dt; + uint16_t type; + uint16_t seg2; + uint16_t segdat[4]; + uint32_t addr; + uint32_t newpc; + uint32_t *segdat32 = (uint32_t *) segdat; + const x86seg *dt; if ((msw & 1) && !(cpu_state.eflags & VM_FLAG)) { if (!(seg & 0xfffc)) { @@ -743,7 +751,7 @@ loadcsjmp(uint16_t seg, uint32_t old_pc) x86gpf("loadcsjmp(): Non-conforming DPL > CPL", seg2 & 0xfffc); return; } - /*FALLTHROUGH*/ + fallthrough; case 0x1c00: case 0x1d00: case 0x1e00: @@ -890,16 +898,24 @@ void loadcscall(uint16_t seg) #endif { - uint16_t seg2, newss; - uint16_t segdat[4], segdat2[4]; - uint32_t addr, oldssbase = ss; - uint32_t oaddr, newpc; - uint32_t *segdat32 = (uint32_t *) segdat; - uint32_t *segdat232 = (uint32_t *) segdat2; - int count, type; - uint32_t oldss, oldsp, newsp, oldsp2; - uint16_t tempw; - x86seg *dt; + uint16_t seg2; + uint16_t newss; + uint16_t segdat[4]; + uint16_t segdat2[4]; + uint32_t addr; + uint32_t oldssbase = ss; + uint32_t oaddr; + uint32_t 
newpc; + uint32_t *segdat32 = (uint32_t *) segdat; + uint32_t *segdat232 = (uint32_t *) segdat2; + int count; + int type; + uint32_t oldss; + uint32_t oldsp; + uint32_t newsp; + uint32_t oldsp2; + uint16_t tempw; + const x86seg *dt; if ((msw & 1) && !(cpu_state.eflags & VM_FLAG)) { x86seg_log("Protected mode CS load! %04X\n", seg); @@ -1174,7 +1190,7 @@ loadcscall(uint16_t seg) x86gpf("loadcscall(): Call PM Gate Inner DPL > CPL", seg2 & 0xfffc); return; } - /*FALLTHROUGH*/ + fallthrough; case 0x1c00: case 0x1d00: case 0x1e00: @@ -1237,12 +1253,18 @@ loadcscall(uint16_t seg) void pmoderetf(int is32, uint16_t off) { - uint16_t segdat[4], segdat2[4], seg, newss; - uint32_t newpc, newsp, addr, oaddr; - uint32_t oldsp = ESP; - uint32_t *segdat32 = (uint32_t *) segdat; - uint32_t *segdat232 = (uint32_t *) segdat2; - x86seg *dt; + uint16_t segdat[4]; + uint16_t segdat2[4]; + uint16_t seg; + uint16_t newss; + uint32_t newpc; + uint32_t newsp; + uint32_t addr; + uint32_t oaddr; + uint32_t oldsp = ESP; + uint32_t *segdat32 = (uint32_t *) segdat; + uint32_t *segdat232 = (uint32_t *) segdat2; + const x86seg *dt; x86seg_log("RETF %i %04X:%04X %08X %04X\n", is32, CS, cpu_state.pc, cr0, cpu_state.eflags); if (is32) { @@ -1467,17 +1489,22 @@ pmoderetf(int is32, uint16_t off) void pmodeint(int num, int soft) { - uint16_t segdat[4], segdat2[4]; - uint16_t segdat3[4]; - uint16_t newss, seg = 0; - int type, new_cpl; - uint32_t addr, oaddr; - uint32_t oldss, oldsp; - uint32_t newsp; - uint32_t *segdat32 = (uint32_t *) segdat; - uint32_t *segdat232 = (uint32_t *) segdat2; - uint32_t *segdat332 = (uint32_t *) segdat3; - x86seg *dt; + uint16_t segdat[4]; + uint16_t segdat2[4]; + uint16_t segdat3[4]; + uint16_t newss; + uint16_t seg = 0; + int type; + int new_cpl; + uint32_t addr; + uint32_t oaddr; + uint32_t oldss; + uint32_t oldsp; + uint32_t newsp; + uint32_t *segdat32 = (uint32_t *) segdat; + uint32_t *segdat232 = (uint32_t *) segdat2; + uint32_t *segdat332 = (uint32_t *) segdat3; + 
const x86seg *dt; if ((cpu_state.eflags & VM_FLAG) && (IOPL != 3) && soft) { x86seg_log("V86 banned int\n"); @@ -1661,7 +1688,7 @@ pmodeint(int num, int soft) x86gpf("pmodeint(): DPL != CPL", seg & 0xfffc); return; } - /*FALLTHROUGH*/ + fallthrough; case 0x1c00: case 0x1d00: case 0x1e00: @@ -1750,16 +1777,21 @@ pmodeint(int num, int soft) void pmodeiret(int is32) { - uint16_t newss, seg = 0; - uint16_t segdat[4], segdat2[4]; - uint16_t segs[4]; - uint32_t tempflags, flagmask; - uint32_t newpc, newsp; - uint32_t addr, oaddr; - uint32_t oldsp = ESP; - uint32_t *segdat32 = (uint32_t *) segdat; - uint32_t *segdat232 = (uint32_t *) segdat2; - x86seg *dt; + uint16_t newss; + uint16_t seg = 0; + uint16_t segdat[4]; + uint16_t segdat2[4]; + uint16_t segs[4]; + uint32_t tempflags; + uint32_t flagmask; + uint32_t newpc; + uint32_t newsp; + uint32_t addr; + uint32_t oaddr; + uint32_t oldsp = ESP; + uint32_t *segdat32 = (uint32_t *) segdat; + uint32_t *segdat232 = (uint32_t *) segdat2; + const x86seg *dt; if (is386 && (cpu_state.eflags & VM_FLAG)) { if (IOPL != 3) { @@ -2058,15 +2090,32 @@ pmodeiret(int is32) void taskswitch286(uint16_t seg, uint16_t *segdat, int is32) { - uint16_t tempw, new_ldt; - uint16_t new_es, new_cs, new_ss, new_ds, new_fs, new_gs; - uint16_t segdat2[4]; - uint32_t base, limit; - uint32_t templ, new_cr3 = 0; - uint32_t new_eax, new_ebx, new_ecx, new_edx, new_esp, new_ebp; - uint32_t new_esi, new_edi, new_pc, new_flags, addr; - uint32_t *segdat232 = (uint32_t *) segdat2; - x86seg *dt; + uint16_t tempw; + uint16_t new_ldt; + uint16_t new_es; + uint16_t new_cs; + uint16_t new_ss; + uint16_t new_ds; + uint16_t new_fs; + uint16_t new_gs; + uint16_t segdat2[4]; + uint32_t base; + uint32_t limit; + uint32_t templ; + uint32_t new_cr3 = 0; + uint32_t new_eax; + uint32_t new_ebx; + uint32_t new_ecx; + uint32_t new_edx; + uint32_t new_esp; + uint32_t new_ebp; + uint32_t new_esi; + uint32_t new_edi; + uint32_t new_pc; + uint32_t new_flags; + uint32_t addr; + 
uint32_t *segdat232 = (uint32_t *) segdat2; + const x86seg *dt; base = segdat[1] | ((segdat[2] & 0x00ff) << 16); limit = segdat[0]; @@ -2447,7 +2496,8 @@ cyrix_write_seg_descriptor(uint32_t addr, x86seg *seg) void cyrix_load_seg_descriptor(uint32_t addr, x86seg *seg) { - uint16_t segdat[4], selector; + uint16_t segdat[4]; + uint16_t selector; segdat[0] = readmemw(0, addr); segdat[1] = readmemw(0, addr + 2); diff --git a/src/cpu/x87.c b/src/cpu/x87.c index 181b7b9ca..3918800aa 100644 --- a/src/cpu/x87.c +++ b/src/cpu/x87.c @@ -17,8 +17,10 @@ #include "386_common.h" #include "softfloat/softfloat-specialize.h" -uint32_t x87_pc_off, x87_op_off; -uint16_t x87_pc_seg, x87_op_seg; +uint32_t x87_pc_off; +uint32_t x87_op_off; +uint16_t x87_pc_seg; +uint16_t x87_op_seg; #ifdef ENABLE_FPU_LOG int fpu_do_log = ENABLE_FPU_LOG; @@ -43,9 +45,8 @@ uint16_t x87_gettag(void) { uint16_t ret = 0; - int c; - for (c = 0; c < 8; c++) { + for (uint8_t c = 0; c < 8; c++) { if (cpu_state.tag[c] == TAG_EMPTY) ret |= X87_TAG_EMPTY << (c * 2); else if (cpu_state.tag[c] & TAG_UINT64) @@ -62,9 +63,7 @@ x87_gettag(void) void x87_settag(uint16_t new_tag) { - int c; - - for (c = 0; c < 8; c++) { + for (uint8_t c = 0; c < 8; c++) { int tag = (new_tag >> (c * 2)) & 3; if (tag == X87_TAG_EMPTY) @@ -152,7 +151,8 @@ FPU_handle_NaN32(floatx80 a, float32 b, floatx80 *r, struct float_status_t *stat return 1; } - int aIsNaN = floatx80_is_nan(a), bIsNaN = float32_is_nan(b); + int aIsNaN = floatx80_is_nan(a); + int bIsNaN = float32_is_nan(b); if (aIsNaN | bIsNaN) { *r = FPU_handle_NaN32_Func(a, aIsNaN, b, bIsNaN, status); return 1; @@ -205,7 +205,8 @@ FPU_handle_NaN64(floatx80 a, float64 b, floatx80 *r, struct float_status_t *stat return 1; } - int aIsNaN = floatx80_is_nan(a), bIsNaN = float64_is_nan(b); + int aIsNaN = floatx80_is_nan(a); + int bIsNaN = float64_is_nan(b); if (aIsNaN | bIsNaN) { *r = FPU_handle_NaN64_Func(a, aIsNaN, b, bIsNaN, status); return 1; @@ -256,13 +257,13 @@ 
FPU_status_word_flags_fpu_compare(int float_relation) return (C0 | C2 | C3); case float_relation_greater: - return (0); + return 0; case float_relation_less: - return (C0); + return C0; case float_relation_equal: - return (C3); + return C3; } return (-1); // should never get here @@ -280,11 +281,11 @@ FPU_write_eflags_fpu_compare(int float_relation) break; case float_relation_less: - cpu_state.flags |= (C_FLAG); + cpu_state.flags |= C_FLAG; break; case float_relation_equal: - cpu_state.flags |= (Z_FLAG); + cpu_state.flags |= Z_FLAG; break; default: diff --git a/src/cpu/x87.h b/src/cpu/x87.h index 66d51dbd9..2d8708da4 100644 --- a/src/cpu/x87.h +++ b/src/cpu/x87.h @@ -3,8 +3,10 @@ #define X87_TAG_INVALID 2 #define X87_TAG_EMPTY 3 -extern uint32_t x87_pc_off, x87_op_off; -extern uint16_t x87_pc_seg, x87_op_seg; +extern uint32_t x87_pc_off; +extern uint32_t x87_op_off; +extern uint16_t x87_pc_seg; +extern uint16_t x87_op_seg; static __inline void x87_set_mmx(void) @@ -16,7 +18,7 @@ x87_set_mmx(void) } else { cpu_state.TOP = 0; p = (uint64_t *) cpu_state.tag; - *p = 0x0101010101010101ull; + *p = 0x0101010101010101ULL; } cpu_state.ismmx = 1; } diff --git a/src/cpu/x87_ops.h b/src/cpu/x87_ops.h index 0db2b3f3c..cde0128dc 100644 --- a/src/cpu/x87_ops.h +++ b/src/cpu/x87_ops.h @@ -111,6 +111,7 @@ typedef union static __inline void x87_checkexceptions(void) { + // } static __inline void @@ -169,7 +170,8 @@ x87_pop(void) static __inline int16_t x87_fround16(double b) { - int16_t a, c; + int16_t a; + int16_t c; switch ((cpu_state.npxc >> 10) & 3) { case 0: /*Nearest*/ @@ -201,7 +203,8 @@ x87_fround16_64(double b) static __inline int32_t x87_fround32(double b) { - int32_t a, c; + int32_t a; + int32_t c; switch ((cpu_state.npxc >> 10) & 3) { case 0: /*Nearest*/ @@ -233,7 +236,8 @@ x87_fround32_64(double b) static __inline int64_t x87_fround(double b) { - int64_t a, c; + int64_t a; + int64_t c; switch ((cpu_state.npxc >> 10) & 3) { case 0: /*Nearest*/ @@ -338,9 +342,10 @@ 
x87_compare(double a, double b) { #ifdef X87_INLINE_ASM uint32_t result; - double ea = a, eb = b; - const uint64_t ia = 0x3fec1a6ff866a936ull; - const uint64_t ib = 0x3fec1a6ff866a938ull; + double ea = a; + double eb = b; + const uint64_t ia = 0x3fec1a6ff866a936ULL; + const uint64_t ib = 0x3fec1a6ff866a938ULL; /* Hack to make CHKCOP happy. */ if (!memcmp(&ea, &ia, 8) && !memcmp(&eb, &ib, 8)) diff --git a/src/cpu/x87_ops_misc.h b/src/cpu/x87_ops_misc.h index cea6e6075..f4d1dd2ea 100644 --- a/src/cpu/x87_ops_misc.h +++ b/src/cpu/x87_ops_misc.h @@ -37,16 +37,17 @@ static int opFXTRACT(uint32_t fetchdat) { x87_conv_t test; - int64_t exp80, exp80final; - double mant; + int64_t exp80; + int64_t exp80final; + double mant; FP_ENTER(); cpu_state.pc++; test.eind.d = ST(0); - exp80 = test.eind.ll & (0x7ff0000000000000ll); - exp80final = (exp80 >> 52) - BIAS64; - mant = test.eind.d / (pow(2.0, (double)exp80final)); - ST(0) = (double)exp80final; + exp80 = test.eind.ll & 0x7ff0000000000000LL; + exp80final = (exp80 >> 52) - BIAS64; + mant = test.eind.d / (pow(2.0, (double) exp80final)); + ST(0) = (double) exp80final; FP_TAG_VALID; x87_push(mant); CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxtract) : (x87_timings.fxtract * cpu_multi)); @@ -82,7 +83,7 @@ opFINIT(uint32_t fetchdat) #ifdef USE_NEW_DYNAREC *p = 0; #else - *p = 0x0303030303030303ll; + *p = 0x0303030303030303LL; #endif cpu_state.TOP = 0; cpu_state.ismmx = 0; @@ -410,7 +411,7 @@ FSAVE(void) #ifdef USE_NEW_DYNAREC *p = 0; #else - *p = 0x0303030303030303ll; + *p = 0x0303030303030303LL; #endif cpu_state.TOP = 0; cpu_state.ismmx = 0; @@ -629,7 +630,7 @@ opFLDLN2(uint32_t fetchdat) { FP_ENTER(); cpu_state.pc++; - x87_push_u64(0x3fe62e42fefa39f0ull); + x87_push_u64(0x3fe62e42fefa39f0ULL); CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi)); CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi)); return 0; diff --git a/src/cpu/x87_ops_sf.h b/src/cpu/x87_ops_sf.h index e70556fea..fb2f790a4 100644 --- a/src/cpu/x87_ops_sf.h +++ b/src/cpu/x87_ops_sf.h @@ -18,7 +18,8 @@ fpu_save_environment(void) switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { case 0x000: { /*16-bit real mode*/ uint16_t tmp; - uint32_t fp_ip, fp_dp; + uint32_t fp_ip; + uint32_t fp_dp; fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip; fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp; @@ -60,7 +61,9 @@ fpu_save_environment(void) } break; case 0x100: { /*32-bit real mode*/ - uint32_t tmp, fp_ip, fp_dp; + uint32_t tmp; + uint32_t fp_ip; + uint32_t fp_dp; fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip; fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp; @@ -114,7 +117,9 @@ fpu_load_environment(void) switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { case 0x000: { /*16-bit real mode*/ uint16_t tmp; - uint32_t fp_ip, fp_dp; + uint32_t fp_ip; + uint32_t fp_dp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c); fp_dp = (tmp & 0xf000) << 4; tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a); @@ -156,7 +161,10 @@ fpu_load_environment(void) } break; case 0x100: { /*32-bit real mode*/ - uint32_t tmp, fp_ip, fp_dp; + uint32_t tmp; + uint32_t fp_ip; + uint32_t fp_dp; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x18); fp_dp = (tmp & 0x0ffff000) << 4; tmp = readmeml(easeg, cpu_state.eaaddr + 0x14); diff --git a/src/cpu/x87_ops_sf_arith.h b/src/cpu/x87_ops_sf_arith.h index 10b100b2a..5dc80b432 100644 --- a/src/cpu/x87_ops_sf_arith.h +++ b/src/cpu/x87_ops_sf_arith.h @@ -203,7 +203,9 @@ sf_FPU(il, uint32_t, 32, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i static int sf_FADD_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -230,7 +232,9 @@ next_ins: static int sf_FADD_sti_st0(uint32_t fetchdat) { - floatx80 a, b, 
result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -258,7 +262,9 @@ next_ins: static int sf_FADDP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -288,7 +294,9 @@ next_ins: static int sf_FDIV_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -315,7 +323,9 @@ next_ins: static int sf_FDIV_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -341,7 +351,9 @@ next_ins: static int sf_FDIVP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -370,7 +382,9 @@ next_ins: static int sf_FDIVR_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -396,7 +410,9 @@ next_ins: static int sf_FDIVR_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -422,7 +438,9 @@ next_ins: static int sf_FDIVRP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -451,7 +469,9 @@ next_ins: static int sf_FMUL_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -478,7 +498,9 @@ next_ins: static int sf_FMUL_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -505,7 +527,9 @@ next_ins: static int sf_FMULP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct 
float_status_t status; FP_ENTER(); @@ -534,7 +558,9 @@ next_ins: static int sf_FSUB_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -561,7 +587,9 @@ next_ins: static int sf_FSUB_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -588,7 +616,9 @@ next_ins: static int sf_FSUBP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -617,7 +647,9 @@ next_ins: static int sf_FSUBR_st0_stj(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -644,7 +676,9 @@ next_ins: static int sf_FSUBR_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -671,7 +705,9 @@ next_ins: static int sf_FSUBRP_sti_st0(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); diff --git a/src/cpu/x87_ops_sf_compare.h b/src/cpu/x87_ops_sf_compare.h index 34bca6772..0dde17a05 100644 --- a/src/cpu/x87_ops_sf_compare.h +++ b/src/cpu/x87_ops_sf_compare.h @@ -95,7 +95,8 @@ cmp_FPU(il, int32_t, 32, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i static int sf_FCOM_sti(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -123,7 +124,8 @@ next_ins: static int sf_FCOMP_sti(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -156,7 +158,8 @@ next_ins: static int sf_FCOMPP(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -192,7 +195,8 @@ next_ins: static int sf_FUCOMPP(uint32_t fetchdat) { - floatx80 a, b; + 
floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -228,7 +232,8 @@ next_ins: static int sf_FCOMI_st0_stj(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -256,7 +261,8 @@ next_ins: static int sf_FCOMIP_st0_stj(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -291,7 +297,8 @@ next_ins: static int sf_FUCOM_sti(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -319,7 +326,8 @@ next_ins: static int sf_FUCOMP_sti(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -352,7 +360,8 @@ next_ins: static int sf_FUCOMI_st0_stj(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; @@ -380,7 +389,8 @@ next_ins: static int sf_FUCOMIP_st0_stj(uint32_t fetchdat) { - floatx80 a, b; + floatx80 a; + floatx80 b; struct float_status_t status; int rc; diff --git a/src/cpu/x87_ops_sf_misc.h b/src/cpu/x87_ops_sf_misc.h index d8a3d7368..3b468cbf6 100644 --- a/src/cpu/x87_ops_sf_misc.h +++ b/src/cpu/x87_ops_sf_misc.h @@ -2,8 +2,10 @@ static int sf_FXCH_sti(uint32_t fetchdat) { const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); - floatx80 st0_reg, sti_reg; - int st0_tag, sti_tag; + floatx80 st0_reg; + floatx80 sti_reg; + int st0_tag; + int sti_tag; FP_ENTER(); FPU_check_pending_exceptions(); diff --git a/src/cpu/x87_ops_sf_trans.h b/src/cpu/x87_ops_sf_trans.h index 5289b2bbf..8f28104bd 100644 --- a/src/cpu/x87_ops_sf_trans.h +++ b/src/cpu/x87_ops_sf_trans.h @@ -104,7 +104,9 @@ next_ins: static int sf_FPATAN(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -132,7 +134,8 @@ static int sf_FXTRACT(uint32_t fetchdat) { struct float_status_t status; - floatx80 a, 
b; + floatx80 a; + floatx80 b; FP_ENTER(); cpu_state.pc++; @@ -175,10 +178,13 @@ next_ins: static int sf_FPREM1(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; - uint64_t quotient = 0; - int flags, cc; + uint64_t quotient = 0; + int flags; + int cc; FP_ENTER(); cpu_state.pc++; @@ -219,10 +225,13 @@ next_ins: static int sf_FPREM(uint32_t fetchdat) { - floatx80 a, b, result; + floatx80 a; + floatx80 b; + floatx80 result; struct float_status_t status; - uint64_t quotient = 0; - int flags, cc; + uint64_t quotient = 0; + int flags; + int cc; FP_ENTER(); cpu_state.pc++; @@ -291,9 +300,11 @@ next_ins: static int sf_FSINCOS(uint32_t fetchdat) { - const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); struct float_status_t status; - floatx80 y, sin_y, cos_y; + floatx80 y; + floatx80 sin_y; + floatx80 cos_y; FP_ENTER(); cpu_state.pc++; From 5cd18f3fbbc5ee5d211722b9c6160d0ab7ad6539 Mon Sep 17 00:00:00 2001 From: Jasmine Iwanek Date: Fri, 11 Aug 2023 13:00:04 -0400 Subject: [PATCH 2/4] Clang-formatting in src/cpu --- src/cpu/386.c | 32 +-- src/cpu/386_common.c | 46 ++-- src/cpu/386_common.h | 399 +++++++++++++++-------------- src/cpu/386_dynarec.c | 8 +- src/cpu/386_ops.h | 66 ++--- src/cpu/808x.c | 179 ++++++------- src/cpu/codegen_timing_common.h | 4 +- src/cpu/cpu.c | 36 +-- src/cpu/cpu.h | 32 +-- src/cpu/x86.c | 6 +- src/cpu/x86_ops.h | 3 - src/cpu/x86_ops_3dnow.h | 52 ++-- src/cpu/x86_ops_atomic.h | 4 +- src/cpu/x86_ops_i686.h | 194 +++++++------- src/cpu/x86_ops_jump.h | 2 +- src/cpu/x86_ops_misc.h | 4 +- src/cpu/x86_ops_mmx.c | 6 +- src/cpu/x86_ops_mmx.h | 12 +- src/cpu/x86_ops_mmx_arith.h | 68 ++--- src/cpu/x86_ops_mmx_cmp.h | 24 +- src/cpu/x86_ops_mmx_logic.h | 16 +- src/cpu/x86_ops_mmx_mov.h | 12 +- src/cpu/x86_ops_mmx_pack.h 
| 44 ++-- src/cpu/x86_ops_mmx_shift.h | 50 ++-- src/cpu/x86_ops_mov.h | 154 ++++++------ src/cpu/x86_ops_mov_seg.h | 2 +- src/cpu/x86_ops_prefix.h | 2 +- src/cpu/x86_ops_set.h | 2 +- src/cpu/x86_ops_shift.h | 2 +- src/cpu/x86_ops_stack.h | 56 +++-- src/cpu/x86_ops_xchg.h | 2 +- src/cpu/x87.c | 178 +++++++------ src/cpu/x87.h | 167 ++++++------ src/cpu/x87_ops.h | 42 ++-- src/cpu/x87_ops_arith.h | 12 +- src/cpu/x87_ops_misc.h | 34 +-- src/cpu/x87_ops_sf.h | 434 ++++++++++++++++---------------- src/cpu/x87_ops_sf_arith.h | 347 +++++++++++++------------ src/cpu/x87_ops_sf_compare.h | 237 +++++++++-------- src/cpu/x87_ops_sf_const.h | 8 +- src/cpu/x87_ops_sf_load_store.h | 267 ++++++++++---------- src/cpu/x87_ops_sf_misc.h | 12 +- src/cpu/x87_ops_sf_trans.h | 70 +++--- 43 files changed, 1705 insertions(+), 1622 deletions(-) diff --git a/src/cpu/386.c b/src/cpu/386.c index 11e87cadc..5dd143efd 100644 --- a/src/cpu/386.c +++ b/src/cpu/386.c @@ -26,7 +26,7 @@ #include <86box/machine.h> #include <86box/gdbstub.h> #ifndef OPS_286_386 -#define OPS_286_386 +# define OPS_286_386 #endif #include "386_common.h" #ifdef USE_NEW_DYNAREC @@ -204,24 +204,24 @@ fetch_ea_16_long(uint32_t rmdat) #define PREFETCH_FLUSH() prefetch_flush() #ifndef FPU_CYCLES -#define FPU_CYCLES +# define FPU_CYCLES #endif -#define OP_TABLE(name) ops_2386_##name -# define CLOCK_CYCLES(c) \ - { \ - if (fpu_cycles > 0) { \ - fpu_cycles -= (c); \ - if (fpu_cycles < 0) { \ - cycles += fpu_cycles; \ - } \ - } else { \ - cycles -= (c); \ - } \ - } +#define OP_TABLE(name) ops_2386_##name +#define CLOCK_CYCLES(c) \ + { \ + if (fpu_cycles > 0) { \ + fpu_cycles -= (c); \ + if (fpu_cycles < 0) { \ + cycles += fpu_cycles; \ + } \ + } else { \ + cycles -= (c); \ + } \ + } -# define CLOCK_CYCLES_FPU(c) cycles -= (c) -# define CONCURRENCY_CYCLES(c) fpu_cycles = (c) +#define CLOCK_CYCLES_FPU(c) cycles -= (c) +#define CONCURRENCY_CYCLES(c) fpu_cycles = (c) #define CLOCK_CYCLES_ALWAYS(c) cycles -= (c) diff --git 
a/src/cpu/386_common.c b/src/cpu/386_common.c index f8593fdbe..5c6b43980 100644 --- a/src/cpu/386_common.c +++ b/src/cpu/386_common.c @@ -100,22 +100,22 @@ uint32_t backupregs[16]; x86seg _oldds; -int opcode_length[256] = { 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 3, /* 0x0x */ - 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, /* 0x1x */ - 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, /* 0x2x */ - 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, /* 0x3x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x5x */ - 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 1, 1, 1, /* 0x6x */ - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x7x */ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x8x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, /* 0x9x */ - 3, 3, 3, 3, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, /* 0xax */ - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xbx */ - 3, 3, 3, 1, 3, 3, 3, 3, 3, 1, 3, 1, 1, 2, 1, 1, /* 0xcx */ - 3, 3, 3, 3, 2, 2, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xdx */ - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 1, 1, 1, 1, /* 0xex */ - 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 3, 3 }; /* 0xfx */ +int opcode_length[256] = { 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 3, /* 0x0x */ + 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, /* 0x1x */ + 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, /* 0x2x */ + 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, /* 0x3x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x5x */ + 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 1, 1, 1, /* 0x6x */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x7x */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x8x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, /* 0x9x */ + 3, 3, 3, 3, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, /* 0xax */ + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xbx */ + 3, 3, 3, 1, 3, 3, 3, 3, 3, 1, 3, 1, 1, 
2, 1, 1, /* 0xcx */ + 3, 3, 3, 3, 2, 2, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xdx */ + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 1, 1, 1, 1, /* 0xex */ + 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 3, 3 }; /* 0xfx */ uint32_t addr64; uint32_t addr64_2; @@ -382,7 +382,7 @@ x386_common_log(const char *fmt, ...) Note that this is only used for 286 / 386 systems. It is disabled when the internal cache on 486+ CPUs is enabled. */ -static int prefetch_bytes = 0; +static int prefetch_bytes = 0; void prefetch_run(int instr_cycles, int bytes, int modrm, int reads, int reads_l, int writes, int writes_l, int ea32) @@ -1164,13 +1164,13 @@ enter_smm(int in_hlt) memset(saved_state, 0x00, SMM_SAVE_STATE_MAP_SIZE * sizeof(uint32_t)); - if (is_cxsmm) /* Cx6x86 */ + if (is_cxsmm) /* Cx6x86 */ smram_save_state_cyrix(saved_state, in_hlt); else if (is_pentium || is_am486) /* Am486 / 5x86 / Intel P5 (Pentium) */ smram_save_state_p5(saved_state, in_hlt); - else if (is_k5 || is_k6) /* AMD K5 and K6 */ + else if (is_k5 || is_k6) /* AMD K5 and K6 */ smram_save_state_amd_k(saved_state, in_hlt); - else if (is_p6) /* Intel P6 (Pentium Pro, Pentium II, Celeron) */ + else if (is_p6) /* Intel P6 (Pentium Pro, Pentium II, Celeron) */ smram_save_state_p6(saved_state, in_hlt); cr0 &= ~0x8000000d; @@ -1335,13 +1335,13 @@ leave_smm(void) } x386_common_log("New SMBASE: %08X (%08X)\n", saved_state[SMRAM_FIELD_P5_SMBASE_OFFSET], saved_state[66]); - if (is_cxsmm) /* Cx6x86 */ + if (is_cxsmm) /* Cx6x86 */ smram_restore_state_cyrix(saved_state); else if (is_pentium || is_am486) /* Am486 / 5x86 / Intel P5 (Pentium) */ smram_restore_state_p5(saved_state); - else if (is_k5 || is_k6) /* AMD K5 and K6 */ + else if (is_k5 || is_k6) /* AMD K5 and K6 */ smram_restore_state_amd_k(saved_state); - else if (is_p6) /* Intel P6 (Pentium Pro, Pentium II, Celeron) */ + else if (is_p6) /* Intel P6 (Pentium Pro, Pentium II, Celeron) */ smram_restore_state_p6(saved_state); in_smm = 0; diff --git a/src/cpu/386_common.h 
b/src/cpu/386_common.h index 1273e43b7..b709e743d 100644 --- a/src/cpu/386_common.h +++ b/src/cpu/386_common.h @@ -20,111 +20,111 @@ #define _386_COMMON_H_ #include - #include +#include #ifdef OPS_286_386 -#define readmemb_n(s,a,b) readmembl_no_mmut_2386((s)+(a),b) -#define readmemw_n(s,a,b) readmemwl_no_mmut_2386((s)+(a),b) -#define readmeml_n(s,a,b) readmemll_no_mmut_2386((s)+(a),b) -#define readmemb(s,a) readmembl_2386((s)+(a)) -#define readmemw(s,a) readmemwl_2386((s)+(a)) -#define readmeml(s,a) readmemll_2386((s)+(a)) -#define readmemq(s,a) readmemql_2386((s)+(a)) +# define readmemb_n(s, a, b) readmembl_no_mmut_2386((s) + (a), b) +# define readmemw_n(s, a, b) readmemwl_no_mmut_2386((s) + (a), b) +# define readmeml_n(s, a, b) readmemll_no_mmut_2386((s) + (a), b) +# define readmemb(s, a) readmembl_2386((s) + (a)) +# define readmemw(s, a) readmemwl_2386((s) + (a)) +# define readmeml(s, a) readmemll_2386((s) + (a)) +# define readmemq(s, a) readmemql_2386((s) + (a)) -#define writememb_n(s,a,b,v) writemembl_no_mmut_2386((s)+(a),b,v) -#define writememw_n(s,a,b,v) writememwl_no_mmut_2386((s)+(a),b,v) -#define writememl_n(s,a,b,v) writememll_no_mmut_2386((s)+(a),b,v) -#define writememb(s,a,v) writemembl_2386((s)+(a),v) -#define writememw(s,a,v) writememwl_2386((s)+(a),v) -#define writememl(s,a,v) writememll_2386((s)+(a),v) -#define writememq(s,a,v) writememql_2386((s)+(a),v) +# define writememb_n(s, a, b, v) writemembl_no_mmut_2386((s) + (a), b, v) +# define writememw_n(s, a, b, v) writememwl_no_mmut_2386((s) + (a), b, v) +# define writememl_n(s, a, b, v) writememll_no_mmut_2386((s) + (a), b, v) +# define writememb(s, a, v) writemembl_2386((s) + (a), v) +# define writememw(s, a, v) writememwl_2386((s) + (a), v) +# define writememl(s, a, v) writememll_2386((s) + (a), v) +# define writememq(s, a, v) writememql_2386((s) + (a), v) -#define do_mmut_rb(s,a,b) do_mmutranslate_2386((s)+(a), b, 1, 0) -#define do_mmut_rw(s,a,b) do_mmutranslate_2386((s)+(a), b, 2, 0) -#define 
do_mmut_rl(s,a,b) do_mmutranslate_2386((s)+(a), b, 4, 0) -#define do_mmut_rb2(s,a,b) do_mmutranslate_2386((s)+(a), b, 1, 0) -#define do_mmut_rw2(s,a,b) do_mmutranslate_2386((s)+(a), b, 2, 0) -#define do_mmut_rl2(s,a,b) do_mmutranslate_2386((s)+(a), b, 4, 0) +# define do_mmut_rb(s, a, b) do_mmutranslate_2386((s) + (a), b, 1, 0) +# define do_mmut_rw(s, a, b) do_mmutranslate_2386((s) + (a), b, 2, 0) +# define do_mmut_rl(s, a, b) do_mmutranslate_2386((s) + (a), b, 4, 0) +# define do_mmut_rb2(s, a, b) do_mmutranslate_2386((s) + (a), b, 1, 0) +# define do_mmut_rw2(s, a, b) do_mmutranslate_2386((s) + (a), b, 2, 0) +# define do_mmut_rl2(s, a, b) do_mmutranslate_2386((s) + (a), b, 4, 0) -#define do_mmut_wb(s,a,b) do_mmutranslate_2386((s)+(a), b, 1, 1) -#define do_mmut_ww(s,a,b) do_mmutranslate_2386((s)+(a), b, 2, 1) -#define do_mmut_wl(s,a,b) do_mmutranslate_2386((s)+(a), b, 4, 1) +# define do_mmut_wb(s, a, b) do_mmutranslate_2386((s) + (a), b, 1, 1) +# define do_mmut_ww(s, a, b) do_mmutranslate_2386((s) + (a), b, 2, 1) +# define do_mmut_wl(s, a, b) do_mmutranslate_2386((s) + (a), b, 4, 1) #else -#define readmemb_n(s, a, b) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) ? readmembl_no_mmut((s) + (a), b) : *(uint8_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a)))) -#define readmemw_n(s, a, b) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) ? readmemwl_no_mmut((s) + (a), b) : *(uint16_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uint32_t) ((s) + (a)))) -#define readmeml_n(s, a, b) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) ? readmemll_no_mmut((s) + (a), b) : *(uint32_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uint32_t) ((s) + (a)))) -#define readmemb(s, a) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) ? 
readmembl((s) + (a)) : *(uint8_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a)))) -#define readmemw(s, a) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) ? readmemwl((s) + (a)) : *(uint16_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uint32_t) ((s) + (a)))) -#define readmeml(s, a) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) ? readmemll((s) + (a)) : *(uint32_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uint32_t) ((s) + (a)))) -#define readmemq(s, a) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 7)) ? readmemql((s) + (a)) : *(uint64_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a)))) +# define readmemb_n(s, a, b) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) ? readmembl_no_mmut((s) + (a), b) : *(uint8_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a)))) +# define readmemw_n(s, a, b) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) ? readmemwl_no_mmut((s) + (a), b) : *(uint16_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uint32_t) ((s) + (a)))) +# define readmeml_n(s, a, b) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) ? readmemll_no_mmut((s) + (a), b) : *(uint32_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uint32_t) ((s) + (a)))) +# define readmemb(s, a) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) ? readmembl((s) + (a)) : *(uint8_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a)))) +# define readmemw(s, a) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) ? 
readmemwl((s) + (a)) : *(uint16_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uint32_t) ((s) + (a)))) +# define readmeml(s, a) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) ? readmemll((s) + (a)) : *(uint32_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uint32_t) ((s) + (a)))) +# define readmemq(s, a) ((readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 7)) ? readmemql((s) + (a)) : *(uint64_t *) (readlookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a)))) -#define writememb_n(s, a, b, v) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ - writemembl_no_mmut((s) + (a), b, v); \ - else \ - *(uint8_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v -#define writememw_n(s, a, b, v) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) \ - writememwl_no_mmut((s) + (a), b, v); \ - else \ - *(uint16_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v -#define writememl_n(s, a, b, v) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ - writememll_no_mmut((s) + (a), b, v); \ - else \ - *(uint32_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v -#define writememb(s, a, v) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ - writemembl((s) + (a), v); \ - else \ - *(uint8_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v -#define writememw(s, a, v) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) \ - writememwl((s) + (a), v); \ - else \ - *(uint16_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + 
(a))) = v -#define writememl(s, a, v) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ - writememll((s) + (a), v); \ - else \ - *(uint32_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v -#define writememq(s, a, v) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 7)) \ - writememql((s) + (a), v); \ - else \ - *(uint64_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v +# define writememb_n(s, a, b, v) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ + writemembl_no_mmut((s) + (a), b, v); \ + else \ + *(uint8_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v +# define writememw_n(s, a, b, v) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) \ + writememwl_no_mmut((s) + (a), b, v); \ + else \ + *(uint16_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v +# define writememl_n(s, a, b, v) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ + writememll_no_mmut((s) + (a), b, v); \ + else \ + *(uint32_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v +# define writememb(s, a, v) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ + writemembl((s) + (a), v); \ + else \ + *(uint8_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v +# define writememw(s, a, v) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) \ + writememwl((s) + (a), v); \ + else \ + *(uint16_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v +# define 
writememl(s, a, v) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ + writememll((s) + (a), v); \ + else \ + *(uint32_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v +# define writememq(s, a, v) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 7)) \ + writememql((s) + (a), v); \ + else \ + *(uint64_t *) (writelookup2[(uint32_t) ((s) + (a)) >> 12] + (uintptr_t) ((s) + (a))) = v -#define do_mmut_rb(s, a, b) \ - if (readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ - do_mmutranslate((s) + (a), b, 1, 0) -#define do_mmut_rw(s, a, b) \ - if (readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) \ - do_mmutranslate((s) + (a), b, 2, 0) -#define do_mmut_rl(s, a, b) \ - if (readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ - do_mmutranslate((s) + (a), b, 4, 0) -#define do_mmut_rb2(s, a, b) \ - old_rl2 = readlookup2[(uint32_t) ((s) + (a)) >> 12]; \ - if (old_rl2 == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ - do_mmutranslate((s) + (a), b, 1, 0) -#define do_mmut_rw2(s, a, b) \ - old_rl2 = readlookup2[(uint32_t) ((s) + (a)) >> 12]; \ - if (old_rl2 == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) \ - do_mmutranslate((s) + (a), b, 2, 0) -#define do_mmut_rl2(s, a, b) \ - old_rl2 = readlookup2[(uint32_t) ((s) + (a)) >> 12]; \ - if (old_rl2 == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ - do_mmutranslate((s) + (a), b, 4, 0) +# define do_mmut_rb(s, a, b) \ + if (readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ + do_mmutranslate((s) + (a), b, 1, 0) +# define do_mmut_rw(s, a, b) \ + if (readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 
0xFFFFFFFF || (((s) + (a)) & 1)) \ + do_mmutranslate((s) + (a), b, 2, 0) +# define do_mmut_rl(s, a, b) \ + if (readlookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ + do_mmutranslate((s) + (a), b, 4, 0) +# define do_mmut_rb2(s, a, b) \ + old_rl2 = readlookup2[(uint32_t) ((s) + (a)) >> 12]; \ + if (old_rl2 == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ + do_mmutranslate((s) + (a), b, 1, 0) +# define do_mmut_rw2(s, a, b) \ + old_rl2 = readlookup2[(uint32_t) ((s) + (a)) >> 12]; \ + if (old_rl2 == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) \ + do_mmutranslate((s) + (a), b, 2, 0) +# define do_mmut_rl2(s, a, b) \ + old_rl2 = readlookup2[(uint32_t) ((s) + (a)) >> 12]; \ + if (old_rl2 == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ + do_mmutranslate((s) + (a), b, 4, 0) -#define do_mmut_wb(s, a, b) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ - do_mmutranslate((s) + (a), b, 1, 1) -#define do_mmut_ww(s, a, b) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) \ - do_mmutranslate((s) + (a), b, 2, 1) -#define do_mmut_wl(s, a, b) \ - if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ - do_mmutranslate((s) + (a), b, 4, 1) +# define do_mmut_wb(s, a, b) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF) \ + do_mmutranslate((s) + (a), b, 1, 1) +# define do_mmut_ww(s, a, b) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 1)) \ + do_mmutranslate((s) + (a), b, 2, 1) +# define do_mmut_wl(s, a, b) \ + if (writelookup2[(uint32_t) ((s) + (a)) >> 12] == (uintptr_t) LOOKUP_INV || (s) == 0xFFFFFFFF || (((s) + (a)) & 3)) \ + do_mmutranslate((s) + (a), b, 4, 1) #endif int 
checkio(uint32_t port, int mask); @@ -222,17 +222,20 @@ int checkio(uint32_t port, int mask); #ifdef OPS_286_386 /* TODO: Introduce functions to read exec. */ -static __inline uint8_t fastreadb(uint32_t a) +static __inline uint8_t +fastreadb(uint32_t a) { return readmembl(a); } -static __inline uint16_t fastreadw(uint32_t a) +static __inline uint16_t +fastreadw(uint32_t a) { return readmemwl(a); } -static __inline uint32_t fastreadl(uint32_t a) +static __inline uint32_t +fastreadl(uint32_t a) { return readmemll(a); } @@ -243,21 +246,21 @@ fastreadb(uint32_t a) uint8_t *t; if ((a >> 12) == pccache) -#if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) +# if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) return *((uint8_t *) (((uintptr_t) &pccache2[a] & 0x00000000ffffffffULL) | ((uintptr_t) &pccache2[0] & 0xffffffff00000000ULL))); -#else +# else return *((uint8_t *) &pccache2[a]); -#endif +# endif t = getpccache(a); if (cpu_state.abrt) return 0; pccache = a >> 12; pccache2 = t; -#if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) +# if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) return *((uint8_t *) (((uintptr_t) &pccache2[a] & 0x00000000ffffffffULL) | ((uintptr_t) &pccache2[0] & 0xffffffff00000000ULL))); -#else +# else return *((uint8_t *) &pccache2[a]); -#endif +# endif } static __inline uint16_t @@ -271,22 +274,22 @@ fastreadw(uint32_t a) return val; } if ((a >> 12) == pccache) -#if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) +# if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) return *((uint16_t *) (((uintptr_t) &pccache2[a] & 0x00000000ffffffffULL) | ((uintptr_t) &pccache2[0] & 0xffffffff00000000ULL))); -#else +# else return *((uint16_t *) &pccache2[a]); -#endif +# endif t = getpccache(a); if (cpu_state.abrt) return 0; pccache = a >> 12; pccache2 = t; -#if 
(defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) +# if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) return *((uint16_t *) (((uintptr_t) &pccache2[a] & 0x00000000ffffffffULL) | ((uintptr_t) &pccache2[0] & 0xffffffff00000000ULL))); -#else +# else return *((uint16_t *) &pccache2[a]); -#endif +# endif } static __inline uint32_t @@ -302,11 +305,11 @@ fastreadl(uint32_t a) pccache2 = t; pccache = a >> 12; } -#if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) +# if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) return *((uint32_t *) (((uintptr_t) &pccache2[a] & 0x00000000ffffffffULL) | ((uintptr_t) &pccache2[0] & 0xffffffff00000000ULL))); -#else +# else return *((uint32_t *) &pccache2[a]); -#endif +# endif } val = fastreadw(a); val |= (fastreadw(a + 2) << 16); @@ -378,22 +381,22 @@ fastreadw_fetch(uint32_t a) return val; } if ((a >> 12) == pccache) -#if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) +# if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) return *((uint16_t *) (((uintptr_t) &pccache2[a] & 0x00000000ffffffffULL) | ((uintptr_t) &pccache2[0] & 0xffffffff00000000ULL))); -#else +# else return *((uint16_t *) &pccache2[a]); -#endif +# endif t = getpccache(a); if (cpu_state.abrt) return 0; pccache = a >> 12; pccache2 = t; -#if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) +# if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) return *((uint16_t *) (((uintptr_t) &pccache2[a] & 0x00000000ffffffffULL) | ((uintptr_t) &pccache2[0] & 0xffffffff00000000ULL))); -#else +# else return *((uint16_t *) &pccache2[a]); -#endif +# endif } static __inline uint32_t @@ -409,11 +412,11 @@ fastreadl_fetch(uint32_t a) pccache2 = t; pccache = a >> 12; } -#if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || 
defined _M_ARM64) +# if (defined __amd64__ || defined _M_X64 || defined __aarch64__ || defined _M_ARM64) return *((uint32_t *) (((uintptr_t) &pccache2[a] & 0x00000000ffffffffULL) | ((uintptr_t) &pccache2[0] & 0xffffffff00000000ULL))); -#else +# else return *((uint32_t *) &pccache2[a]); -#endif +# endif } val = fastreadw_fetch(a); if (opcode_length[val & 0xff] > 2) @@ -451,65 +454,91 @@ getquad(void) } #ifdef OPS_286_386 -static __inline uint8_t geteab(void) +static __inline uint8_t +geteab(void) { if (cpu_mod == 3) - return (cpu_rm & 4) ? cpu_state.regs[cpu_rm & 3].b.h : cpu_state.regs[cpu_rm&3].b.l; + return (cpu_rm & 4) ? cpu_state.regs[cpu_rm & 3].b.h : cpu_state.regs[cpu_rm & 3].b.l; return readmemb(easeg, cpu_state.eaaddr); } -static __inline uint16_t geteaw(void) +static __inline uint16_t +geteaw(void) { if (cpu_mod == 3) return cpu_state.regs[cpu_rm].w; return readmemw(easeg, cpu_state.eaaddr); } -static __inline uint32_t geteal(void) +static __inline uint32_t +geteal(void) { if (cpu_mod == 3) return cpu_state.regs[cpu_rm].l; return readmeml(easeg, cpu_state.eaaddr); } -static __inline uint64_t geteaq(void) +static __inline uint64_t +geteaq(void) { return readmemq(easeg, cpu_state.eaaddr); } -static __inline uint8_t geteab_mem(void) +static __inline uint8_t +geteab_mem(void) { - return readmemb(easeg,cpu_state.eaaddr); + return readmemb(easeg, cpu_state.eaaddr); } -static __inline uint16_t geteaw_mem(void) +static __inline uint16_t +geteaw_mem(void) { - return readmemw(easeg,cpu_state.eaaddr); + return readmemw(easeg, cpu_state.eaaddr); } -static __inline uint32_t geteal_mem(void) +static __inline uint32_t +geteal_mem(void) { - return readmeml(easeg,cpu_state.eaaddr); + return readmeml(easeg, cpu_state.eaaddr); } -static __inline int seteaq_cwc(void) +static __inline int +seteaq_cwc(void) { CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr); return 0; } -static __inline void seteaq(uint64_t v) +static __inline void +seteaq(uint64_t v) { 
if (seteaq_cwc()) return; writememql(easeg + cpu_state.eaaddr, v); } -#define seteab(v) if (cpu_mod!=3) { CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr); writemembl_2386(easeg+cpu_state.eaaddr,v); } else if (cpu_rm&4) cpu_state.regs[cpu_rm&3].b.h=v; else cpu_state.regs[cpu_rm].b.l=v -#define seteaw(v) if (cpu_mod!=3) { CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 1); writememwl_2386(easeg+cpu_state.eaaddr,v); } else cpu_state.regs[cpu_rm].w=v -#define seteal(v) if (cpu_mod!=3) { CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 3); writememll_2386(easeg+cpu_state.eaaddr,v); } else cpu_state.regs[cpu_rm].l=v +# define seteab(v) \ + if (cpu_mod != 3) { \ + CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr); \ + writemembl_2386(easeg + cpu_state.eaaddr, v); \ + } else if (cpu_rm & 4) \ + cpu_state.regs[cpu_rm & 3].b.h = v; \ + else \ + cpu_state.regs[cpu_rm].b.l = v +# define seteaw(v) \ + if (cpu_mod != 3) { \ + CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 1); \ + writememwl_2386(easeg + cpu_state.eaaddr, v); \ + } else \ + cpu_state.regs[cpu_rm].w = v +# define seteal(v) \ + if (cpu_mod != 3) { \ + CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 3); \ + writememll_2386(easeg + cpu_state.eaaddr, v); \ + } else \ + cpu_state.regs[cpu_rm].l = v -#define seteab_mem(v) writemembl_2386(easeg+cpu_state.eaaddr,v); -#define seteaw_mem(v) writememwl_2386(easeg+cpu_state.eaaddr,v); -#define seteal_mem(v) writememll_2386(easeg+cpu_state.eaaddr,v); +# define seteab_mem(v) writemembl_2386(easeg + cpu_state.eaaddr, v); +# define seteaw_mem(v) writememwl_2386(easeg + cpu_state.eaaddr, v); +# define seteal_mem(v) writememll_2386(easeg + cpu_state.eaaddr, v); #else static __inline uint8_t geteab(void) @@ -584,51 +613,51 @@ seteaq(uint64_t v) writememql(easeg + cpu_state.eaaddr, v); } -#define seteab(v) \ - if (cpu_mod != 3) { \ - 
CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr); \ - if (eal_w) \ - *(uint8_t *) eal_w = v; \ - else \ - writemembl(easeg + cpu_state.eaaddr, v); \ - } else if (cpu_rm & 4) \ - cpu_state.regs[cpu_rm & 3].b.h = v; \ - else \ - cpu_state.regs[cpu_rm].b.l = v -#define seteaw(v) \ - if (cpu_mod != 3) { \ - CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 1); \ - if (eal_w) \ - *(uint16_t *) eal_w = v; \ +# define seteab(v) \ + if (cpu_mod != 3) { \ + CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr); \ + if (eal_w) \ + *(uint8_t *) eal_w = v; \ + else \ + writemembl(easeg + cpu_state.eaaddr, v); \ + } else if (cpu_rm & 4) \ + cpu_state.regs[cpu_rm & 3].b.h = v; \ else \ - writememwl(easeg + cpu_state.eaaddr, v); \ - } else \ - cpu_state.regs[cpu_rm].w = v -#define seteal(v) \ - if (cpu_mod != 3) { \ - CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 3); \ - if (eal_w) \ - *eal_w = v; \ - else \ - writememll(easeg + cpu_state.eaaddr, v); \ - } else \ - cpu_state.regs[cpu_rm].l = v + cpu_state.regs[cpu_rm].b.l = v +# define seteaw(v) \ + if (cpu_mod != 3) { \ + CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 1); \ + if (eal_w) \ + *(uint16_t *) eal_w = v; \ + else \ + writememwl(easeg + cpu_state.eaaddr, v); \ + } else \ + cpu_state.regs[cpu_rm].w = v +# define seteal(v) \ + if (cpu_mod != 3) { \ + CHECK_WRITE_COMMON(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 3); \ + if (eal_w) \ + *eal_w = v; \ + else \ + writememll(easeg + cpu_state.eaaddr, v); \ + } else \ + cpu_state.regs[cpu_rm].l = v -#define seteab_mem(v) \ - if (eal_w) \ - *(uint8_t *) eal_w = v; \ - else \ - writemembl(easeg + cpu_state.eaaddr, v); -#define seteaw_mem(v) \ - if (eal_w) \ - *(uint16_t *) eal_w = v; \ - else \ - writememwl(easeg + cpu_state.eaaddr, v); -#define seteal_mem(v) \ - if (eal_w) \ - *eal_w = v; \ - else \ - writememll(easeg + cpu_state.eaaddr, v); +# 
define seteab_mem(v) \ + if (eal_w) \ + *(uint8_t *) eal_w = v; \ + else \ + writemembl(easeg + cpu_state.eaaddr, v); +# define seteaw_mem(v) \ + if (eal_w) \ + *(uint16_t *) eal_w = v; \ + else \ + writememwl(easeg + cpu_state.eaaddr, v); +# define seteal_mem(v) \ + if (eal_w) \ + *eal_w = v; \ + else \ + writememll(easeg + cpu_state.eaaddr, v); #endif #define getbytef() \ diff --git a/src/cpu/386_dynarec.c b/src/cpu/386_dynarec.c index 38261a234..a31704c1b 100644 --- a/src/cpu/386_dynarec.c +++ b/src/cpu/386_dynarec.c @@ -46,8 +46,8 @@ #define CPU_BLOCK_END() cpu_block_end = 1 -int inrecomp = 0; -int cpu_block_end = 0; +int inrecomp = 0; +int cpu_block_end = 0; int cpu_end_block_after_ins = 0; #ifdef ENABLE_386_DYNAREC_LOG @@ -335,9 +335,9 @@ exec386_dynarec_int(void) static __inline void exec386_dynarec_dyn(void) { - uint32_t start_pc = 0; + uint32_t start_pc = 0; uint32_t phys_addr = get_phys(cs + cpu_state.pc); - int hash = HASH(phys_addr); + int hash = HASH(phys_addr); # ifdef USE_NEW_DYNAREC codeblock_t *block = &codeblock[codeblock_hash[hash]]; # else diff --git a/src/cpu/386_ops.h b/src/cpu/386_ops.h index 6a4646e4b..dc5eea606 100644 --- a/src/cpu/386_ops.h +++ b/src/cpu/386_ops.h @@ -180,7 +180,7 @@ extern void x386_dynarec_log(const char *fmt, ...); #include "x86_ops_bit.h" #include "x86_ops_bitscan.h" #ifndef OPS_286_386 -#include "x86_ops_cyrix.h" +# include "x86_ops_cyrix.h" #endif #include "x86_ops_flag.h" #include "x86_ops_fpu.h" @@ -191,21 +191,21 @@ extern void x386_dynarec_log(const char *fmt, ...); #include "x86_ops_misc.h" #include "x87_ops.h" #ifndef OPS_286_386 -#include "x86_ops_i686.h" -#include "x86_ops_mmx.h" -#include "x86_ops_mmx_arith.h" -#include "x86_ops_mmx_cmp.h" -#include "x86_ops_mmx_logic.h" -#include "x86_ops_mmx_mov.h" -#include "x86_ops_mmx_pack.h" -#include "x86_ops_mmx_shift.h" +# include "x86_ops_i686.h" +# include "x86_ops_mmx.h" +# include "x86_ops_mmx_arith.h" +# include "x86_ops_mmx_cmp.h" +# include 
"x86_ops_mmx_logic.h" +# include "x86_ops_mmx_mov.h" +# include "x86_ops_mmx_pack.h" +# include "x86_ops_mmx_shift.h" #endif #include "x86_ops_mov.h" #include "x86_ops_mov_ctrl.h" #include "x86_ops_mov_seg.h" #include "x86_ops_movx.h" #ifndef OPS_286_386 -#include "x86_ops_msr.h" +# include "x86_ops_msr.h" #endif #include "x86_ops_mul.h" #include "x86_ops_pmode.h" @@ -213,26 +213,26 @@ extern void x386_dynarec_log(const char *fmt, ...); #ifdef IS_DYNAREC # include "x86_ops_rep_dyn.h" #else -#ifdef OPS_286_386 -# include "x86_ops_rep_2386.h" -#else -# include "x86_ops_rep.h" -#endif +# ifdef OPS_286_386 +# include "x86_ops_rep_2386.h" +# else +# include "x86_ops_rep.h" +# endif #endif #include "x86_ops_ret.h" #include "x86_ops_set.h" #include "x86_ops_stack.h" #ifdef OPS_286_386 -#include "x86_ops_string_2386.h" +# include "x86_ops_string_2386.h" #else -#include "x86_ops_string.h" +# include "x86_ops_string.h" #endif #include "x86_ops_xchg.h" #include "x86_ops_call.h" #include "x86_ops_shift.h" #ifndef OPS_286_386 -#include "x86_ops_amd.h" -#include "x86_ops_3dnow.h" +# include "x86_ops_amd.h" +# include "x86_ops_3dnow.h" #endif #include @@ -351,40 +351,44 @@ opVPCEXT(uint32_t fetchdat) #endif #ifdef OPS_286_386 -static int op0F_w_a16(uint32_t fetchdat) +static int +op0F_w_a16(uint32_t fetchdat) { int opcode = fetchdat & 0xff; - fopcode = opcode; + fopcode = opcode; cpu_state.pc++; PREFETCH_PREFIX(); return x86_2386_opcodes_0f[opcode](fetchdat >> 8); } -static int op0F_l_a16(uint32_t fetchdat) +static int +op0F_l_a16(uint32_t fetchdat) { int opcode = fetchdat & 0xff; - fopcode = opcode; + fopcode = opcode; cpu_state.pc++; PREFETCH_PREFIX(); return x86_2386_opcodes_0f[opcode | 0x100](fetchdat >> 8); } -static int op0F_w_a32(uint32_t fetchdat) +static int +op0F_w_a32(uint32_t fetchdat) { int opcode = fetchdat & 0xff; - fopcode = opcode; + fopcode = opcode; cpu_state.pc++; PREFETCH_PREFIX(); return x86_2386_opcodes_0f[opcode | 0x200](fetchdat >> 8); } -static int 
op0F_l_a32(uint32_t fetchdat) +static int +op0F_l_a32(uint32_t fetchdat) { int opcode = fetchdat & 0xff; - fopcode = opcode; + fopcode = opcode; cpu_state.pc++; PREFETCH_PREFIX(); @@ -1361,7 +1365,7 @@ const OpFn OP_TABLE(pentium_0f)[1024] = { // clang-format on }; -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +# if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) const OpFn OP_TABLE(c6x86_0f)[1024] = { // clang-format off /*16-bit data, 16-bit addr*/ @@ -1453,7 +1457,7 @@ const OpFn OP_TABLE(c6x86_0f)[1024] = { /*f0*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, // clang-format on }; -#endif +# endif const OpFn OP_TABLE(pentiummmx_0f)[1024] = { // clang-format off @@ -1731,7 +1735,7 @@ const OpFn OP_TABLE(k62_0f)[1024] = { // clang-format on }; -#if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) +# if defined(DEV_BRANCH) && defined(USE_CYRIX_6X86) const OpFn OP_TABLE(c6x86mx_0f)[1024] = { // clang-format off /*16-bit data, 16-bit addr*/ @@ -1823,7 +1827,7 @@ const OpFn OP_TABLE(c6x86mx_0f)[1024] = { /*f0*/ ILLEGAL, opPSLLW_a32, opPSLLD_a32, opPSLLQ_a32, ILLEGAL, opPMADDWD_a32, ILLEGAL, ILLEGAL, opPSUBB_a32, opPSUBW_a32, opPSUBD_a32, ILLEGAL, opPADDB_a32, opPADDW_a32, opPADDD_a32, ILLEGAL, // clang-format on }; -#endif +# endif const OpFn OP_TABLE(pentiumpro_0f)[1024] = { // clang-format off diff --git a/src/cpu/808x.c b/src/cpu/808x.c index 690a6e0f9..4f52fb8d2 100644 --- a/src/cpu/808x.c +++ b/src/cpu/808x.c @@ -76,34 +76,34 @@ static int in_rep = 0; static int repeating = 0; static int rep_c_flag = 0; static int oldc; -static int clear_lock = 0; -static int refresh = 0; +static int clear_lock = 0; +static int refresh = 0; static int cycdiff; -static int access_code = 0; -static int hlda = 0; -static int not_ready = 0; -static int bus_request_type = 0; -static int pic_data = -1; -static int last_was_code = 0; -static uint16_t mem_data = 0; -static uint32_t 
mem_seg = 0; -static uint16_t mem_addr = 0; -static int schedule_fetch = 1; -static int pasv = 0; +static int access_code = 0; +static int hlda = 0; +static int not_ready = 0; +static int bus_request_type = 0; +static int pic_data = -1; +static int last_was_code = 0; +static uint16_t mem_data = 0; +static uint32_t mem_seg = 0; +static uint16_t mem_addr = 0; +static int schedule_fetch = 1; +static int pasv = 0; -#define BUS_OUT 1 -#define BUS_HIGH 2 -#define BUS_WIDE 4 -#define BUS_CODE 8 -#define BUS_IO 16 -#define BUS_MEM 32 -#define BUS_PIC 64 +#define BUS_OUT 1 +#define BUS_HIGH 2 +#define BUS_WIDE 4 +#define BUS_CODE 8 +#define BUS_IO 16 +#define BUS_MEM 32 +#define BUS_PIC 64 #define BUS_ACCESS_TYPE (BUS_CODE | BUS_IO | BUS_MEM | BUS_PIC) -#define BUS_CYCLE (biu_cycles & 3) -#define BUS_CYCLE_T1 biu_cycles = 0 -#define BUS_CYCLE_NEXT biu_cycles = (biu_cycles + 1) & 3 +#define BUS_CYCLE (biu_cycles & 3) +#define BUS_CYCLE_T1 biu_cycles = 0 +#define BUS_CYCLE_NEXT biu_cycles = (biu_cycles + 1) & 3 enum { BUS_T1 = 0, @@ -221,7 +221,7 @@ clock_end(void) static void process_timers(void) -{ +{ clock_end(); clock_start(); } @@ -258,7 +258,7 @@ bus_outw(uint16_t port, uint16_t val) static uint8_t bus_inb(uint16_t port) { - int old_cycles = cycles; + int old_cycles = cycles; uint8_t ret; cycles--; @@ -271,7 +271,7 @@ bus_inb(uint16_t port) static uint16_t bus_inw(uint16_t port) { - int old_cycles = cycles; + int old_cycles = cycles; uint16_t ret; cycles--; @@ -377,7 +377,7 @@ run_bus_cycle(int io_type) not_ready = 0; } - switch(BUS_CYCLE) { + switch (BUS_CYCLE) { case BUS_T1: access_code = !!(io_type & BUS_CODE); break; @@ -394,7 +394,7 @@ run_bus_cycle(int io_type) bus_do_mem(io_type); break; case BUS_PIC: - pic_data = pic_irq_ack(); + pic_data = pic_irq_ack(); last_was_code = 0; break; default: @@ -455,17 +455,17 @@ cycles_biu(int bus, int init) switch (BUS_CYCLE) { case BUS_T1: case BUS_T2: - BUS_CYCLE_T1; /* Simply abort the prefetch before actual scheduling, no 
penalty. */ + BUS_CYCLE_T1; /* Simply abort the prefetch before actual scheduling, no penalty. */ break; case BUS_T3: case BUS_T4: - cycles_idle(5 - BUS_CYCLE); /* Leftover BIU cycles + 2 idle cycles. */ - BUS_CYCLE_T1; /* Abort the prefetch. */ + cycles_idle(5 - BUS_CYCLE); /* Leftover BIU cycles + 2 idle cycles. */ + BUS_CYCLE_T1; /* Abort the prefetch. */ break; } schedule_fetch = 0; - access_code = 0; + access_code = 0; } } @@ -570,7 +570,7 @@ cpu_io(int bits, int out, uint16_t port) } else { bus_request_type = BUS_IO | BUS_OUT; wait(4, 1); - schedule_fetch = 0; + schedule_fetch = 0; bus_request_type = BUS_IO | BUS_OUT | BUS_HIGH; wait(4, 1); } @@ -586,7 +586,7 @@ cpu_io(int bits, int out, uint16_t port) } else { bus_request_type = BUS_IO; wait(4, 1); - schedule_fetch = 0; + schedule_fetch = 0; bus_request_type = BUS_IO | BUS_HIGH; wait(4, 1); } @@ -609,11 +609,11 @@ readmemb(uint32_t s, uint16_t a) bus_init(); #endif - mem_seg = s; - mem_addr = a; + mem_seg = s; + mem_addr = a; bus_request_type = BUS_MEM; wait(4, 1); - ret = mem_data & 0xff; + ret = mem_data & 0xff; bus_request_type = 0; return ret; @@ -643,7 +643,7 @@ readmemw(uint32_t s, uint16_t a) bus_init(); #endif - mem_seg = s; + mem_seg = s; mem_addr = a; if (is8086 && !(a & 1)) { bus_request_type = BUS_MEM | BUS_WIDE; @@ -651,11 +651,11 @@ readmemw(uint32_t s, uint16_t a) } else { bus_request_type = BUS_MEM | BUS_HIGH; wait(4, 1); - schedule_fetch = 0; + schedule_fetch = 0; bus_request_type = BUS_MEM; wait(4, 1); } - ret = mem_data; + ret = mem_data; bus_request_type = 0; return ret; @@ -716,9 +716,9 @@ writememb(uint32_t s, uint32_t a, uint8_t v) bus_init(); #endif - mem_seg = s; - mem_addr = a; - mem_data = v; + mem_seg = s; + mem_addr = a; + mem_data = v; bus_request_type = BUS_MEM | BUS_OUT; wait(4, 1); bus_request_type = 0; @@ -737,7 +737,7 @@ writememw(uint32_t s, uint32_t a, uint16_t v) bus_init(); #endif - mem_seg = s; + mem_seg = s; mem_addr = a; mem_data = v; if (is8086 && !(a & 1)) { @@ 
-746,7 +746,7 @@ writememw(uint32_t s, uint32_t a, uint16_t v) } else { bus_request_type = BUS_MEM | BUS_OUT | BUS_HIGH; wait(4, 1); - schedule_fetch = 0; + schedule_fetch = 0; bus_request_type = BUS_MEM | BUS_OUT; wait(4, 1); } @@ -791,13 +791,13 @@ pfq_write(void) free in the queue. */ tempw = readmemwf(pfq_ip); *(uint16_t *) &(pfq[pfq_pos]) = tempw; - pfq_ip = (pfq_ip + 2) & 0xffff; + pfq_ip = (pfq_ip + 2) & 0xffff; pfq_pos += 2; } else if (!fetch_word && (pfq_pos < pfq_size)) { /* The 8088 fetches 1 byte at a time, and only if there's at least 1 byte free in the queue. */ pfq[pfq_pos] = readmembf(pfq_ip); - pfq_ip = (pfq_ip + 1) & 0xffff; + pfq_ip = (pfq_ip + 1) & 0xffff; pfq_pos++; } @@ -890,8 +890,8 @@ pfq_add(void) static void pfq_clear(void) { - pfq_pos = 0; - prefetching = 0; + pfq_pos = 0; + prefetching = 0; schedule_fetch = 0; BUS_CYCLE_T1; @@ -900,8 +900,8 @@ pfq_clear(void) static void pfq_suspend(void) { - pfq_clear(); - cycles_idle(3); + pfq_clear(); + cycles_idle(3); } static void @@ -955,20 +955,20 @@ reset_808x(int hard) schedule_fetch = 1; pasv = 0; - cpu_alu_op = 0; + cpu_alu_op = 0; use_custom_nmi_vector = 0x00; custom_nmi_vector = 0x00000000; - access_code = 0; - hlda = 0; - not_ready = 0; - bus_request_type = 0; - pic_data = -1; - last_was_code = 0; - mem_data = 0; - mem_seg = 0; - mem_addr = 0; + access_code = 0; + hlda = 0; + not_ready = 0; + bus_request_type = 0; + pic_data = -1; + last_was_code = 0; + mem_data = 0; + mem_seg = 0; + mem_addr = 0; } static void @@ -976,7 +976,7 @@ set_ip(uint16_t new_ip) { pfq_ip = cpu_state.pc = new_ip; prefetching = 1; - schedule_fetch = prefetching && (pfq_pos < pfq_size); + schedule_fetch = prefetching && (pfq_pos < pfq_size); } /* Memory refresh read - called by reads and writes on DMA channel 0. */ @@ -1025,16 +1025,17 @@ do_mod_rm(void) easeg = ovr_seg ? 
*ovr_seg : ds; wait(2, 0); return; - } else switch (cpu_rm) { - case 0: - case 3: - wait(2, 0); - break; - case 1: - case 2: - wait(3, 0); - break; - } + } else + switch (cpu_rm) { + case 0: + case 3: + wait(2, 0); + break; + case 1: + case 2: + wait(3, 0); + break; + } cpu_state.eaaddr = (*mod1add[0][cpu_rm]) + (*mod1add[1][cpu_rm]); easeg = ovr_seg ? *ovr_seg : *mod1seg[cpu_rm]; switch (rmdat & 0xc0) { @@ -1220,11 +1221,11 @@ interrupt(uint16_t addr) addr <<= 2; cpu_state.eaaddr = addr; old_cs = CS; - new_ip = readmemw(0, cpu_state.eaaddr); + new_ip = readmemw(0, cpu_state.eaaddr); wait(1, 0); cpu_state.eaaddr = (cpu_state.eaaddr + 2) & 0xffff; - new_cs = readmemw(0, cpu_state.eaaddr); - prefetching = 0; + new_cs = readmemw(0, cpu_state.eaaddr); + prefetching = 0; pfq_clear(); ovr_seg = NULL; wait(2, 0); @@ -1296,7 +1297,7 @@ bus_pic_ack(void) { int old_in_lock = in_lock; - in_lock = 1; + in_lock = 1; bus_request_type = BUS_PIC; wait(4, 1); in_lock = old_in_lock; @@ -1376,7 +1377,7 @@ rep_interrupt(void) return 0; } completed = 1; - CX = tmpc; + CX = tmpc; pfq_clear(); if (is_nec && (ovr_seg != NULL)) set_ip(cpu_state.pc - 3); @@ -2011,9 +2012,9 @@ execx86(int cycs) #if 0 opcode = pfq_fetchb(); #endif - opcode = pfq_fetchb_common(); - handled = 0; - oldc = cpu_state.flags & C_FLAG; + opcode = pfq_fetchb_common(); + handled = 0; + oldc = cpu_state.flags & C_FLAG; if (clear_lock) { in_lock = 0; clear_lock = 0; @@ -2587,7 +2588,7 @@ execx86(int cycs) break; default: - opcode = orig_opcode; + opcode = orig_opcode; cpu_state.pc = (cpu_state.pc - 1) & 0xffff; break; } @@ -3165,7 +3166,7 @@ execx86(int cycs) wait(2, 0); cpu_state.eaaddr = DI; cpu_data = readmem(es); - DI = string_increment(bits); + DI = string_increment(bits); cpu_src = cpu_data; cpu_dest = tmpa; sub(bits); @@ -3582,7 +3583,7 @@ execx86(int cycs) case 0xE5: bits = 8 << (opcode & 1); wait(1, 0); - cpu_data = pfq_fetchb(); + cpu_data = pfq_fetchb(); cpu_state.eaaddr = cpu_data; wait(1, 0); cpu_io(bits, 0, 
cpu_state.eaaddr); @@ -3591,16 +3592,16 @@ execx86(int cycs) case 0xE7: bits = 8 << (opcode & 1); wait(1, 0); - cpu_data = pfq_fetchb(); + cpu_data = pfq_fetchb(); cpu_state.eaaddr = cpu_data; - cpu_data = (bits == 16) ? AX : AL; + cpu_data = (bits == 16) ? AX : AL; wait(2, 0); cpu_io(bits, 1, cpu_state.eaaddr); break; case 0xEC: case 0xED: - bits = 8 << (opcode & 1); - cpu_data = DX; + bits = 8 << (opcode & 1); + cpu_data = DX; cpu_state.eaaddr = cpu_data; wait(1, 0); cpu_io(bits, 0, cpu_state.eaaddr); @@ -3609,9 +3610,9 @@ execx86(int cycs) case 0xEF: bits = 8 << (opcode & 1); wait(2, 0); - cpu_data = DX; + cpu_data = DX; cpu_state.eaaddr = cpu_data; - cpu_data = (bits == 16) ? AX : AL; + cpu_data = (bits == 16) ? AX : AL; cpu_io(bits, 1, cpu_state.eaaddr); wait(1, 0); break; diff --git a/src/cpu/codegen_timing_common.h b/src/cpu/codegen_timing_common.h index cc3ff5a6f..3d28a89de 100644 --- a/src/cpu/codegen_timing_common.h +++ b/src/cpu/codegen_timing_common.h @@ -5,9 +5,9 @@ /*Instruction has input dependency on register in R/M field*/ #define SRCDEP_RM (1ULL << 1) /*Instruction modifies register in REG field*/ -#define DSTDEP_REG (1ULL<< 2) +#define DSTDEP_REG (1ULL << 2) /*Instruction modifies register in R/M field*/ -#define DSTDEP_RM (1ULL << 3) +#define DSTDEP_RM (1ULL << 3) #define SRCDEP_SHIFT 4 #define DSTDEP_SHIFT 12 diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c index c5ed8a310..42ad8aa70 100644 --- a/src/cpu/cpu.c +++ b/src/cpu/cpu.c @@ -241,7 +241,7 @@ uint32_t cache_index = 0; uint8_t _cache[2048]; uint64_t cpu_CR4_mask; -uint64_t tsc = 0; +uint64_t tsc = 0; uint64_t pmc[2] = { 0, 0 }; double cpu_dmulti; @@ -254,7 +254,7 @@ cyrix_t cyrix; cpu_family_t *cpu_f; CPU *cpu_s; -uint8_t do_translate = 0; +uint8_t do_translate = 0; uint8_t do_translate2 = 0; void (*cpu_exec)(int cycs); @@ -474,7 +474,7 @@ SF_FPU_reset(void) fpu_state.fcs = 0; fpu_state.fds = 0; fpu_state.fdp = 0; - memset(fpu_state.st_space, 0, sizeof(floatx80)*8); + memset(fpu_state.st_space, 
0, sizeof(floatx80) * 8); } } @@ -561,11 +561,11 @@ cpu_set(void) x86_setopcodes(ops_386, ops_386_0f); #endif x86_setopcodes_2386(ops_2386_386, ops_2386_386_0f); - x86_opcodes_REPE = ops_REPE; - x86_opcodes_REPNE = ops_REPNE; - x86_2386_opcodes_REPE = ops_2386_REPE; + x86_opcodes_REPE = ops_REPE; + x86_opcodes_REPNE = ops_REPNE; + x86_2386_opcodes_REPE = ops_2386_REPE; x86_2386_opcodes_REPNE = ops_2386_REPNE; - x86_opcodes_3DNOW = ops_3DNOW; + x86_opcodes_3DNOW = ops_3DNOW; #ifdef USE_DYNAREC x86_dynarec_opcodes_REPE = dynarec_ops_REPE; x86_dynarec_opcodes_REPNE = dynarec_ops_REPNE; @@ -1442,9 +1442,9 @@ cpu_set(void) x86_setopcodes(ops_386, ops_pentium_0f, dynarec_ops_386, dynarec_ops_pentium_0f); else x86_setopcodes(ops_386, ops_c6x86mx_0f, dynarec_ops_386, dynarec_ops_c6x86mx_0f); -#if 0 +# if 0 x86_setopcodes(ops_386, ops_c6x86_0f, dynarec_ops_386, dynarec_ops_c6x86_0f); -#endif +# endif # else if (cpu_s->cpu_type == CPU_Cx6x86MX) x86_setopcodes(ops_386, ops_c6x86mx_0f); @@ -1452,9 +1452,9 @@ cpu_set(void) x86_setopcodes(ops_386, ops_pentium_0f); else x86_setopcodes(ops_386, ops_c6x86mx_0f); -#if 0 +# if 0 x86_setopcodes(ops_386, ops_c6x86_0f); -#endif +# endif # endif timing_rr = 1; /* register dest - register src */ @@ -1791,11 +1791,11 @@ cpu_set(void) cpu_exec = exec386_dynarec; else #endif - /* Use exec386 for CPU_IBM486SLC because it can reach 100 MHz. */ - if ((cpu_s->cpu_type == CPU_IBM486SLC) || (cpu_s->cpu_type > CPU_486DLC)) - cpu_exec = exec386; - else - cpu_exec = exec386_2386; + /* Use exec386 for CPU_IBM486SLC because it can reach 100 MHz. 
*/ + if ((cpu_s->cpu_type == CPU_IBM486SLC) || (cpu_s->cpu_type > CPU_486DLC)) + cpu_exec = exec386; + else + cpu_exec = exec386_2386; } else if (cpu_s->cpu_type >= CPU_286) cpu_exec = exec386_2386; else @@ -3567,7 +3567,7 @@ x86_setopcodes(const OpFn *opcodes, const OpFn *opcodes_0f, #else x86_setopcodes(const OpFn *opcodes, const OpFn *opcodes_0f) { - x86_opcodes = opcodes; + x86_opcodes = opcodes; x86_opcodes_0f = opcodes_0f; } #endif @@ -3575,7 +3575,7 @@ x86_setopcodes(const OpFn *opcodes, const OpFn *opcodes_0f) void x86_setopcodes_2386(const OpFn *opcodes, const OpFn *opcodes_0f) { - x86_2386_opcodes = opcodes; + x86_2386_opcodes = opcodes; x86_2386_opcodes_0f = opcodes_0f; } diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h index cbd1c1129..7fdfcb3d4 100644 --- a/src/cpu/cpu.h +++ b/src/cpu/cpu.h @@ -280,12 +280,12 @@ typedef struct { /* Pentium Pro, Pentium II Klamath, and Pentium II Deschutes MSR's */ uint64_t mcg_ctl; /* 0x0000017b - Machine Check Architecture */ - uint64_t ecx186; /* 0x00000186, 0x00000187 */ - uint64_t ecx187; /* 0x00000186, 0x00000187 */ + uint64_t ecx186; /* 0x00000186, 0x00000187 */ + uint64_t ecx187; /* 0x00000186, 0x00000187 */ /* Pentium Pro, Pentium II Klamath, and Pentium II Deschutes MSR's */ uint64_t debug_ctl; /* 0x000001d9 - Debug Registers Control */ - uint64_t ecx1e0; /* 0x000001e0 */ + uint64_t ecx1e0; /* 0x000001e0 */ /* Pentium Pro, Pentium II Klamath, and Pentium II Deschutes MSR's that are also on the VIA Cyrix III */ @@ -450,7 +450,7 @@ typedef struct { #define in_smm cpu_state._in_smm #define smi_line cpu_state._smi_line -#define smbase cpu_state._smbase +#define smbase cpu_state._smbase /*The cpu_state.flags below must match in both cpu_cur_status and block->status for a block to be valid*/ @@ -460,9 +460,9 @@ typedef struct { #define CPU_STATUS_V86 (1 << 3) #define CPU_STATUS_SMM (1 << 4) #ifdef USE_NEW_DYNAREC -#define CPU_STATUS_FLAGS 0xff +# define CPU_STATUS_FLAGS 0xff #else -#define CPU_STATUS_FLAGS 0xffff +# 
define CPU_STATUS_FLAGS 0xffff #endif /*If the cpu_state.flags below are set in cpu_cur_status, they must be set in block->status. @@ -621,8 +621,8 @@ extern uint64_t star; #define FPU_CW_Reserved_Bits (0xe0c0) -#define cr0 cpu_state.CR0.l -#define msw cpu_state.CR0.w +#define cr0 cpu_state.CR0.l +#define msw cpu_state.CR0.w extern uint32_t cr2; extern uint32_t cr3; extern uint32_t cr4; @@ -726,8 +726,8 @@ extern void loadseg_dynarec(uint16_t seg, x86seg *s); extern int loadseg(uint16_t seg, x86seg *s); extern void loadcs(uint16_t seg); #else -extern void loadseg(uint16_t seg, x86seg *s); -extern void loadcs(uint16_t seg); +extern void loadseg(uint16_t seg, x86seg *s); +extern void loadcs(uint16_t seg); #endif extern char *cpu_current_pc(char *bufp); @@ -764,11 +764,11 @@ extern void pmodeint(int num, int soft); extern void pmoderetf(int is32, uint16_t off); extern void pmodeiret(int is32); #else -extern void loadcscall(uint16_t seg); -extern void loadcsjmp(uint16_t seg, uint32_t old_pc); -extern void pmodeint(int num, int soft); -extern void pmoderetf(int is32, uint16_t off); -extern void pmodeiret(int is32); +extern void loadcscall(uint16_t seg); +extern void loadcsjmp(uint16_t seg, uint32_t old_pc); +extern void pmodeint(int num, int soft); +extern void pmoderetf(int is32, uint16_t off); +extern void pmodeiret(int is32); #endif extern void resetmcr(void); extern void resetx86(void); @@ -859,7 +859,7 @@ extern void cpu_fast_off_reset(void); extern void smi_raise(void); extern void nmi_raise(void); -extern MMX_REG *MMP[8]; +extern MMX_REG *MMP[8]; extern uint16_t *MMEP[8]; extern void mmx_init(void); diff --git a/src/cpu/x86.c b/src/cpu/x86.c index 182431773..f994e1946 100644 --- a/src/cpu/x86.c +++ b/src/cpu/x86.c @@ -59,11 +59,11 @@ uint32_t rmdat; uint64_t xt_cpu_multi; /* Variables for handling the non-maskable interrupts. */ -int nmi = 0; +int nmi = 0; int nmi_auto_clear = 0; /* Was the CPU ever reset? 
*/ -int x86_was_reset = 0; +int x86_was_reset = 0; int soft_reset_pci = 0; /* Is the TRAP flag on? */ @@ -273,7 +273,7 @@ reset_common(int hard) loadcs(0xF000); cpu_state.pc = 0xFFF0; if (hard) { - rammask = cpu_16bitbus ? 0xFFFFFF : 0xFFFFFFFF; + rammask = cpu_16bitbus ? 0xFFFFFF : 0xFFFFFFFF; if (is6117) rammask |= 0x03000000; mem_a20_key = mem_a20_alt = mem_a20_state = 0; diff --git a/src/cpu/x86_ops.h b/src/cpu/x86_ops.h index ee3618d52..6fb9b7a22 100644 --- a/src/cpu/x86_ops.h +++ b/src/cpu/x86_ops.h @@ -190,7 +190,6 @@ extern const OpFn dynarec_ops_3DNOWE[256]; extern void x86_setopcodes(const OpFn *opcodes, const OpFn *opcodes_0f); #endif - extern const OpFn *x86_opcodes; extern const OpFn *x86_opcodes_0f; extern const OpFn *x86_opcodes_d8_a16; @@ -330,7 +329,6 @@ extern const OpFn ops_REPNE[1024]; extern const OpFn ops_3DNOW[256]; extern const OpFn ops_3DNOWE[256]; - extern void x86_setopcodes_2386(const OpFn *opcodes, const OpFn *opcodes_0f); extern const OpFn *x86_2386_opcodes; @@ -436,7 +434,6 @@ extern const OpFn ops_2386_REPE[1024]; extern const OpFn ops_2386_REPNE[1024]; extern const OpFn ops_2386_3DNOW[256]; - #define C0 (1 << 8) #define C1 (1 << 9) #define C2 (1 << 10) diff --git a/src/cpu/x86_ops_3dnow.h b/src/cpu/x86_ops_3dnow.h index e9826a7e3..b72cbc06c 100644 --- a/src/cpu/x86_ops_3dnow.h +++ b/src/cpu/x86_ops_3dnow.h @@ -35,7 +35,7 @@ opFEMMS(uint32_t fetchdat) static int opPAVGUSB(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -56,7 +56,7 @@ opPAVGUSB(uint32_t fetchdat) static int opPF2ID(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -71,7 +71,7 @@ opPF2ID(uint32_t fetchdat) static int opPF2IW(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -86,9 +86,9 @@ opPF2IW(uint32_t fetchdat) static int opPFACC(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = 
MMX_GETREGP(cpu_reg); - float tempf; + float tempf; MMX_GETSRC(); @@ -103,9 +103,9 @@ opPFACC(uint32_t fetchdat) static int opPFNACC(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); - float tempf; + float tempf; MMX_GETSRC(); @@ -120,9 +120,9 @@ opPFNACC(uint32_t fetchdat) static int opPFPNACC(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); - float tempf; + float tempf; MMX_GETSRC(); @@ -137,10 +137,10 @@ opPFPNACC(uint32_t fetchdat) static int opPSWAPD(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); - float tempf; - float tempf2; + float tempf; + float tempf2; MMX_GETSRC(); @@ -157,7 +157,7 @@ opPSWAPD(uint32_t fetchdat) static int opPFADD(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -172,7 +172,7 @@ opPFADD(uint32_t fetchdat) static int opPFCMPEQ(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -187,7 +187,7 @@ opPFCMPEQ(uint32_t fetchdat) static int opPFCMPGE(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -202,7 +202,7 @@ opPFCMPGE(uint32_t fetchdat) static int opPFCMPGT(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -217,7 +217,7 @@ opPFCMPGT(uint32_t fetchdat) static int opPFMAX(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -234,7 +234,7 @@ opPFMAX(uint32_t fetchdat) static int opPFMIN(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -251,7 +251,7 @@ opPFMIN(uint32_t fetchdat) static int opPFMUL(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -295,7 +295,7 @@ opPFRCP(uint32_t fetchdat) static int opPFRCPIT1(uint32_t fetchdat) { - MMX_REG src; + 
MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -310,7 +310,7 @@ opPFRCPIT1(uint32_t fetchdat) static int opPFRCPIT2(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -364,7 +364,7 @@ opPFRSQIT1(uint32_t fetchdat) static int opPFSUB(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -379,7 +379,7 @@ opPFSUB(uint32_t fetchdat) static int opPFSUBR(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -394,7 +394,7 @@ opPFSUBR(uint32_t fetchdat) static int opPI2FD(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -409,7 +409,7 @@ opPI2FD(uint32_t fetchdat) static int opPI2FW(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); MMX_GETSRC(); @@ -424,7 +424,7 @@ opPI2FW(uint32_t fetchdat) static int opPMULHRW(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst = MMX_GETREGP(cpu_reg); if (cpu_mod == 3) { diff --git a/src/cpu/x86_ops_atomic.h b/src/cpu/x86_ops_atomic.h index f0cab34e1..43a3708e0 100644 --- a/src/cpu/x86_ops_atomic.h +++ b/src/cpu/x86_ops_atomic.h @@ -133,7 +133,7 @@ opCMPXCHG8B_a16(uint32_t fetchdat) { uint32_t temp; uint32_t temp_hi; - uint32_t temp2 = EAX; + uint32_t temp2 = EAX; uint32_t temp2_hi = EDX; fetch_ea_16(fetchdat); @@ -164,7 +164,7 @@ opCMPXCHG8B_a32(uint32_t fetchdat) { uint32_t temp; uint32_t temp_hi; - uint32_t temp2 = EAX; + uint32_t temp2 = EAX; uint32_t temp2_hi = EDX; fetch_ea_32(fetchdat); diff --git a/src/cpu/x86_ops_i686.h b/src/cpu/x86_ops_i686.h index f11bca945..ab9d02d25 100644 --- a/src/cpu/x86_ops_i686.h +++ b/src/cpu/x86_ops_i686.h @@ -46,10 +46,10 @@ opSYSEXIT(uint32_t fetchdat) static int sf_fx_save_stor_common(uint32_t fetchdat, int bits) { - uint8_t fxinst = 0; - uint32_t tag_byte; - unsigned index; - floatx80 reg; + uint8_t fxinst = 0; + 
uint32_t tag_byte; + unsigned index; + floatx80 reg; if (CPUID < 0x650) return ILLEGAL(fetchdat); @@ -129,33 +129,33 @@ sf_fx_save_stor_common(uint32_t fetchdat, int bits) /* The lower 11 bits contain the FPU opcode, upper 5 bits are reserved */ writememw(easeg, cpu_state.eaaddr + 6, fpu_state.foo); - /* - * x87 FPU IP Offset (32/64 bits) - * The contents of this field differ depending on the current - * addressing mode (16/32/64 bit) when the FXSAVE instruction was executed: - * + 64-bit mode - 64-bit IP offset - * + 32-bit mode - 32-bit IP offset - * + 16-bit mode - low 16 bits are IP offset; high 16 bits are reserved. - * x87 CS FPU IP Selector - * + 16 bit, in 16/32 bit mode only - */ + /* + * x87 FPU IP Offset (32/64 bits) + * The contents of this field differ depending on the current + * addressing mode (16/32/64 bit) when the FXSAVE instruction was executed: + * + 64-bit mode - 64-bit IP offset + * + 32-bit mode - 32-bit IP offset + * + 16-bit mode - low 16 bits are IP offset; high 16 bits are reserved. + * x87 CS FPU IP Selector + * + 16 bit, in 16/32 bit mode only + */ writememl(easeg, cpu_state.eaaddr + 8, fpu_state.fip); writememl(easeg, cpu_state.eaaddr + 12, fpu_state.fcs); - /* - * x87 FPU Instruction Operand (Data) Pointer Offset (32/64 bits) - * The contents of this field differ depending on the current - * addressing mode (16/32 bit) when the FXSAVE instruction was executed: - * + 64-bit mode - 64-bit offset - * + 32-bit mode - 32-bit offset - * + 16-bit mode - low 16 bits are offset; high 16 bits are reserved. 
- * x87 DS FPU Instruction Operand (Data) Pointer Selector - * + 16 bit, in 16/32 bit mode only - */ + /* + * x87 FPU Instruction Operand (Data) Pointer Offset (32/64 bits) + * The contents of this field differ depending on the current + * addressing mode (16/32 bit) when the FXSAVE instruction was executed: + * + 64-bit mode - 64-bit offset + * + 32-bit mode - 32-bit offset + * + 16-bit mode - low 16 bits are offset; high 16 bits are reserved. + * x87 DS FPU Instruction Operand (Data) Pointer Selector + * + 16 bit, in 16/32 bit mode only + */ writememl(easeg, cpu_state.eaaddr + 16, fpu_state.fdp); writememl(easeg, cpu_state.eaaddr + 20, fpu_state.fds); - /* store i387 register file */ + /* store i387 register file */ for (index = 0; index < 8; index++) { const floatx80 fp = FPU_read_regi(index); @@ -172,72 +172,72 @@ sf_fx_save_stor_common(uint32_t fetchdat, int bits) static int fx_save_stor_common(uint32_t fetchdat, int bits) { - uint8_t fxinst = 0; - uint16_t twd = x87_gettag(); - uint32_t old_eaaddr = 0; - uint8_t ftwb = 0; - uint16_t rec_ftw = 0; - uint16_t fpus = 0; - int i; - int mmx_tags = 0; - uint16_t exp = 0x0000; - uint64_t mant = 0x0000000000000000ULL; - uint64_t fraction; - uint8_t jm; - uint8_t valid; - /* Exp_all_1 Exp_all_0 Frac_all_0 J M FTW_Valid | Ent - ----------------------------------------------+------ */ - uint8_t ftw_table_idx; - uint8_t ftw_table[48] = { 0x03, /* 0 0 0 0 0 0 | 0x00 */ - 0x02, /* 0 0 0 0 0 1 | 0x01 */ - 0x03, /* 0 0 0 0 0 0 | 0x02 */ - 0x02, /* 0 0 0 0 1 1 | 0x03 */ - 0x03, /* 0 0 0 1 0 0 | 0x04 */ - 0x00, /* 0 0 0 1 0 1 | 0x05 */ - 0x03, /* 0 0 0 1 1 0 | 0x06 */ - 0x00, /* 0 0 0 1 1 1 | 0x07 */ - 0x03, /* 0 0 1 0 0 0 | 0x08 */ - 0x02, /* 0 0 1 0 0 1 | 0x09 */ - 0x03, /* 0 0 1 0 1 0 | 0x0a - Impossible */ - 0x03, /* 0 0 1 0 1 1 | 0x0b - Impossible */ - 0x03, /* 0 0 1 1 0 0 | 0x0c */ - 0x02, /* 0 0 1 1 0 1 | 0x0d */ - 0x03, /* 0 0 1 1 1 0 | 0x0e - Impossible */ - 0x03, /* 0 0 1 1 1 1 | 0x0f - Impossible */ - 0x03, /* 0 1 0 
0 0 0 | 0x10 */ - 0x02, /* 0 1 0 0 0 1 | 0x11 */ - 0x03, /* 0 1 0 0 1 0 | 0x12 */ - 0x02, /* 0 1 0 0 1 1 | 0x13 */ - 0x03, /* 0 1 0 1 0 0 | 0x14 */ - 0x02, /* 0 1 0 1 0 1 | 0x15 */ - 0x03, /* 0 1 0 1 1 0 | 0x16 */ - 0x02, /* 0 1 0 1 1 1 | 0x17 */ - 0x03, /* 0 1 1 0 0 0 | 0x18 */ - 0x01, /* 0 1 1 0 0 1 | 0x19 */ - 0x03, /* 0 1 1 0 1 0 | 0x1a - Impossible */ - 0x03, /* 0 1 1 0 1 1 | 0x1b - Impossible */ - 0x03, /* 0 1 1 1 0 0 | 0x1c */ - 0x01, /* 0 1 1 1 0 1 | 0x1d */ - 0x03, /* 0 1 1 1 1 0 | 0x1e - Impossible */ - 0x03, /* 0 1 1 1 1 1 | 0x1f - Impossible */ - 0x03, /* 1 0 0 0 0 0 | 0x20 */ - 0x02, /* 1 0 0 0 0 1 | 0x21 */ - 0x03, /* 1 0 0 0 1 0 | 0x22 */ - 0x02, /* 1 0 0 0 1 1 | 0x23 */ - 0x03, /* 1 0 0 1 0 0 | 0x24 */ - 0x02, /* 1 0 0 1 0 1 | 0x25 */ - 0x03, /* 1 0 0 1 1 0 | 0x26 */ - 0x02, /* 1 0 0 1 1 1 | 0x27 */ - 0x03, /* 1 0 1 0 0 0 | 0x28 */ - 0x02, /* 1 0 1 0 0 1 | 0x29 */ - 0x03, /* 1 0 1 0 1 0 | 0x2a - Impossible */ - 0x03, /* 1 0 1 0 1 1 | 0x2b - Impossible */ - 0x03, /* 1 0 1 1 0 0 | 0x2c */ - 0x02, /* 1 0 1 1 0 1 | 0x2d */ - 0x03, /* 1 0 1 1 1 0 | 0x2e - Impossible */ - 0x03 }; /* 1 0 1 1 1 1 | 0x2f - Impossible */ - /* M is the most significant bit of the franction, so it is impossible - for M to o be 1 when the fraction is all 0's. 
*/ + uint8_t fxinst = 0; + uint16_t twd = x87_gettag(); + uint32_t old_eaaddr = 0; + uint8_t ftwb = 0; + uint16_t rec_ftw = 0; + uint16_t fpus = 0; + int i; + int mmx_tags = 0; + uint16_t exp = 0x0000; + uint64_t mant = 0x0000000000000000ULL; + uint64_t fraction; + uint8_t jm; + uint8_t valid; + /* Exp_all_1 Exp_all_0 Frac_all_0 J M FTW_Valid | Ent + ----------------------------------------------+------ */ + uint8_t ftw_table_idx; + uint8_t ftw_table[48] = { 0x03, /* 0 0 0 0 0 0 | 0x00 */ + 0x02, /* 0 0 0 0 0 1 | 0x01 */ + 0x03, /* 0 0 0 0 0 0 | 0x02 */ + 0x02, /* 0 0 0 0 1 1 | 0x03 */ + 0x03, /* 0 0 0 1 0 0 | 0x04 */ + 0x00, /* 0 0 0 1 0 1 | 0x05 */ + 0x03, /* 0 0 0 1 1 0 | 0x06 */ + 0x00, /* 0 0 0 1 1 1 | 0x07 */ + 0x03, /* 0 0 1 0 0 0 | 0x08 */ + 0x02, /* 0 0 1 0 0 1 | 0x09 */ + 0x03, /* 0 0 1 0 1 0 | 0x0a - Impossible */ + 0x03, /* 0 0 1 0 1 1 | 0x0b - Impossible */ + 0x03, /* 0 0 1 1 0 0 | 0x0c */ + 0x02, /* 0 0 1 1 0 1 | 0x0d */ + 0x03, /* 0 0 1 1 1 0 | 0x0e - Impossible */ + 0x03, /* 0 0 1 1 1 1 | 0x0f - Impossible */ + 0x03, /* 0 1 0 0 0 0 | 0x10 */ + 0x02, /* 0 1 0 0 0 1 | 0x11 */ + 0x03, /* 0 1 0 0 1 0 | 0x12 */ + 0x02, /* 0 1 0 0 1 1 | 0x13 */ + 0x03, /* 0 1 0 1 0 0 | 0x14 */ + 0x02, /* 0 1 0 1 0 1 | 0x15 */ + 0x03, /* 0 1 0 1 1 0 | 0x16 */ + 0x02, /* 0 1 0 1 1 1 | 0x17 */ + 0x03, /* 0 1 1 0 0 0 | 0x18 */ + 0x01, /* 0 1 1 0 0 1 | 0x19 */ + 0x03, /* 0 1 1 0 1 0 | 0x1a - Impossible */ + 0x03, /* 0 1 1 0 1 1 | 0x1b - Impossible */ + 0x03, /* 0 1 1 1 0 0 | 0x1c */ + 0x01, /* 0 1 1 1 0 1 | 0x1d */ + 0x03, /* 0 1 1 1 1 0 | 0x1e - Impossible */ + 0x03, /* 0 1 1 1 1 1 | 0x1f - Impossible */ + 0x03, /* 1 0 0 0 0 0 | 0x20 */ + 0x02, /* 1 0 0 0 0 1 | 0x21 */ + 0x03, /* 1 0 0 0 1 0 | 0x22 */ + 0x02, /* 1 0 0 0 1 1 | 0x23 */ + 0x03, /* 1 0 0 1 0 0 | 0x24 */ + 0x02, /* 1 0 0 1 0 1 | 0x25 */ + 0x03, /* 1 0 0 1 1 0 | 0x26 */ + 0x02, /* 1 0 0 1 1 1 | 0x27 */ + 0x03, /* 1 0 1 0 0 0 | 0x28 */ + 0x02, /* 1 0 1 0 0 1 | 0x29 */ + 0x03, /* 1 0 1 0 1 0 | 0x2a - Impossible */ + 
0x03, /* 1 0 1 0 1 1 | 0x2b - Impossible */ + 0x03, /* 1 0 1 1 0 0 | 0x2c */ + 0x02, /* 1 0 1 1 0 1 | 0x2d */ + 0x03, /* 1 0 1 1 1 0 | 0x2e - Impossible */ + 0x03 }; /* 1 0 1 1 1 1 | 0x2f - Impossible */ + /* M is the most significant bit of the franction, so it is impossible + for M to o be 1 when the fraction is all 0's. */ if (CPUID < 0x650) return ILLEGAL(fetchdat); @@ -287,11 +287,11 @@ fx_save_stor_common(uint32_t fetchdat, int bits) for (i = 0; i <= 7; i++) { cpu_state.eaaddr = old_eaaddr + 32 + (i << 4); - mant = readmemq(easeg, cpu_state.eaaddr); - fraction = mant & 0x7fffffffffffffffULL; - exp = readmemw(easeg, cpu_state.eaaddr + 8); - jm = (mant >> 62) & 0x03; - valid = !(ftwb & (1 << i)); + mant = readmemq(easeg, cpu_state.eaaddr); + fraction = mant & 0x7fffffffffffffffULL; + exp = readmemw(easeg, cpu_state.eaaddr + 8); + jm = (mant >> 62) & 0x03; + valid = !(ftwb & (1 << i)); ftw_table_idx = (!!(exp == 0x1111)) << 5; ftw_table_idx |= (!!(exp == 0x0000)) << 4; @@ -381,7 +381,7 @@ fx_save_stor_common(uint32_t fetchdat, int bits) static int opFXSAVESTOR_a16(uint32_t fetchdat) { - if (fpu_softfloat) + if (fpu_softfloat) return sf_fx_save_stor_common(fetchdat, 16); return fx_save_stor_common(fetchdat, 16); diff --git a/src/cpu/x86_ops_jump.h b/src/cpu/x86_ops_jump.h index 33e1ed4f0..091e0da35 100644 --- a/src/cpu/x86_ops_jump.h +++ b/src/cpu/x86_ops_jump.h @@ -86,7 +86,7 @@ opJ(L) opJ(NL) opJ(LE) opJ(NLE) -// clang-format on + // clang-format on static int opLOOPNE_w(uint32_t fetchdat) diff --git a/src/cpu/x86_ops_misc.h b/src/cpu/x86_ops_misc.h index 170457caa..e36fa4800 100644 --- a/src/cpu/x86_ops_misc.h +++ b/src/cpu/x86_ops_misc.h @@ -289,7 +289,7 @@ static int opF7_w_a16(uint32_t fetchdat) { uint32_t templ; - uint32_t templ2 = 0; + uint32_t templ2 = 0; int tempws; int tempws2 = 0; int16_t temps16; @@ -402,7 +402,7 @@ static int opF7_w_a32(uint32_t fetchdat) { uint32_t templ; - uint32_t templ2 = 0; + uint32_t templ2 = 0; int tempws; int tempws2 = 1; 
int16_t temps16; diff --git a/src/cpu/x86_ops_mmx.c b/src/cpu/x86_ops_mmx.c index 1afc4fba2..6d4dd5557 100644 --- a/src/cpu/x86_ops_mmx.c +++ b/src/cpu/x86_ops_mmx.c @@ -27,7 +27,7 @@ #include "x86_flags.h" #include "x86seg.h" -MMX_REG *MMP[8]; +MMX_REG *MMP[8]; uint16_t *MMEP[8]; static uint16_t MME[8]; @@ -40,10 +40,10 @@ mmx_init(void) for (uint8_t i = 0; i < 8; i++) { if (fpu_softfloat) { - MMP[i] = (MMX_REG *) &fpu_state.st_space[i].fraction; + MMP[i] = (MMX_REG *) &fpu_state.st_space[i].fraction; MMEP[i] = (uint16_t *) &fpu_state.st_space[i].exp; } else { - MMP[i] = &(cpu_state.MM[i]); + MMP[i] = &(cpu_state.MM[i]); MMEP[i] = &(MME[i]); } } diff --git a/src/cpu/x86_ops_mmx.h b/src/cpu/x86_ops_mmx.h index 47751d059..ab5e19762 100644 --- a/src/cpu/x86_ops_mmx.h +++ b/src/cpu/x86_ops_mmx.h @@ -1,12 +1,12 @@ -#define SSATB(val) (((val) < -128) ? -128 : (((val) > 127) ? 127 : (val))) -#define SSATW(val) (((val) < -32768) ? -32768 : (((val) > 32767) ? 32767 : (val))) -#define USATB(val) (((val) < 0) ? 0 : (((val) > 255) ? 255 : (val))) -#define USATW(val) (((val) < 0) ? 0 : (((val) > 65535) ? 65535 : (val))) +#define SSATB(val) (((val) < -128) ? -128 : (((val) > 127) ? 127 : (val))) +#define SSATW(val) (((val) < -32768) ? -32768 : (((val) > 32767) ? 32767 : (val))) +#define USATB(val) (((val) < 0) ? 0 : (((val) > 255) ? 255 : (val))) +#define USATW(val) (((val) < 0) ? 0 : (((val) > 65535) ? 
65535 : (val))) #define MMX_GETREGP(r) MMP[r] -#define MMX_GETREG(r) *(MMP[r]) +#define MMX_GETREG(r) *(MMP[r]) -#define MMX_SETEXP(r) \ +#define MMX_SETEXP(r) \ *(MMEP[r]) = 0xffff #define MMX_GETSRC() \ diff --git a/src/cpu/x86_ops_mmx_arith.h b/src/cpu/x86_ops_mmx_arith.h index 642e99c8a..099789970 100644 --- a/src/cpu/x86_ops_mmx_arith.h +++ b/src/cpu/x86_ops_mmx_arith.h @@ -1,7 +1,7 @@ static int opPADDB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -27,7 +27,7 @@ opPADDB_a16(uint32_t fetchdat) static int opPADDB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -54,7 +54,7 @@ opPADDB_a32(uint32_t fetchdat) static int opPADDW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -76,7 +76,7 @@ opPADDW_a16(uint32_t fetchdat) static int opPADDW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -99,7 +99,7 @@ opPADDW_a32(uint32_t fetchdat) static int opPADDD_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -119,7 +119,7 @@ opPADDD_a16(uint32_t fetchdat) static int opPADDD_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -140,7 +140,7 @@ opPADDD_a32(uint32_t fetchdat) static int opPADDSB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -166,7 +166,7 @@ opPADDSB_a16(uint32_t fetchdat) static int opPADDSB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -193,7 +193,7 @@ opPADDSB_a32(uint32_t fetchdat) static int opPADDUSB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -219,7 +219,7 @@ opPADDUSB_a16(uint32_t fetchdat) static int opPADDUSB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -246,7 +246,7 @@ opPADDUSB_a32(uint32_t fetchdat) static int opPADDSW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; 
MMX_REG *dst; MMX_ENTER(); @@ -268,7 +268,7 @@ opPADDSW_a16(uint32_t fetchdat) static int opPADDSW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -291,7 +291,7 @@ opPADDSW_a32(uint32_t fetchdat) static int opPADDUSW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -313,7 +313,7 @@ opPADDUSW_a16(uint32_t fetchdat) static int opPADDUSW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -336,7 +336,7 @@ opPADDUSW_a32(uint32_t fetchdat) static int opPMADDWD_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -363,7 +363,7 @@ opPMADDWD_a16(uint32_t fetchdat) static int opPMADDWD_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -391,7 +391,7 @@ opPMADDWD_a32(uint32_t fetchdat) static int opPMULLW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -422,7 +422,7 @@ opPMULLW_a16(uint32_t fetchdat) static int opPMULLW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -454,7 +454,7 @@ opPMULLW_a32(uint32_t fetchdat) static int opPMULHW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -485,7 +485,7 @@ opPMULHW_a16(uint32_t fetchdat) static int opPMULHW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -517,7 +517,7 @@ opPMULHW_a32(uint32_t fetchdat) static int opPSUBB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -543,7 +543,7 @@ opPSUBB_a16(uint32_t fetchdat) static int opPSUBB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -570,7 +570,7 @@ opPSUBB_a32(uint32_t fetchdat) static int opPSUBW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -592,7 +592,7 @@ opPSUBW_a16(uint32_t fetchdat) static int opPSUBW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG 
src; MMX_REG *dst; MMX_ENTER(); @@ -615,7 +615,7 @@ opPSUBW_a32(uint32_t fetchdat) static int opPSUBD_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -635,7 +635,7 @@ opPSUBD_a16(uint32_t fetchdat) static int opPSUBD_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -656,7 +656,7 @@ opPSUBD_a32(uint32_t fetchdat) static int opPSUBSB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -682,7 +682,7 @@ opPSUBSB_a16(uint32_t fetchdat) static int opPSUBSB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -709,7 +709,7 @@ opPSUBSB_a32(uint32_t fetchdat) static int opPSUBUSB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -735,7 +735,7 @@ opPSUBUSB_a16(uint32_t fetchdat) static int opPSUBUSB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -762,7 +762,7 @@ opPSUBUSB_a32(uint32_t fetchdat) static int opPSUBSW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -784,7 +784,7 @@ opPSUBSW_a16(uint32_t fetchdat) static int opPSUBSW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -807,7 +807,7 @@ opPSUBSW_a32(uint32_t fetchdat) static int opPSUBUSW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -829,7 +829,7 @@ opPSUBUSW_a16(uint32_t fetchdat) static int opPSUBUSW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); diff --git a/src/cpu/x86_ops_mmx_cmp.h b/src/cpu/x86_ops_mmx_cmp.h index b3081b8e8..141dba6bc 100644 --- a/src/cpu/x86_ops_mmx_cmp.h +++ b/src/cpu/x86_ops_mmx_cmp.h @@ -1,7 +1,7 @@ static int opPCMPEQB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -27,7 +27,7 @@ opPCMPEQB_a16(uint32_t fetchdat) static int opPCMPEQB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; 
MMX_ENTER(); @@ -54,7 +54,7 @@ opPCMPEQB_a32(uint32_t fetchdat) static int opPCMPGTB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -80,7 +80,7 @@ opPCMPGTB_a16(uint32_t fetchdat) static int opPCMPGTB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -107,7 +107,7 @@ opPCMPGTB_a32(uint32_t fetchdat) static int opPCMPEQW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -129,7 +129,7 @@ opPCMPEQW_a16(uint32_t fetchdat) static int opPCMPEQW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -152,7 +152,7 @@ opPCMPEQW_a32(uint32_t fetchdat) static int opPCMPGTW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -174,7 +174,7 @@ opPCMPGTW_a16(uint32_t fetchdat) static int opPCMPGTW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -197,7 +197,7 @@ opPCMPGTW_a32(uint32_t fetchdat) static int opPCMPEQD_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -217,7 +217,7 @@ opPCMPEQD_a16(uint32_t fetchdat) static int opPCMPEQD_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -238,7 +238,7 @@ opPCMPEQD_a32(uint32_t fetchdat) static int opPCMPGTD_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -258,7 +258,7 @@ opPCMPGTD_a16(uint32_t fetchdat) static int opPCMPGTD_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); diff --git a/src/cpu/x86_ops_mmx_logic.h b/src/cpu/x86_ops_mmx_logic.h index 26d7c1693..617219845 100644 --- a/src/cpu/x86_ops_mmx_logic.h +++ b/src/cpu/x86_ops_mmx_logic.h @@ -1,7 +1,7 @@ static int opPAND_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -20,7 +20,7 @@ opPAND_a16(uint32_t fetchdat) static int opPAND_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; 
MMX_ENTER(); @@ -40,7 +40,7 @@ opPAND_a32(uint32_t fetchdat) static int opPANDN_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -59,7 +59,7 @@ opPANDN_a16(uint32_t fetchdat) static int opPANDN_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -79,7 +79,7 @@ opPANDN_a32(uint32_t fetchdat) static int opPOR_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -98,7 +98,7 @@ opPOR_a16(uint32_t fetchdat) static int opPOR_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -118,7 +118,7 @@ opPOR_a32(uint32_t fetchdat) static int opPXOR_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -137,7 +137,7 @@ opPXOR_a16(uint32_t fetchdat) static int opPXOR_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); diff --git a/src/cpu/x86_ops_mmx_mov.h b/src/cpu/x86_ops_mmx_mov.h index f04e271ef..c72c8143d 100644 --- a/src/cpu/x86_ops_mmx_mov.h +++ b/src/cpu/x86_ops_mmx_mov.h @@ -176,14 +176,14 @@ static int opMOVQ_q_mm_a16(uint32_t fetchdat) { uint64_t dst; - MMX_REG src; + MMX_REG src; MMX_REG *op; MMX_ENTER(); fetch_ea_16(fetchdat); src = MMX_GETREG(cpu_rm); - op = MMX_GETREGP(cpu_reg); + op = MMX_GETREGP(cpu_reg); if (cpu_mod == 3) { op->q = src.q; @@ -206,14 +206,14 @@ static int opMOVQ_q_mm_a32(uint32_t fetchdat) { uint64_t dst; - MMX_REG src; + MMX_REG src; MMX_REG *op; MMX_ENTER(); fetch_ea_32(fetchdat); src = MMX_GETREG(cpu_rm); - op = MMX_GETREGP(cpu_reg); + op = MMX_GETREGP(cpu_reg); if (cpu_mod == 3) { op->q = src.q; @@ -236,7 +236,7 @@ opMOVQ_q_mm_a32(uint32_t fetchdat) static int opMOVQ_mm_q_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -266,7 +266,7 @@ opMOVQ_mm_q_a16(uint32_t fetchdat) static int opMOVQ_mm_q_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); diff --git a/src/cpu/x86_ops_mmx_pack.h 
b/src/cpu/x86_ops_mmx_pack.h index 90590638b..90ed1b6d2 100644 --- a/src/cpu/x86_ops_mmx_pack.h +++ b/src/cpu/x86_ops_mmx_pack.h @@ -2,7 +2,7 @@ static int opPUNPCKLDQ_a16(uint32_t fetchdat) { uint32_t usrc; - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -32,7 +32,7 @@ static int opPUNPCKLDQ_a32(uint32_t fetchdat) { uint32_t usrc; - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -62,7 +62,7 @@ opPUNPCKLDQ_a32(uint32_t fetchdat) static int opPUNPCKHDQ_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -82,7 +82,7 @@ opPUNPCKHDQ_a16(uint32_t fetchdat) static int opPUNPCKHDQ_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -103,7 +103,7 @@ opPUNPCKHDQ_a32(uint32_t fetchdat) static int opPUNPCKLBW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -129,7 +129,7 @@ opPUNPCKLBW_a16(uint32_t fetchdat) static int opPUNPCKLBW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -156,7 +156,7 @@ opPUNPCKLBW_a32(uint32_t fetchdat) static int opPUNPCKHBW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -182,7 +182,7 @@ opPUNPCKHBW_a16(uint32_t fetchdat) static int opPUNPCKHBW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -209,7 +209,7 @@ opPUNPCKHBW_a32(uint32_t fetchdat) static int opPUNPCKLWD_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -231,7 +231,7 @@ opPUNPCKLWD_a16(uint32_t fetchdat) static int opPUNPCKLWD_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -254,7 +254,7 @@ opPUNPCKLWD_a32(uint32_t fetchdat) static int opPUNPCKHWD_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -276,7 +276,7 @@ opPUNPCKHWD_a16(uint32_t fetchdat) static int opPUNPCKHWD_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); 
@@ -299,7 +299,7 @@ opPUNPCKHWD_a32(uint32_t fetchdat) static int opPACKSSWB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -325,7 +325,7 @@ opPACKSSWB_a16(uint32_t fetchdat) static int opPACKSSWB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -352,7 +352,7 @@ opPACKSSWB_a32(uint32_t fetchdat) static int opPACKUSWB_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -378,7 +378,7 @@ opPACKUSWB_a16(uint32_t fetchdat) static int opPACKUSWB_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; MMX_ENTER(); @@ -405,14 +405,14 @@ opPACKUSWB_a32(uint32_t fetchdat) static int opPACKSSDW_a16(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; - MMX_REG dst2; + MMX_REG dst2; MMX_ENTER(); fetch_ea_16(fetchdat); - dst = MMX_GETREGP(cpu_reg); + dst = MMX_GETREGP(cpu_reg); dst2 = *dst; MMX_GETSRC(); @@ -429,14 +429,14 @@ opPACKSSDW_a16(uint32_t fetchdat) static int opPACKSSDW_a32(uint32_t fetchdat) { - MMX_REG src; + MMX_REG src; MMX_REG *dst; - MMX_REG dst2; + MMX_REG dst2; MMX_ENTER(); fetch_ea_32(fetchdat); - dst = MMX_GETREGP(cpu_reg); + dst = MMX_GETREGP(cpu_reg); dst2 = *dst; MMX_GETSRC(); diff --git a/src/cpu/x86_ops_mmx_shift.h b/src/cpu/x86_ops_mmx_shift.h index c0c80e87e..04aeb8a9a 100644 --- a/src/cpu/x86_ops_mmx_shift.h +++ b/src/cpu/x86_ops_mmx_shift.h @@ -13,9 +13,9 @@ static int opPSxxW_imm(uint32_t fetchdat) { - int reg = fetchdat & 7; - int op = fetchdat & 0x38; - int shift = (fetchdat >> 8) & 0xff; + int reg = fetchdat & 7; + int op = fetchdat & 0x38; + int shift = (fetchdat >> 8) & 0xff; MMX_REG *dst; cpu_state.pc += 2; @@ -67,7 +67,7 @@ static int opPSLLW_a16(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -94,7 +94,7 @@ static int opPSLLW_a32(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -122,7 +122,7 @@ static int opPSRLW_a16(uint32_t fetchdat) { MMX_REG *dst; - 
int shift; + int shift; MMX_ENTER(); @@ -149,7 +149,7 @@ static int opPSRLW_a32(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -177,7 +177,7 @@ static int opPSRAW_a16(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -203,7 +203,7 @@ static int opPSRAW_a32(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -229,9 +229,9 @@ opPSRAW_a32(uint32_t fetchdat) static int opPSxxD_imm(uint32_t fetchdat) { - int reg = fetchdat & 7; - int op = fetchdat & 0x38; - int shift = (fetchdat >> 8) & 0xff; + int reg = fetchdat & 7; + int op = fetchdat & 0x38; + int shift = (fetchdat >> 8) & 0xff; MMX_REG *dst; cpu_state.pc += 2; @@ -278,7 +278,7 @@ static int opPSLLD_a16(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -303,7 +303,7 @@ static int opPSLLD_a32(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -329,7 +329,7 @@ static int opPSRLD_a16(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -354,7 +354,7 @@ static int opPSRLD_a32(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -380,7 +380,7 @@ static int opPSRAD_a16(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -404,7 +404,7 @@ static int opPSRAD_a32(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -428,9 +428,9 @@ opPSRAD_a32(uint32_t fetchdat) static int opPSxxQ_imm(uint32_t fetchdat) { - int reg = fetchdat & 7; - int op = fetchdat & 0x38; - int shift = (fetchdat >> 8) & 0xff; + int reg = fetchdat & 7; + int op = fetchdat & 0x38; + int shift = (fetchdat >> 8) & 0xff; MMX_REG *dst; cpu_state.pc += 2; @@ -474,7 +474,7 @@ static int opPSLLQ_a16(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -497,7 +497,7 @@ static int opPSLLQ_a32(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -521,7 +521,7 @@ static int opPSRLQ_a16(uint32_t 
fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); @@ -544,7 +544,7 @@ static int opPSRLQ_a32(uint32_t fetchdat) { MMX_REG *dst; - int shift; + int shift; MMX_ENTER(); diff --git a/src/cpu/x86_ops_mov.h b/src/cpu/x86_ops_mov.h index 84239be07..e77876d5c 100644 --- a/src/cpu/x86_ops_mov.h +++ b/src/cpu/x86_ops_mov.h @@ -770,83 +770,83 @@ opMOV_r_l_a32(uint32_t fetchdat) } #ifndef OPS_286_386 -#define opCMOV(condition) \ - static int opCMOV##condition##_w_a16(uint32_t fetchdat) \ - { \ - fetch_ea_16(fetchdat); \ - if (cond_##condition) { \ - if (cpu_mod == 3) \ - cpu_state.regs[cpu_reg].w = cpu_state.regs[cpu_rm].w; \ - else { \ - uint16_t temp; \ - SEG_CHECK_READ(cpu_state.ea_seg); \ - CHECK_READ(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 1); \ - temp = geteaw(); \ - if (cpu_state.abrt) \ - return 1; \ - cpu_state.regs[cpu_reg].w = temp; \ - } \ - } \ - CLOCK_CYCLES(1); \ - return 0; \ - } \ - static int opCMOV##condition##_w_a32(uint32_t fetchdat) \ - { \ - fetch_ea_32(fetchdat); \ - if (cond_##condition) { \ - if (cpu_mod == 3) \ - cpu_state.regs[cpu_reg].w = cpu_state.regs[cpu_rm].w; \ - else { \ - uint16_t temp; \ - SEG_CHECK_READ(cpu_state.ea_seg); \ - CHECK_READ(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 1); \ - temp = geteaw(); \ - if (cpu_state.abrt) \ - return 1; \ - cpu_state.regs[cpu_reg].w = temp; \ - } \ - } \ - CLOCK_CYCLES(1); \ - return 0; \ - } \ - static int opCMOV##condition##_l_a16(uint32_t fetchdat) \ - { \ - fetch_ea_16(fetchdat); \ - if (cond_##condition) { \ - if (cpu_mod == 3) \ - cpu_state.regs[cpu_reg].l = cpu_state.regs[cpu_rm].l; \ - else { \ - uint32_t temp; \ - SEG_CHECK_READ(cpu_state.ea_seg); \ - CHECK_READ(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 3); \ - temp = geteal(); \ - if (cpu_state.abrt) \ - return 1; \ - cpu_state.regs[cpu_reg].l = temp; \ - } \ - } \ - CLOCK_CYCLES(1); \ - return 0; \ - } \ - static int opCMOV##condition##_l_a32(uint32_t fetchdat) \ - { \ - 
fetch_ea_32(fetchdat); \ - if (cond_##condition) { \ - if (cpu_mod == 3) \ - cpu_state.regs[cpu_reg].l = cpu_state.regs[cpu_rm].l; \ - else { \ - uint32_t temp; \ - CHECK_READ(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 3); \ - SEG_CHECK_READ(cpu_state.ea_seg); \ - temp = geteal(); \ - if (cpu_state.abrt) \ - return 1; \ - cpu_state.regs[cpu_reg].l = temp; \ - } \ - } \ - CLOCK_CYCLES(1); \ - return 0; \ - } +# define opCMOV(condition) \ + static int opCMOV##condition##_w_a16(uint32_t fetchdat) \ + { \ + fetch_ea_16(fetchdat); \ + if (cond_##condition) { \ + if (cpu_mod == 3) \ + cpu_state.regs[cpu_reg].w = cpu_state.regs[cpu_rm].w; \ + else { \ + uint16_t temp; \ + SEG_CHECK_READ(cpu_state.ea_seg); \ + CHECK_READ(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 1); \ + temp = geteaw(); \ + if (cpu_state.abrt) \ + return 1; \ + cpu_state.regs[cpu_reg].w = temp; \ + } \ + } \ + CLOCK_CYCLES(1); \ + return 0; \ + } \ + static int opCMOV##condition##_w_a32(uint32_t fetchdat) \ + { \ + fetch_ea_32(fetchdat); \ + if (cond_##condition) { \ + if (cpu_mod == 3) \ + cpu_state.regs[cpu_reg].w = cpu_state.regs[cpu_rm].w; \ + else { \ + uint16_t temp; \ + SEG_CHECK_READ(cpu_state.ea_seg); \ + CHECK_READ(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 1); \ + temp = geteaw(); \ + if (cpu_state.abrt) \ + return 1; \ + cpu_state.regs[cpu_reg].w = temp; \ + } \ + } \ + CLOCK_CYCLES(1); \ + return 0; \ + } \ + static int opCMOV##condition##_l_a16(uint32_t fetchdat) \ + { \ + fetch_ea_16(fetchdat); \ + if (cond_##condition) { \ + if (cpu_mod == 3) \ + cpu_state.regs[cpu_reg].l = cpu_state.regs[cpu_rm].l; \ + else { \ + uint32_t temp; \ + SEG_CHECK_READ(cpu_state.ea_seg); \ + CHECK_READ(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 3); \ + temp = geteal(); \ + if (cpu_state.abrt) \ + return 1; \ + cpu_state.regs[cpu_reg].l = temp; \ + } \ + } \ + CLOCK_CYCLES(1); \ + return 0; \ + } \ + static int opCMOV##condition##_l_a32(uint32_t fetchdat) \ + { \ + 
fetch_ea_32(fetchdat); \ + if (cond_##condition) { \ + if (cpu_mod == 3) \ + cpu_state.regs[cpu_reg].l = cpu_state.regs[cpu_rm].l; \ + else { \ + uint32_t temp; \ + CHECK_READ(cpu_state.ea_seg, cpu_state.eaaddr, cpu_state.eaaddr + 3); \ + SEG_CHECK_READ(cpu_state.ea_seg); \ + temp = geteal(); \ + if (cpu_state.abrt) \ + return 1; \ + cpu_state.regs[cpu_reg].l = temp; \ + } \ + } \ + CLOCK_CYCLES(1); \ + return 0; \ + } // clang-format off opCMOV(O) diff --git a/src/cpu/x86_ops_mov_seg.h b/src/cpu/x86_ops_mov_seg.h index 66e77d585..2498a7d90 100644 --- a/src/cpu/x86_ops_mov_seg.h +++ b/src/cpu/x86_ops_mov_seg.h @@ -536,4 +536,4 @@ opLSS_l_a32(uint32_t fetchdat) opLsel(ES, cpu_state.seg_es) opLsel(FS, cpu_state.seg_fs) opLsel(GS, cpu_state.seg_gs) -// clang-format on + // clang-format on diff --git a/src/cpu/x86_ops_prefix.h b/src/cpu/x86_ops_prefix.h index 1918e8acd..8a7357386 100644 --- a/src/cpu/x86_ops_prefix.h +++ b/src/cpu/x86_ops_prefix.h @@ -88,7 +88,7 @@ op_seg(ES_REPNE, cpu_state.seg_es, x86_opcodes_REPNE, x86_opcodes) op_seg(FS_REPNE, cpu_state.seg_fs, x86_opcodes_REPNE, x86_opcodes) op_seg(GS_REPNE, cpu_state.seg_gs, x86_opcodes_REPNE, x86_opcodes) op_seg(SS_REPNE, cpu_state.seg_ss, x86_opcodes_REPNE, x86_opcodes) -// clang-format on + // clang-format on static int op_66(uint32_t fetchdat) /*Data size select*/ diff --git a/src/cpu/x86_ops_set.h b/src/cpu/x86_ops_set.h index 30c076a6e..75caa4491 100644 --- a/src/cpu/x86_ops_set.h +++ b/src/cpu/x86_ops_set.h @@ -36,4 +36,4 @@ opSET(L) opSET(NL) opSET(LE) opSET(NLE) -// clang-format on + // clang-format on diff --git a/src/cpu/x86_ops_shift.h b/src/cpu/x86_ops_shift.h index 22c9aa8b6..1394cf420 100644 --- a/src/cpu/x86_ops_shift.h +++ b/src/cpu/x86_ops_shift.h @@ -1071,4 +1071,4 @@ opSHxD(SHLD_w) opSHxD(SHLD_l) opSHxD(SHRD_w) opSHxD(SHRD_l) -// clang-format on + // clang-format on diff --git a/src/cpu/x86_ops_stack.h b/src/cpu/x86_ops_stack.h index aa6d4f31f..8fa66e082 100644 --- a/src/cpu/x86_ops_stack.h 
+++ b/src/cpu/x86_ops_stack.h @@ -189,22 +189,50 @@ static int opPOPA_l(uint32_t fetchdat) { if (stack32) { - EDI = readmeml(ss, ESP); if (cpu_state.abrt) return 1; - ESI = readmeml(ss, ESP + 4); if (cpu_state.abrt) return 1; - EBP = readmeml(ss, ESP + 8); if (cpu_state.abrt) return 1; - EBX = readmeml(ss, ESP + 16); if (cpu_state.abrt) return 1; - EDX = readmeml(ss, ESP + 20); if (cpu_state.abrt) return 1; - ECX = readmeml(ss, ESP + 24); if (cpu_state.abrt) return 1; - EAX = readmeml(ss, ESP + 28); if (cpu_state.abrt) return 1; + EDI = readmeml(ss, ESP); + if (cpu_state.abrt) + return 1; + ESI = readmeml(ss, ESP + 4); + if (cpu_state.abrt) + return 1; + EBP = readmeml(ss, ESP + 8); + if (cpu_state.abrt) + return 1; + EBX = readmeml(ss, ESP + 16); + if (cpu_state.abrt) + return 1; + EDX = readmeml(ss, ESP + 20); + if (cpu_state.abrt) + return 1; + ECX = readmeml(ss, ESP + 24); + if (cpu_state.abrt) + return 1; + EAX = readmeml(ss, ESP + 28); + if (cpu_state.abrt) + return 1; ESP += 32; } else { - EDI = readmeml(ss, ((SP) & 0xFFFF)); if (cpu_state.abrt) return 1; - ESI = readmeml(ss, ((SP + 4) & 0xFFFF)); if (cpu_state.abrt) return 1; - EBP = readmeml(ss, ((SP + 8) & 0xFFFF)); if (cpu_state.abrt) return 1; - EBX = readmeml(ss, ((SP + 16) & 0xFFFF)); if (cpu_state.abrt) return 1; - EDX = readmeml(ss, ((SP + 20) & 0xFFFF)); if (cpu_state.abrt) return 1; - ECX = readmeml(ss, ((SP + 24) & 0xFFFF)); if (cpu_state.abrt) return 1; - EAX = readmeml(ss, ((SP + 28) & 0xFFFF)); if (cpu_state.abrt) return 1; + EDI = readmeml(ss, ((SP) &0xFFFF)); + if (cpu_state.abrt) + return 1; + ESI = readmeml(ss, ((SP + 4) & 0xFFFF)); + if (cpu_state.abrt) + return 1; + EBP = readmeml(ss, ((SP + 8) & 0xFFFF)); + if (cpu_state.abrt) + return 1; + EBX = readmeml(ss, ((SP + 16) & 0xFFFF)); + if (cpu_state.abrt) + return 1; + EDX = readmeml(ss, ((SP + 20) & 0xFFFF)); + if (cpu_state.abrt) + return 1; + ECX = readmeml(ss, ((SP + 24) & 0xFFFF)); + if (cpu_state.abrt) + return 1; + EAX = 
readmeml(ss, ((SP + 28) & 0xFFFF)); + if (cpu_state.abrt) + return 1; SP += 32; } CLOCK_CYCLES((is486) ? 9 : 24); diff --git a/src/cpu/x86_ops_xchg.h b/src/cpu/x86_ops_xchg.h index c5ce08999..70e7be58c 100644 --- a/src/cpu/x86_ops_xchg.h +++ b/src/cpu/x86_ops_xchg.h @@ -275,4 +275,4 @@ opBSWAP(ESI) opBSWAP(EDI) opBSWAP(EBP) opBSWAP(ESP) -// clang-format on + // clang-format on diff --git a/src/cpu/x87.c b/src/cpu/x87.c index 3918800aa..c75dac569 100644 --- a/src/cpu/x87.c +++ b/src/cpu/x87.c @@ -105,7 +105,6 @@ x87_settag(uint16_t new_tag) } #endif - static floatx80 FPU_handle_NaN32_Func(floatx80 a, int aIsNaN, float32 b32, int bIsNaN, struct float_status_t *status) { @@ -118,24 +117,27 @@ FPU_handle_NaN32_Func(floatx80 a, int aIsNaN, float32 b32, int bIsNaN, struct fl // propagate QNaN to SNaN a = propagateFloatx80NaNOne(a, status); - if (aIsNaN & !bIsNaN) return a; + if (aIsNaN & !bIsNaN) + return a; // float32 is NaN so conversion will propagate SNaN to QNaN and raise // appropriate exception flags floatx80 b = float32_to_floatx80(b32, status); if (aIsSignalingNaN) { - if (bIsSignalingNaN) goto returnLargerSignificand; + if (bIsSignalingNaN) + goto returnLargerSignificand; return bIsNaN ? b : a; - } - else if (aIsNaN) { - if (bIsSignalingNaN) return a; - returnLargerSignificand: - if (a.fraction < b.fraction) return b; - if (b.fraction < a.fraction) return a; + } else if (aIsNaN) { + if (bIsSignalingNaN) + return a; +returnLargerSignificand: + if (a.fraction < b.fraction) + return b; + if (b.fraction < a.fraction) + return a; return (a.exp < b.exp) ? 
a : b; - } - else { + } else { return b; } } @@ -172,24 +174,27 @@ FPU_handle_NaN64_Func(floatx80 a, int aIsNaN, float64 b64, int bIsNaN, struct fl // propagate QNaN to SNaN a = propagateFloatx80NaNOne(a, status); - if (aIsNaN & !bIsNaN) return a; + if (aIsNaN & !bIsNaN) + return a; // float64 is NaN so conversion will propagate SNaN to QNaN and raise // appropriate exception flags floatx80 b = float64_to_floatx80(b64, status); if (aIsSignalingNaN) { - if (bIsSignalingNaN) goto returnLargerSignificand; + if (bIsSignalingNaN) + goto returnLargerSignificand; return bIsNaN ? b : a; - } - else if (aIsNaN) { - if (bIsSignalingNaN) return a; - returnLargerSignificand: - if (a.fraction < b.fraction) return b; - if (b.fraction < a.fraction) return a; + } else if (aIsNaN) { + if (bIsSignalingNaN) + return a; +returnLargerSignificand: + if (a.fraction < b.fraction) + return b; + if (b.fraction < a.fraction) + return a; return (a.exp < b.exp) ? a : b; - } - else { + } else { return b; } } @@ -218,7 +223,7 @@ struct float_status_t i387cw_to_softfloat_status_word(uint16_t control_word) { struct float_status_t status; - int precision = control_word & FPU_CW_PC; + int precision = control_word & FPU_CW_PC; switch (precision) { case FPU_PR_32_BITS: @@ -231,24 +236,23 @@ i387cw_to_softfloat_status_word(uint16_t control_word) status.float_rounding_precision = 80; break; default: - /* With the precision control bits set to 01 "(reserved)", a - real CPU behaves as if the precision control bits were - set to 11 "80 bits" */ + /* With the precision control bits set to 01 "(reserved)", a + real CPU behaves as if the precision control bits were + set to 11 "80 bits" */ status.float_rounding_precision = 80; break; } - status.float_exception_flags = 0; // clear exceptions before execution - status.float_nan_handling_mode = float_first_operand_nan; - status.float_rounding_mode = (control_word & FPU_CW_RC) >> 10; - status.flush_underflow_to_zero = 0; + status.float_exception_flags = 0; // 
clear exceptions before execution + status.float_nan_handling_mode = float_first_operand_nan; + status.float_rounding_mode = (control_word & FPU_CW_RC) >> 10; + status.flush_underflow_to_zero = 0; status.float_suppress_exception = 0; - status.float_exception_masks = control_word & FPU_CW_Exceptions_Mask; - status.denormals_are_zeros = 0; + status.float_exception_masks = control_word & FPU_CW_Exceptions_Mask; + status.denormals_are_zeros = 0; return status; } - int FPU_status_word_flags_fpu_compare(int float_relation) { @@ -266,7 +270,7 @@ FPU_status_word_flags_fpu_compare(int float_relation) return C3; } - return (-1); // should never get here + return (-1); // should never get here } void @@ -320,9 +324,9 @@ FPU_exception(uint32_t fetchdat, uint16_t exceptions, int store) fpu_state.swd |= exceptions; if (exceptions & FPU_SW_Stack_Fault) { if (!(exceptions & C1)) { - /* This bit distinguishes over- from underflow for a stack fault, - and roundup from round-down for precision loss. */ - fpu_state.swd &= ~C1; + /* This bit distinguishes over- from underflow for a stack fault, + and roundup from round-down for precision loss. */ + fpu_state.swd &= ~C1; } } return unmasked; @@ -355,8 +359,8 @@ FPU_exception(uint32_t fetchdat, uint16_t exceptions, int store) if (exceptions & FPU_EX_Precision) { if (!(exceptions & C1)) { - /* This bit distinguishes over- from underflow for a stack fault, - and roundup from round-down for precision loss. */ + /* This bit distinguishes over- from underflow for a stack fault, + and roundup from round-down for precision loss. 
*/ fpu_state.swd &= ~C1; } } @@ -445,14 +449,22 @@ pack_FPU_TW(uint16_t twd) { uint8_t tag_byte = 0; - if ((twd & 0x0003) != 0x0003) tag_byte |= 0x01; - if ((twd & 0x000c) != 0x000c) tag_byte |= 0x02; - if ((twd & 0x0030) != 0x0030) tag_byte |= 0x04; - if ((twd & 0x00c0) != 0x00c0) tag_byte |= 0x08; - if ((twd & 0x0300) != 0x0300) tag_byte |= 0x10; - if ((twd & 0x0c00) != 0x0c00) tag_byte |= 0x20; - if ((twd & 0x3000) != 0x3000) tag_byte |= 0x40; - if ((twd & 0xc000) != 0xc000) tag_byte |= 0x80; + if ((twd & 0x0003) != 0x0003) + tag_byte |= 0x01; + if ((twd & 0x000c) != 0x000c) + tag_byte |= 0x02; + if ((twd & 0x0030) != 0x0030) + tag_byte |= 0x04; + if ((twd & 0x00c0) != 0x00c0) + tag_byte |= 0x08; + if ((twd & 0x0300) != 0x0300) + tag_byte |= 0x10; + if ((twd & 0x0c00) != 0x0c00) + tag_byte |= 0x20; + if ((twd & 0x3000) != 0x3000) + tag_byte |= 0x40; + if ((twd & 0xc000) != 0xc000) + tag_byte |= 0x80; return tag_byte; } @@ -462,45 +474,45 @@ unpack_FPU_TW(uint16_t tag_byte) { uint32_t twd = 0; - /* FTW - * - * Note that the original format for FTW can be recreated from the stored - * FTW valid bits and the stored 80-bit FP data (assuming the stored data - * was not the contents of MMX registers) using the following table: + /* FTW + * + * Note that the original format for FTW can be recreated from the stored + * FTW valid bits and the stored 80-bit FP data (assuming the stored data + * was not the contents of MMX registers) using the following table: - | Exponent | Exponent | Fraction | J,M bits | FTW valid | x87 FTW | - | all 1s | all 0s | all 0s | | | | - ------------------------------------------------------------------- - | 0 | 0 | 0 | 0x | 1 | S 10 | - | 0 | 0 | 0 | 1x | 1 | V 00 | - ------------------------------------------------------------------- - | 0 | 0 | 1 | 00 | 1 | S 10 | - | 0 | 0 | 1 | 10 | 1 | V 00 | - ------------------------------------------------------------------- - | 0 | 1 | 0 | 0x | 1 | S 10 | - | 0 | 1 | 0 | 1x | 1 | S 10 | - 
------------------------------------------------------------------- - | 0 | 1 | 1 | 00 | 1 | Z 01 | - | 0 | 1 | 1 | 10 | 1 | S 10 | - ------------------------------------------------------------------- - | 1 | 0 | 0 | 1x | 1 | S 10 | - | 1 | 0 | 0 | 1x | 1 | S 10 | - ------------------------------------------------------------------- - | 1 | 0 | 1 | 00 | 1 | S 10 | - | 1 | 0 | 1 | 10 | 1 | S 10 | - ------------------------------------------------------------------- - | all combinations above | 0 | E 11 | + | Exponent | Exponent | Fraction | J,M bits | FTW valid | x87 FTW | + | all 1s | all 0s | all 0s | | | | + ------------------------------------------------------------------- + | 0 | 0 | 0 | 0x | 1 | S 10 | + | 0 | 0 | 0 | 1x | 1 | V 00 | + ------------------------------------------------------------------- + | 0 | 0 | 1 | 00 | 1 | S 10 | + | 0 | 0 | 1 | 10 | 1 | V 00 | + ------------------------------------------------------------------- + | 0 | 1 | 0 | 0x | 1 | S 10 | + | 0 | 1 | 0 | 1x | 1 | S 10 | + ------------------------------------------------------------------- + | 0 | 1 | 1 | 00 | 1 | Z 01 | + | 0 | 1 | 1 | 10 | 1 | S 10 | + ------------------------------------------------------------------- + | 1 | 0 | 0 | 1x | 1 | S 10 | + | 1 | 0 | 0 | 1x | 1 | S 10 | + ------------------------------------------------------------------- + | 1 | 0 | 1 | 00 | 1 | S 10 | + | 1 | 0 | 1 | 10 | 1 | S 10 | + ------------------------------------------------------------------- + | all combinations above | 0 | E 11 | - * - * The J-bit is defined to be the 1-bit binary integer to the left of - * the decimal place in the significand. - * - * The M-bit is defined to be the most significant bit of the fractional - * portion of the significand (i.e., the bit immediately to the right of - * the decimal place). When the M-bit is the most significant bit of the - * fractional portion of the significand, it must be 0 if the fraction - * is all 0's. 
- */ + * + * The J-bit is defined to be the 1-bit binary integer to the left of + * the decimal place in the significand. + * + * The M-bit is defined to be the most significant bit of the fractional + * portion of the significand (i.e., the bit immediately to the right of + * the decimal place). When the M-bit is the most significant bit of the + * fractional portion of the significand, it must be 0 if the fraction + * is all 0's. + */ for (int index = 7; index >= 0; index--, twd <<= 2, tag_byte <<= 1) { if (tag_byte & 0x80) { diff --git a/src/cpu/x87.h b/src/cpu/x87.h index 2d8708da4..1d889a544 100644 --- a/src/cpu/x87.h +++ b/src/cpu/x87.h @@ -16,9 +16,9 @@ x87_set_mmx(void) fpu_state.tag = 0; fpu_state.tos = 0; /* reset FPU Top-Of-Stack */ } else { - cpu_state.TOP = 0; - p = (uint64_t *) cpu_state.tag; - *p = 0x0101010101010101ULL; + cpu_state.TOP = 0; + p = (uint64_t *) cpu_state.tag; + *p = 0x0101010101010101ULL; } cpu_state.ismmx = 1; } @@ -31,8 +31,8 @@ x87_emms(void) fpu_state.tag = 0xffff; fpu_state.tos = 0; /* reset FPU Top-Of-Stack */ } else { - p = (uint64_t *) cpu_state.tag; - *p = 0; + p = (uint64_t *) cpu_state.tag; + *p = 0; } cpu_state.ismmx = 0; } @@ -60,81 +60,87 @@ void x87_settag(uint16_t new_tag); void codegen_set_rounding_mode(int mode); /* Status Word */ -#define FPU_SW_Backward (0x8000) /* backward compatibility */ -#define FPU_SW_C3 (0x4000) /* condition bit 3 */ -#define FPU_SW_Top (0x3800) /* top of stack */ -#define FPU_SW_C2 (0x0400) /* condition bit 2 */ -#define FPU_SW_C1 (0x0200) /* condition bit 1 */ -#define FPU_SW_C0 (0x0100) /* condition bit 0 */ -#define FPU_SW_Summary (0x0080) /* exception summary */ -#define FPU_SW_Stack_Fault (0x0040) /* stack fault */ -#define FPU_SW_Precision (0x0020) /* loss of precision */ -#define FPU_SW_Underflow (0x0010) /* underflow */ -#define FPU_SW_Overflow (0x0008) /* overflow */ -#define FPU_SW_Zero_Div (0x0004) /* divide by zero */ -#define FPU_SW_Denormal_Op (0x0002) /* denormalized operand 
*/ -#define FPU_SW_Invalid (0x0001) /* invalid operation */ +#define FPU_SW_Backward (0x8000) /* backward compatibility */ +#define FPU_SW_C3 (0x4000) /* condition bit 3 */ +#define FPU_SW_Top (0x3800) /* top of stack */ +#define FPU_SW_C2 (0x0400) /* condition bit 2 */ +#define FPU_SW_C1 (0x0200) /* condition bit 1 */ +#define FPU_SW_C0 (0x0100) /* condition bit 0 */ +#define FPU_SW_Summary (0x0080) /* exception summary */ +#define FPU_SW_Stack_Fault (0x0040) /* stack fault */ +#define FPU_SW_Precision (0x0020) /* loss of precision */ +#define FPU_SW_Underflow (0x0010) /* underflow */ +#define FPU_SW_Overflow (0x0008) /* overflow */ +#define FPU_SW_Zero_Div (0x0004) /* divide by zero */ +#define FPU_SW_Denormal_Op (0x0002) /* denormalized operand */ +#define FPU_SW_Invalid (0x0001) /* invalid operation */ -#define C0 (1 << 8) -#define C1 (1 << 9) -#define C2 (1 << 10) -#define C3 (1 << 14) +#define C0 (1 << 8) +#define C1 (1 << 9) +#define C2 (1 << 10) +#define C3 (1 << 14) -#define FPU_SW_CC (C0 | C1 | C2 | C3) +#define FPU_SW_CC (C0 | C1 | C2 | C3) -#define FPU_SW_Exceptions_Mask (0x027f) /* status word exceptions bit mask */ +#define FPU_SW_Exceptions_Mask (0x027f) /* status word exceptions bit mask */ /* Exception flags: */ -#define FPU_EX_Precision (0x0020) /* loss of precision */ -#define FPU_EX_Underflow (0x0010) /* underflow */ -#define FPU_EX_Overflow (0x0008) /* overflow */ -#define FPU_EX_Zero_Div (0x0004) /* divide by zero */ -#define FPU_EX_Denormal (0x0002) /* denormalized operand */ -#define FPU_EX_Invalid (0x0001) /* invalid operation */ +#define FPU_EX_Precision (0x0020) /* loss of precision */ +#define FPU_EX_Underflow (0x0010) /* underflow */ +#define FPU_EX_Overflow (0x0008) /* overflow */ +#define FPU_EX_Zero_Div (0x0004) /* divide by zero */ +#define FPU_EX_Denormal (0x0002) /* denormalized operand */ +#define FPU_EX_Invalid (0x0001) /* invalid operation */ /* Special exceptions: */ -#define FPU_EX_Stack_Overflow (0x0041| C1) /* stack 
overflow */ -#define FPU_EX_Stack_Underflow (0x0041) /* stack underflow */ +#define FPU_EX_Stack_Overflow (0x0041 | C1) /* stack overflow */ +#define FPU_EX_Stack_Underflow (0x0041) /* stack underflow */ /* precision control */ -#define FPU_EX_Precision_Lost_Up (EX_Precision | C1) -#define FPU_EX_Precision_Lost_Dn (EX_Precision) +#define FPU_EX_Precision_Lost_Up (EX_Precision | C1) +#define FPU_EX_Precision_Lost_Dn (EX_Precision) -#define setcc(cc) \ - fpu_state.swd = (fpu_state.swd & ~(FPU_SW_CC)) | ((cc) & FPU_SW_CC) +#define setcc(cc) \ + fpu_state.swd = (fpu_state.swd & ~(FPU_SW_CC)) | ((cc) &FPU_SW_CC) -#define clear_C1() { fpu_state.swd &= ~C1; } -#define clear_C2() { fpu_state.swd &= ~C2; } +#define clear_C1() \ + { \ + fpu_state.swd &= ~C1; \ + } +#define clear_C2() \ + { \ + fpu_state.swd &= ~C2; \ + } /* ************ */ /* Control Word */ /* ************ */ -#define FPU_CW_Inf (0x1000) /* infinity control, legacy */ +#define FPU_CW_Inf (0x1000) /* infinity control, legacy */ -#define FPU_CW_RC (0x0C00) /* rounding control */ -#define FPU_CW_PC (0x0300) /* precision control */ +#define FPU_CW_RC (0x0C00) /* rounding control */ +#define FPU_CW_PC (0x0300) /* precision control */ -#define FPU_RC_RND (0x0000) /* rounding control */ -#define FPU_RC_DOWN (0x0400) -#define FPU_RC_UP (0x0800) -#define FPU_RC_CHOP (0x0C00) +#define FPU_RC_RND (0x0000) /* rounding control */ +#define FPU_RC_DOWN (0x0400) +#define FPU_RC_UP (0x0800) +#define FPU_RC_CHOP (0x0C00) -#define FPU_CW_Precision (0x0020) /* loss of precision mask */ -#define FPU_CW_Underflow (0x0010) /* underflow mask */ -#define FPU_CW_Overflow (0x0008) /* overflow mask */ -#define FPU_CW_Zero_Div (0x0004) /* divide by zero mask */ -#define FPU_CW_Denormal (0x0002) /* denormalized operand mask */ -#define FPU_CW_Invalid (0x0001) /* invalid operation mask */ +#define FPU_CW_Precision (0x0020) /* loss of precision mask */ +#define FPU_CW_Underflow (0x0010) /* underflow mask */ +#define FPU_CW_Overflow 
(0x0008) /* overflow mask */ +#define FPU_CW_Zero_Div (0x0004) /* divide by zero mask */ +#define FPU_CW_Denormal (0x0002) /* denormalized operand mask */ +#define FPU_CW_Invalid (0x0001) /* invalid operation mask */ -#define FPU_CW_Exceptions_Mask (0x003f) /* all masks */ +#define FPU_CW_Exceptions_Mask (0x003f) /* all masks */ /* Precision control bits affect only the following: ADD, SUB(R), MUL, DIV(R), and SQRT */ -#define FPU_PR_32_BITS (0x000) -#define FPU_PR_RESERVED_BITS (0x100) -#define FPU_PR_64_BITS (0x200) -#define FPU_PR_80_BITS (0x300) +#define FPU_PR_32_BITS (0x000) +#define FPU_PR_RESERVED_BITS (0x100) +#define FPU_PR_64_BITS (0x200) +#define FPU_PR_80_BITS (0x300) #include "softfloat/softfloatx80.h" @@ -145,16 +151,16 @@ is_IA_masked(void) } struct float_status_t i387cw_to_softfloat_status_word(uint16_t control_word); -uint16_t FPU_exception(uint32_t fetchdat, uint16_t exceptions, int store); -int FPU_status_word_flags_fpu_compare(int float_relation); -void FPU_write_eflags_fpu_compare(int float_relation); -void FPU_stack_overflow(uint32_t fetchdat); -void FPU_stack_underflow(uint32_t fetchdat, int stnr, int pop_stack); -int FPU_handle_NaN32(floatx80 a, float32 b, floatx80 *r, struct float_status_t *status); -int FPU_handle_NaN64(floatx80 a, float64 b, floatx80 *r, struct float_status_t *status); -int FPU_tagof(const floatx80 reg); -uint8_t pack_FPU_TW(uint16_t twd); -uint16_t unpack_FPU_TW(uint16_t tag_byte); +uint16_t FPU_exception(uint32_t fetchdat, uint16_t exceptions, int store); +int FPU_status_word_flags_fpu_compare(int float_relation); +void FPU_write_eflags_fpu_compare(int float_relation); +void FPU_stack_overflow(uint32_t fetchdat); +void FPU_stack_underflow(uint32_t fetchdat, int stnr, int pop_stack); +int FPU_handle_NaN32(floatx80 a, float32 b, floatx80 *r, struct float_status_t *status); +int FPU_handle_NaN64(floatx80 a, float64 b, floatx80 *r, struct float_status_t *status); +int FPU_tagof(const floatx80 reg); +uint8_t 
pack_FPU_TW(uint16_t twd); +uint16_t unpack_FPU_TW(uint16_t tag_byte); static __inline uint16_t i387_get_control_word(void) @@ -181,7 +187,7 @@ static __inline void FPU_settagi_valid(int stnr) { int regnr = (stnr + fpu_state.tos) & 7; - fpu_state.tag &= ~(3 << (regnr * 2)); // FPU_Tag_Valid == '00 + fpu_state.tag &= ~(3 << (regnr * 2)); // FPU_Tag_Valid == '00 } static __inline void @@ -228,16 +234,15 @@ FPU_save_regi_tag(floatx80 reg, int tag, int stnr) FPU_settagi(tag, stnr); } - -#define FPU_check_pending_exceptions() \ -do { \ - if (fpu_state.swd & FPU_SW_Summary) { \ - if (cr0 & 0x20) { \ - x86_int(16); \ - return 1; \ - } else { \ - picint(1 << 13); \ - return 1; \ - } \ - } \ -} while (0) +#define FPU_check_pending_exceptions() \ + do { \ + if (fpu_state.swd & FPU_SW_Summary) { \ + if (cr0 & 0x20) { \ + x86_int(16); \ + return 1; \ + } else { \ + picint(1 << 13); \ + return 1; \ + } \ + } \ + } while (0) diff --git a/src/cpu/x87_ops.h b/src/cpu/x87_ops.h index cde0128dc..d1c55f2e9 100644 --- a/src/cpu/x87_ops.h +++ b/src/cpu/x87_ops.h @@ -45,21 +45,20 @@ static int rounding_modes[4] = { FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARD #define C2 (1 << 10) #define C3 (1 << 14) -#define X87_TAG_VALID 0 -#define X87_TAG_ZERO 1 -#define X87_TAG_INVALID 2 -#define X87_TAG_EMPTY 3 +#define X87_TAG_VALID 0 +#define X87_TAG_ZERO 1 +#define X87_TAG_INVALID 2 +#define X87_TAG_EMPTY 3 #define STATUS_ZERODIVIDE 4 -typedef union -{ +typedef union { double d; struct { - uint64_t mantissa:52; - uint64_t exponent:11; - uint64_t negative:1; + uint64_t mantissa : 52; + uint64_t exponent : 11; + uint64_t negative : 1; }; } double_decompose_t; @@ -1074,7 +1073,6 @@ const OpFn OP_TABLE(fpu_8087_df)[256] = { #else # define ILLEGAL_a32 FPU_ILLEGAL_a32 - const OpFn OP_TABLE(sf_fpu_d8_a16)[32] = { // clang-format off sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16, @@ -1413,7 +1411,7 @@ const OpFn 
OP_TABLE(sf_fpu_da_a32)[256] = { // clang-format on }; -#ifndef OPS_286_386 +# ifndef OPS_286_386 const OpFn OP_TABLE(sf_fpu_686_da_a16)[256] = { // clang-format off sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, @@ -1493,7 +1491,7 @@ const OpFn OP_TABLE(sf_fpu_686_da_a32)[256] = { ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, // clang-format on }; -#endif +# endif const OpFn OP_TABLE(sf_fpu_287_db_a16)[256] = { // clang-format off @@ -1655,7 +1653,7 @@ const OpFn OP_TABLE(sf_fpu_db_a32)[256] = { // clang-format on }; -#ifndef OPS_286_386 +# ifndef OPS_286_386 const OpFn OP_TABLE(sf_fpu_686_db_a16)[256] = { // clang-format off sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, @@ -1734,7 +1732,7 @@ const OpFn OP_TABLE(sf_fpu_686_db_a32)[256] = { ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, // clang-format on }; -#endif +# endif const OpFn OP_TABLE(sf_fpu_287_dc_a16)[32] = { // clang-format off @@ -2252,7 +2250,7 @@ const OpFn OP_TABLE(sf_fpu_df_a32)[256] = { // clang-format on }; -#ifndef OPS_286_386 +# ifndef OPS_286_386 const OpFn OP_TABLE(sf_fpu_686_df_a16)[256] = { // clang-format off sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, @@ -2332,7 +2330,7 @@ const OpFn OP_TABLE(sf_fpu_686_df_a32)[256] = { ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, // clang-format on }; -#endif +# endif const OpFn OP_TABLE(fpu_d8_a16)[32] = { // clang-format off @@ -2672,7 +2670,7 @@ const OpFn OP_TABLE(fpu_da_a32)[256] = { // clang-format on }; -#ifndef OPS_286_386 +# ifndef OPS_286_386 const OpFn OP_TABLE(fpu_686_da_a16)[256] = { // clang-format off opFADDil_a16, opFADDil_a16, opFADDil_a16, 
opFADDil_a16, opFADDil_a16, opFADDil_a16, opFADDil_a16, opFADDil_a16, @@ -2752,7 +2750,7 @@ const OpFn OP_TABLE(fpu_686_da_a32)[256] = { ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, // clang-format on }; -#endif +# endif const OpFn OP_TABLE(fpu_287_db_a16)[256] = { // clang-format off @@ -2914,7 +2912,7 @@ const OpFn OP_TABLE(fpu_db_a32)[256] = { // clang-format on }; -#ifndef OPS_286_386 +# ifndef OPS_286_386 const OpFn OP_TABLE(fpu_686_db_a16)[256] = { // clang-format off opFILDil_a16, opFILDil_a16, opFILDil_a16, opFILDil_a16, opFILDil_a16, opFILDil_a16, opFILDil_a16, opFILDil_a16, @@ -2993,7 +2991,7 @@ const OpFn OP_TABLE(fpu_686_db_a32)[256] = { ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, // clang-format on }; -#endif +# endif const OpFn OP_TABLE(fpu_287_dc_a16)[32] = { // clang-format off @@ -3511,7 +3509,7 @@ const OpFn OP_TABLE(fpu_df_a32)[256] = { // clang-format on }; -#ifndef OPS_286_386 +# ifndef OPS_286_386 const OpFn OP_TABLE(fpu_686_df_a16)[256] = { // clang-format off opFILDiw_a16, opFILDiw_a16, opFILDiw_a16, opFILDiw_a16, opFILDiw_a16, opFILDiw_a16, opFILDiw_a16, opFILDiw_a16, @@ -3591,7 +3589,7 @@ const OpFn OP_TABLE(fpu_686_df_a32)[256] = { ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, // clang-format on }; -#endif +# endif const OpFn OP_TABLE(nofpu_a16)[256] = { // clang-format off diff --git a/src/cpu/x87_ops_arith.h b/src/cpu/x87_ops_arith.h index b5cdf5117..808a15051 100644 --- a/src/cpu/x87_ops_arith.h +++ b/src/cpu/x87_ops_arith.h @@ -143,9 +143,9 @@ opFPU(il, uint32_t, 16, t, geteal, (double) (int32_t) t, _i32) #ifndef FPU_8087 opFPU(il, uint32_t, 32, t, geteal, (double) (int32_t) t, _i32) #endif -// clang-format on + // clang-format on -static int opFADD(uint32_t fetchdat) + static int opFADD(uint32_t fetchdat) { FP_ENTER(); cpu_state.pc++; @@ -242,7 +242,7 @@ 
opFUCOMPP(uint32_t fetchdat) return 0; } -#ifndef OPS_286_386 +# ifndef OPS_286_386 static int opFCOMI(uint32_t fetchdat) { @@ -274,7 +274,7 @@ opFCOMIP(uint32_t fetchdat) CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi)); return 0; } -#endif +# endif #endif static int @@ -478,7 +478,7 @@ opFUCOMP(uint32_t fetchdat) return 0; } -#ifndef OPS_286_386 +# ifndef OPS_286_386 static int opFUCOMI(uint32_t fetchdat) { @@ -510,5 +510,5 @@ opFUCOMIP(uint32_t fetchdat) CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi)); return 0; } -#endif +# endif #endif diff --git a/src/cpu/x87_ops_misc.h b/src/cpu/x87_ops_misc.h index f4d1dd2ea..86fc0307e 100644 --- a/src/cpu/x87_ops_misc.h +++ b/src/cpu/x87_ops_misc.h @@ -1046,23 +1046,23 @@ opFSTCW_a32(uint32_t fetchdat) #endif #ifndef FPU_8087 -#ifndef OPS_286_386 -# define opFCMOV(condition) \ - static int opFCMOV##condition(uint32_t fetchdat) \ - { \ - FP_ENTER(); \ - cpu_state.pc++; \ - if (cond_##condition) { \ - cpu_state.tag[cpu_state.TOP & 7] = cpu_state.tag[(cpu_state.TOP + fetchdat) & 7]; \ - cpu_state.MM[cpu_state.TOP & 7].q = cpu_state.MM[(cpu_state.TOP + fetchdat) & 7].q; \ - ST(0) = ST(fetchdat & 7); \ - } \ - CLOCK_CYCLES_FPU(4); \ - return 0; \ - } +# ifndef OPS_286_386 +# define opFCMOV(condition) \ + static int opFCMOV##condition(uint32_t fetchdat) \ + { \ + FP_ENTER(); \ + cpu_state.pc++; \ + if (cond_##condition) { \ + cpu_state.tag[cpu_state.TOP & 7] = cpu_state.tag[(cpu_state.TOP + fetchdat) & 7]; \ + cpu_state.MM[cpu_state.TOP & 7].q = cpu_state.MM[(cpu_state.TOP + fetchdat) & 7].q; \ + ST(0) = ST(fetchdat & 7); \ + } \ + CLOCK_CYCLES_FPU(4); \ + return 0; \ + } -# define cond_U (PF_SET()) -# define cond_NU (!PF_SET()) +# define cond_U (PF_SET()) +# define cond_NU (!PF_SET()) // clang-format off opFCMOV(B) @@ -1074,5 +1074,5 @@ opFCMOV(NE) opFCMOV(NBE) opFCMOV(NU) // clang-format on -#endif +# endif 
#endif diff --git a/src/cpu/x87_ops_sf.h b/src/cpu/x87_ops_sf.h index fb2f790a4..137919fa9 100644 --- a/src/cpu/x87_ops_sf.h +++ b/src/cpu/x87_ops_sf.h @@ -1,7 +1,7 @@ static uint32_t fpu_save_environment(void) { - int tag; + int tag; unsigned offset = 0; /* read all registers in stack order and update x87 tag word */ @@ -16,94 +16,98 @@ fpu_save_environment(void) fpu_state.swd = (fpu_state.swd & ~(7 << 11)) | ((fpu_state.tos & 7) << 11); switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { - case 0x000: { /*16-bit real mode*/ - uint16_t tmp; - uint32_t fp_ip; - uint32_t fp_dp; + case 0x000: + { /*16-bit real mode*/ + uint16_t tmp; + uint32_t fp_ip; + uint32_t fp_dp; - fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip; - fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp; + fp_ip = ((uint32_t) (fpu_state.fcs << 4)) | fpu_state.fip; + fp_dp = ((uint32_t) (fpu_state.fds << 4)) | fpu_state.fdp; - tmp = i387_get_control_word(); - writememw(easeg, cpu_state.eaaddr + 0x00, tmp); - tmp = i387_get_status_word(); - writememw(easeg, cpu_state.eaaddr + 0x02, tmp); - tmp = fpu_state.tag; - writememw(easeg, cpu_state.eaaddr + 0x04, tmp); - tmp = fp_ip & 0xffff; - writememw(easeg, cpu_state.eaaddr + 0x06, tmp); - tmp = (uint16_t)((fp_ip & 0xf0000) >> 4) | fpu_state.foo; - writememw(easeg, cpu_state.eaaddr + 0x08, tmp); - tmp = fp_dp & 0xffff; - writememw(easeg, cpu_state.eaaddr + 0x0a, tmp); - tmp = (uint16_t)((fp_dp & 0xf0000) >> 4); - writememw(easeg, cpu_state.eaaddr + 0x0c, tmp); - offset = 0x0e; - } - break; - case 0x001: {/*16-bit protected mode*/ - uint16_t tmp; - tmp = i387_get_control_word(); - writememw(easeg, cpu_state.eaaddr + 0x00, tmp); - tmp = i387_get_status_word(); - writememw(easeg, cpu_state.eaaddr + 0x02, tmp); - tmp = fpu_state.tag; - writememw(easeg, cpu_state.eaaddr + 0x04, tmp); - tmp = (uint16_t)(fpu_state.fip) & 0xffff; - writememw(easeg, cpu_state.eaaddr + 0x06, tmp); - tmp = fpu_state.fcs; - writememw(easeg, cpu_state.eaaddr + 0x08, tmp); - 
tmp = (uint16_t)(fpu_state.fdp) & 0xffff; - writememw(easeg, cpu_state.eaaddr + 0x0a, tmp); - tmp = fpu_state.fds; - writememw(easeg, cpu_state.eaaddr + 0x0c, tmp); - offset = 0x0e; - } - break; - case 0x100: { /*32-bit real mode*/ - uint32_t tmp; - uint32_t fp_ip; - uint32_t fp_dp; + tmp = i387_get_control_word(); + writememw(easeg, cpu_state.eaaddr + 0x00, tmp); + tmp = i387_get_status_word(); + writememw(easeg, cpu_state.eaaddr + 0x02, tmp); + tmp = fpu_state.tag; + writememw(easeg, cpu_state.eaaddr + 0x04, tmp); + tmp = fp_ip & 0xffff; + writememw(easeg, cpu_state.eaaddr + 0x06, tmp); + tmp = (uint16_t) ((fp_ip & 0xf0000) >> 4) | fpu_state.foo; + writememw(easeg, cpu_state.eaaddr + 0x08, tmp); + tmp = fp_dp & 0xffff; + writememw(easeg, cpu_state.eaaddr + 0x0a, tmp); + tmp = (uint16_t) ((fp_dp & 0xf0000) >> 4); + writememw(easeg, cpu_state.eaaddr + 0x0c, tmp); + offset = 0x0e; + } + break; + case 0x001: + { /*16-bit protected mode*/ + uint16_t tmp; + tmp = i387_get_control_word(); + writememw(easeg, cpu_state.eaaddr + 0x00, tmp); + tmp = i387_get_status_word(); + writememw(easeg, cpu_state.eaaddr + 0x02, tmp); + tmp = fpu_state.tag; + writememw(easeg, cpu_state.eaaddr + 0x04, tmp); + tmp = (uint16_t) (fpu_state.fip) & 0xffff; + writememw(easeg, cpu_state.eaaddr + 0x06, tmp); + tmp = fpu_state.fcs; + writememw(easeg, cpu_state.eaaddr + 0x08, tmp); + tmp = (uint16_t) (fpu_state.fdp) & 0xffff; + writememw(easeg, cpu_state.eaaddr + 0x0a, tmp); + tmp = fpu_state.fds; + writememw(easeg, cpu_state.eaaddr + 0x0c, tmp); + offset = 0x0e; + } + break; + case 0x100: + { /*32-bit real mode*/ + uint32_t tmp; + uint32_t fp_ip; + uint32_t fp_dp; - fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip; - fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp; + fp_ip = ((uint32_t) (fpu_state.fcs << 4)) | fpu_state.fip; + fp_dp = ((uint32_t) (fpu_state.fds << 4)) | fpu_state.fdp; - tmp = 0xffff0000 | i387_get_control_word(); - writememl(easeg, cpu_state.eaaddr + 0x00, tmp); 
- tmp = 0xffff0000 | i387_get_status_word(); - writememl(easeg, cpu_state.eaaddr + 0x04, tmp); - tmp = 0xffff0000 | fpu_state.tag; - writememl(easeg, cpu_state.eaaddr + 0x08, tmp); - tmp = 0xffff0000 | (fp_ip & 0xffff); - writememl(easeg, cpu_state.eaaddr + 0x0c, tmp); - tmp = ((fp_ip & 0xffff0000) >> 4) | fpu_state.foo; - writememl(easeg, cpu_state.eaaddr + 0x10, tmp); - tmp = 0xffff0000 | (fp_dp & 0xffff); - writememl(easeg, cpu_state.eaaddr + 0x14, tmp); - tmp = (fp_dp & 0xffff0000) >> 4; - writememl(easeg, cpu_state.eaaddr + 0x18, tmp); - offset = 0x1c; - } - break; - case 0x101: { /*32-bit protected mode*/ - uint32_t tmp; - tmp = 0xffff0000 | i387_get_control_word(); - writememl(easeg, cpu_state.eaaddr + 0x00, tmp); - tmp = 0xffff0000 | i387_get_status_word(); - writememl(easeg, cpu_state.eaaddr + 0x04, tmp); - tmp = 0xffff0000 | fpu_state.tag; - writememl(easeg, cpu_state.eaaddr + 0x08, tmp); - tmp = (uint32_t)(fpu_state.fip); - writememl(easeg, cpu_state.eaaddr + 0x0c, tmp); - tmp = fpu_state.fcs | (((uint32_t)(fpu_state.foo)) << 16); - writememl(easeg, cpu_state.eaaddr + 0x10, tmp); - tmp = (uint32_t)(fpu_state.fdp); - writememl(easeg, cpu_state.eaaddr + 0x14, tmp); - tmp = 0xffff0000 | fpu_state.fds; - writememl(easeg, cpu_state.eaaddr + 0x18, tmp); - offset = 0x1c; - } - break; + tmp = 0xffff0000 | i387_get_control_word(); + writememl(easeg, cpu_state.eaaddr + 0x00, tmp); + tmp = 0xffff0000 | i387_get_status_word(); + writememl(easeg, cpu_state.eaaddr + 0x04, tmp); + tmp = 0xffff0000 | fpu_state.tag; + writememl(easeg, cpu_state.eaaddr + 0x08, tmp); + tmp = 0xffff0000 | (fp_ip & 0xffff); + writememl(easeg, cpu_state.eaaddr + 0x0c, tmp); + tmp = ((fp_ip & 0xffff0000) >> 4) | fpu_state.foo; + writememl(easeg, cpu_state.eaaddr + 0x10, tmp); + tmp = 0xffff0000 | (fp_dp & 0xffff); + writememl(easeg, cpu_state.eaaddr + 0x14, tmp); + tmp = (fp_dp & 0xffff0000) >> 4; + writememl(easeg, cpu_state.eaaddr + 0x18, tmp); + offset = 0x1c; + } + break; + case 0x101: + { 
/*32-bit protected mode*/ + uint32_t tmp; + tmp = 0xffff0000 | i387_get_control_word(); + writememl(easeg, cpu_state.eaaddr + 0x00, tmp); + tmp = 0xffff0000 | i387_get_status_word(); + writememl(easeg, cpu_state.eaaddr + 0x04, tmp); + tmp = 0xffff0000 | fpu_state.tag; + writememl(easeg, cpu_state.eaaddr + 0x08, tmp); + tmp = (uint32_t) (fpu_state.fip); + writememl(easeg, cpu_state.eaaddr + 0x0c, tmp); + tmp = fpu_state.fcs | (((uint32_t) (fpu_state.foo)) << 16); + writememl(easeg, cpu_state.eaaddr + 0x10, tmp); + tmp = (uint32_t) (fpu_state.fdp); + writememl(easeg, cpu_state.eaaddr + 0x14, tmp); + tmp = 0xffff0000 | fpu_state.fds; + writememl(easeg, cpu_state.eaaddr + 0x18, tmp); + offset = 0x1c; + } + break; } return (cpu_state.eaaddr + offset); @@ -115,100 +119,104 @@ fpu_load_environment(void) unsigned offset = 0; switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { - case 0x000: { /*16-bit real mode*/ - uint16_t tmp; - uint32_t fp_ip; - uint32_t fp_dp; + case 0x000: + { /*16-bit real mode*/ + uint16_t tmp; + uint32_t fp_ip; + uint32_t fp_dp; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c); - fp_dp = (tmp & 0xf000) << 4; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a); - fpu_state.fdp = fp_dp | tmp; - fpu_state.fds = 0; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x08); - fp_ip = (tmp & 0xf000) << 4; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x06); - fpu_state.fip = fp_ip | tmp; - fpu_state.fcs = 0; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x04); - fpu_state.tag = tmp; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x02); - fpu_state.swd = tmp; - fpu_state.tos = (tmp >> 11) & 7; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x00); - fpu_state.cwd = tmp; - offset = 0x0e; - } - break; - case 0x001: {/*16-bit protected mode*/ - uint16_t tmp; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c); - fpu_state.fds = tmp; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a); - fpu_state.fdp = tmp; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x08); - fpu_state.fcs = tmp; - tmp = 
readmemw(easeg, cpu_state.eaaddr + 0x06); - fpu_state.fip = tmp; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x04); - fpu_state.tag = tmp; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x02); - fpu_state.swd = tmp; - fpu_state.tos = (tmp >> 11) & 7; - tmp = readmemw(easeg, cpu_state.eaaddr + 0x00); - fpu_state.cwd = tmp; - offset = 0x0e; - } - break; - case 0x100: { /*32-bit real mode*/ - uint32_t tmp; - uint32_t fp_ip; - uint32_t fp_dp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c); + fp_dp = (tmp & 0xf000) << 4; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a); + fpu_state.fdp = fp_dp | tmp; + fpu_state.fds = 0; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x08); + fp_ip = (tmp & 0xf000) << 4; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x06); + fpu_state.fip = fp_ip | tmp; + fpu_state.fcs = 0; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x04); + fpu_state.tag = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x02); + fpu_state.swd = tmp; + fpu_state.tos = (tmp >> 11) & 7; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x00); + fpu_state.cwd = tmp; + offset = 0x0e; + } + break; + case 0x001: + { /*16-bit protected mode*/ + uint16_t tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c); + fpu_state.fds = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a); + fpu_state.fdp = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x08); + fpu_state.fcs = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x06); + fpu_state.fip = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x04); + fpu_state.tag = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x02); + fpu_state.swd = tmp; + fpu_state.tos = (tmp >> 11) & 7; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x00); + fpu_state.cwd = tmp; + offset = 0x0e; + } + break; + case 0x100: + { /*32-bit real mode*/ + uint32_t tmp; + uint32_t fp_ip; + uint32_t fp_dp; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x18); - fp_dp = (tmp & 0x0ffff000) << 4; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x14); - fp_dp |= (tmp & 0xffff); - fpu_state.fdp = 
fp_dp; - fpu_state.fds = 0; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x10); - fpu_state.foo = tmp & 0x07ff; - fp_ip = (tmp & 0x0ffff000) << 4; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c); - fp_ip |= (tmp & 0xffff); - fpu_state.fip = fp_ip; - fpu_state.fcs = 0; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x08); - fpu_state.tag = tmp & 0xffff; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x04); - fpu_state.swd = tmp & 0xffff; - fpu_state.tos = (tmp >> 11) & 7; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x00); - fpu_state.cwd = tmp & 0xffff; - offset = 0x1c; - } - break; - case 0x101: { /*32-bit protected mode*/ - uint32_t tmp; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x18); - fpu_state.fds = tmp & 0xffff; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x14); - fpu_state.fdp = tmp; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x10); - fpu_state.fcs = tmp & 0xffff; - fpu_state.foo = (tmp >> 16) & 0x07ff; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c); - fpu_state.fip = tmp; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x08); - fpu_state.tag = tmp & 0xffff; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x04); - fpu_state.swd = tmp & 0xffff; - fpu_state.tos = (tmp >> 11) & 7; - tmp = readmeml(easeg, cpu_state.eaaddr + 0x00); - fpu_state.cwd = tmp & 0xffff; - offset = 0x1c; - } - break; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x18); + fp_dp = (tmp & 0x0ffff000) << 4; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x14); + fp_dp |= (tmp & 0xffff); + fpu_state.fdp = fp_dp; + fpu_state.fds = 0; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x10); + fpu_state.foo = tmp & 0x07ff; + fp_ip = (tmp & 0x0ffff000) << 4; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c); + fp_ip |= (tmp & 0xffff); + fpu_state.fip = fp_ip; + fpu_state.fcs = 0; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x08); + fpu_state.tag = tmp & 0xffff; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x04); + fpu_state.swd = tmp & 0xffff; + fpu_state.tos = (tmp >> 11) & 7; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x00); + 
fpu_state.cwd = tmp & 0xffff; + offset = 0x1c; + } + break; + case 0x101: + { /*32-bit protected mode*/ + uint32_t tmp; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x18); + fpu_state.fds = tmp & 0xffff; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x14); + fpu_state.fdp = tmp; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x10); + fpu_state.fcs = tmp & 0xffff; + fpu_state.foo = (tmp >> 16) & 0x07ff; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c); + fpu_state.fip = tmp; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x08); + fpu_state.tag = tmp & 0xffff; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x04); + fpu_state.swd = tmp & 0xffff; + fpu_state.tos = (tmp >> 11) & 7; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x00); + fpu_state.cwd = tmp & 0xffff; + offset = 0x1c; + } + break; } /* always set bit 6 as '1 */ @@ -364,7 +372,7 @@ static int sf_FRSTOR_a16(uint32_t fetchdat) { floatx80 tmp; - int offset; + int offset; FP_ENTER(); fetch_ea_16(fetchdat); @@ -372,7 +380,7 @@ sf_FRSTOR_a16(uint32_t fetchdat) offset = fpu_load_environment(); for (int n = 0; n < 8; n++) { tmp.fraction = readmemq(easeg, offset + (n * 10)); - tmp.exp = readmemw(easeg, offset + (n * 10) + 8); + tmp.exp = readmemw(easeg, offset + (n * 10) + 8); FPU_save_regi_tag(tmp, IS_TAG_EMPTY(n) ? X87_TAG_EMPTY : FPU_tagof(tmp), n); } CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frstor) : (x87_timings.frstor * cpu_multi)); @@ -384,7 +392,7 @@ static int sf_FRSTOR_a32(uint32_t fetchdat) { floatx80 tmp; - int offset; + int offset; FP_ENTER(); fetch_ea_32(fetchdat); @@ -392,7 +400,7 @@ sf_FRSTOR_a32(uint32_t fetchdat) offset = fpu_load_environment(); for (int n = 0; n < 8; n++) { tmp.fraction = readmemq(easeg, offset + (n * 10)); - tmp.exp = readmemw(easeg, offset + (n * 10) + 8); + tmp.exp = readmemw(easeg, offset + (n * 10) + 8); FPU_save_regi_tag(tmp, IS_TAG_EMPTY(n) ? X87_TAG_EMPTY : FPU_tagof(tmp), n); } CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? 
(x87_timings.frstor) : (x87_timings.frstor * cpu_multi)); @@ -405,7 +413,7 @@ static int sf_FNSAVE_a16(uint32_t fetchdat) { floatx80 stn; - int offset; + int offset; FP_ENTER(); fetch_ea_16(fetchdat); @@ -423,15 +431,15 @@ sf_FNSAVE_a16(uint32_t fetchdat) #else fpu_state.cwd = 0x37F; #endif - fpu_state.swd = 0; - fpu_state.tos = 0; - fpu_state.tag = 0xffff; + fpu_state.swd = 0; + fpu_state.tos = 0; + fpu_state.tag = 0xffff; cpu_state.ismmx = 0; - fpu_state.foo = 0; - fpu_state.fds = 0; - fpu_state.fdp = 0; - fpu_state.fcs = 0; - fpu_state.fip = 0; + fpu_state.foo = 0; + fpu_state.fds = 0; + fpu_state.fdp = 0; + fpu_state.fcs = 0; + fpu_state.fip = 0; CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsave) : (x87_timings.fsave * cpu_multi)); CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsave) : (x87_concurrency.fsave * cpu_multi)); return cpu_state.abrt; @@ -441,7 +449,7 @@ static int sf_FNSAVE_a32(uint32_t fetchdat) { floatx80 stn; - int offset; + int offset; FP_ENTER(); fetch_ea_32(fetchdat); @@ -454,20 +462,20 @@ sf_FNSAVE_a32(uint32_t fetchdat) writememw(easeg, offset + (m * 10) + 8, stn.exp); } -#ifdef FPU_8087 +# ifdef FPU_8087 fpu_state.swd = 0x3FF; -#else +# else fpu_state.cwd = 0x37F; -#endif - fpu_state.swd = 0; - fpu_state.tos = 0; - fpu_state.tag = 0xffff; +# endif + fpu_state.swd = 0; + fpu_state.tos = 0; + fpu_state.tag = 0xffff; cpu_state.ismmx = 0; - fpu_state.foo = 0; - fpu_state.fds = 0; - fpu_state.fdp = 0; - fpu_state.fcs = 0; - fpu_state.fip = 0; + fpu_state.foo = 0; + fpu_state.fds = 0; + fpu_state.fdp = 0; + fpu_state.fcs = 0; + fpu_state.fip = 0; CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsave) : (x87_timings.fsave * cpu_multi)); CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fsave) : (x87_concurrency.fsave * cpu_multi)); return cpu_state.abrt; @@ -479,9 +487,7 @@ sf_FNCLEX(uint32_t fetchdat) { FP_ENTER(); cpu_state.pc++; - fpu_state.swd &= ~(FPU_SW_Backward | FPU_SW_Summary | FPU_SW_Stack_Fault | FPU_SW_Precision | - FPU_SW_Underflow | FPU_SW_Overflow | FPU_SW_Zero_Div | FPU_SW_Denormal_Op | - FPU_SW_Invalid); + fpu_state.swd &= ~(FPU_SW_Backward | FPU_SW_Summary | FPU_SW_Stack_Fault | FPU_SW_Precision | FPU_SW_Underflow | FPU_SW_Overflow | FPU_SW_Zero_Div | FPU_SW_Denormal_Op | FPU_SW_Invalid); CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fnop) : (x87_timings.fnop * cpu_multi)); CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fnop) : (x87_concurrency.fnop * cpu_multi)); return 0; @@ -497,14 +503,14 @@ sf_FNINIT(uint32_t fetchdat) #else fpu_state.cwd = 0x37F; #endif - fpu_state.swd = 0; + fpu_state.swd = 0; fpu_state.tos = 0; - fpu_state.tag = 0xffff; - fpu_state.foo = 0; - fpu_state.fds = 0; - fpu_state.fdp = 0; - fpu_state.fcs = 0; - fpu_state.fip = 0; + fpu_state.tag = 0xffff; + fpu_state.foo = 0; + fpu_state.fds = 0; + fpu_state.fdp = 0; + fpu_state.fcs = 0; + fpu_state.fip = 0; cpu_state.ismmx = 0; CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.finit) : (x87_timings.finit * cpu_multi)); CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.finit) : (x87_concurrency.finit * cpu_multi)); @@ -567,7 +573,7 @@ sf_FNSTENV_a16(uint32_t fetchdat) /* mask all floating point exceptions */ fpu_state.cwd |= FPU_CW_Exceptions_Mask; /* clear the B and ES bits in the status word */ - fpu_state.swd &= ~(FPU_SW_Backward|FPU_SW_Summary); + fpu_state.swd &= ~(FPU_SW_Backward | FPU_SW_Summary); CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstenv) : (x87_timings.fstenv * cpu_multi)); CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fstenv) : (x87_concurrency.fstenv * cpu_multi)); return cpu_state.abrt; @@ -583,7 +589,7 @@ sf_FNSTENV_a32(uint32_t fetchdat) /* mask all floating point exceptions */ fpu_state.cwd |= FPU_CW_Exceptions_Mask; /* clear the B and ES bits in the status word */ - fpu_state.swd &= ~(FPU_SW_Backward|FPU_SW_Summary); + fpu_state.swd &= ~(FPU_SW_Backward | FPU_SW_Summary); CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstenv) : (x87_timings.fstenv * cpu_multi)); CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstenv) : (x87_concurrency.fstenv * cpu_multi)); return cpu_state.abrt; diff --git a/src/cpu/x87_ops_sf_arith.h b/src/cpu/x87_ops_sf_arith.h index 5dc80b432..5144062bb 100644 --- a/src/cpu/x87_ops_sf_arith.h +++ b/src/cpu/x87_ops_sf_arith.h @@ -1,180 +1,180 @@ -#define sf_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix) \ - static int sf_FADD##name##_a##a_size(uint32_t fetchdat) \ +#define sf_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix) \ + static int sf_FADD##name##_a##a_size(uint32_t fetchdat) \ { \ - floatx80 a, result; \ - struct float_status_t status; \ - optype temp; \ + floatx80 a, result; \ + struct float_status_t status; \ + optype temp; \ FP_ENTER(); \ - FPU_check_pending_exceptions(); \ + FPU_check_pending_exceptions(); \ fetch_ea_##a_size(fetchdat); \ SEG_CHECK_READ(cpu_state.ea_seg); \ - load_var = rw; \ - if (cpu_state.abrt) \ - return 1;\ - clear_C1(); \ - if (IS_TAG_EMPTY(0)) { \ - FPU_stack_underflow(fetchdat, 0, 0); \ - goto next_ins; \ - } \ - status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ - a = FPU_read_regi(0); \ - if (!is_nan) \ - result = floatx80_add(a, use_var, &status); \ - \ - if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ - FPU_save_regi(result, 0); \ - \ -next_ins: \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1; \ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_stack_underflow(fetchdat, 0, 0); \ + goto 
next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (!is_nan) \ + result = floatx80_add(a, use_var, &status); \ + \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_save_regi(result, 0); \ + \ +next_ins: \ CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi)); \ CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \ return 0; \ } \ - static int sf_FDIV##name##_a##a_size(uint32_t fetchdat) \ + static int sf_FDIV##name##_a##a_size(uint32_t fetchdat) \ { \ - floatx80 a, result; \ - struct float_status_t status; \ - optype temp; \ + floatx80 a, result; \ + struct float_status_t status; \ + optype temp; \ FP_ENTER(); \ - FPU_check_pending_exceptions(); \ + FPU_check_pending_exceptions(); \ fetch_ea_##a_size(fetchdat); \ SEG_CHECK_READ(cpu_state.ea_seg); \ - load_var = rw; \ - if (cpu_state.abrt) \ - return 1;\ - clear_C1(); \ - if (IS_TAG_EMPTY(0)) { \ - FPU_stack_underflow(fetchdat, 0, 0); \ - goto next_ins; \ - } \ - status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ - a = FPU_read_regi(0); \ - if (!is_nan) { \ - result = floatx80_div(a, use_var, &status); \ - } \ - if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ - FPU_save_regi(result, 0); \ - \ -next_ins: \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1; \ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_stack_underflow(fetchdat, 0, 0); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (!is_nan) { \ + result = floatx80_div(a, use_var, &status); \ + } \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_save_regi(result, 0); \ + \ +next_ins: \ CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? 
(x87_timings.fdiv##cycle_postfix) : ((x87_timings.fdiv##cycle_postfix) * cpu_multi)); \ CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \ return 0; \ } \ - static int sf_FDIVR##name##_a##a_size(uint32_t fetchdat) \ + static int sf_FDIVR##name##_a##a_size(uint32_t fetchdat) \ { \ - floatx80 a, result; \ - struct float_status_t status; \ - optype temp; \ + floatx80 a, result; \ + struct float_status_t status; \ + optype temp; \ FP_ENTER(); \ - FPU_check_pending_exceptions(); \ + FPU_check_pending_exceptions(); \ fetch_ea_##a_size(fetchdat); \ SEG_CHECK_READ(cpu_state.ea_seg); \ - load_var = rw; \ - if (cpu_state.abrt) \ - return 1;\ - clear_C1(); \ - if (IS_TAG_EMPTY(0)) { \ - FPU_stack_underflow(fetchdat, 0, 0); \ - goto next_ins; \ - } \ - status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ - a = FPU_read_regi(0); \ - if (!is_nan) { \ - result = floatx80_div(use_var, a, &status); \ - } \ - if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ - FPU_save_regi(result, 0); \ - \ -next_ins: \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1; \ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_stack_underflow(fetchdat, 0, 0); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (!is_nan) { \ + result = floatx80_div(use_var, a, &status); \ + } \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_save_regi(result, 0); \ + \ +next_ins: \ CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv##cycle_postfix) : ((x87_timings.fdiv##cycle_postfix) * cpu_multi)); \ CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fdiv##cycle_postfix) : ((x87_concurrency.fdiv##cycle_postfix) * cpu_multi)); \ return 0; \ } \ - static int sf_FMUL##name##_a##a_size(uint32_t fetchdat) \ + static int sf_FMUL##name##_a##a_size(uint32_t fetchdat) \ { \ - floatx80 a, result; \ - struct float_status_t status; \ - optype temp; \ + floatx80 a, result; \ + struct float_status_t status; \ + optype temp; \ FP_ENTER(); \ - FPU_check_pending_exceptions(); \ + FPU_check_pending_exceptions(); \ fetch_ea_##a_size(fetchdat); \ SEG_CHECK_READ(cpu_state.ea_seg); \ - load_var = rw; \ - if (cpu_state.abrt) \ - return 1;\ - clear_C1(); \ - if (IS_TAG_EMPTY(0)) { \ - FPU_stack_underflow(fetchdat, 0, 0); \ - goto next_ins; \ - } \ - status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ - a = FPU_read_regi(0); \ - if (!is_nan) { \ - result = floatx80_mul(a, use_var, &status); \ - } \ - if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ - FPU_save_regi(result, 0); \ - \ -next_ins: \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1; \ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_stack_underflow(fetchdat, 0, 0); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (!is_nan) { \ + result = floatx80_mul(a, use_var, &status); \ + } \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_save_regi(result, 0); \ + \ +next_ins: \ CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul##cycle_postfix) : ((x87_timings.fmul##cycle_postfix) * cpu_multi)); \ CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fmul##cycle_postfix) : ((x87_concurrency.fmul##cycle_postfix) * cpu_multi)); \ return 0; \ } \ - static int sf_FSUB##name##_a##a_size(uint32_t fetchdat) \ + static int sf_FSUB##name##_a##a_size(uint32_t fetchdat) \ { \ - floatx80 a, result; \ - struct float_status_t status; \ - optype temp; \ + floatx80 a, result; \ + struct float_status_t status; \ + optype temp; \ FP_ENTER(); \ - FPU_check_pending_exceptions(); \ + FPU_check_pending_exceptions(); \ fetch_ea_##a_size(fetchdat); \ SEG_CHECK_READ(cpu_state.ea_seg); \ - load_var = rw; \ - if (cpu_state.abrt) \ - return 1;\ - clear_C1(); \ - if (IS_TAG_EMPTY(0)) { \ - FPU_stack_underflow(fetchdat, 0, 0); \ - goto next_ins; \ - } \ - status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ - a = FPU_read_regi(0); \ - if (!is_nan) \ - result = floatx80_sub(a, use_var, &status); \ - \ - if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ - FPU_save_regi(result, 0); \ - \ -next_ins: \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1; \ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_stack_underflow(fetchdat, 0, 0); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (!is_nan) \ + result = floatx80_sub(a, use_var, &status); \ + \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_save_regi(result, 0); \ + \ +next_ins: \ CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi)); \ CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \ return 0; \ } \ - static int sf_FSUBR##name##_a##a_size(uint32_t fetchdat) \ + static int sf_FSUBR##name##_a##a_size(uint32_t fetchdat) \ { \ - floatx80 a, result; \ - struct float_status_t status; \ - optype temp; \ + floatx80 a, result; \ + struct float_status_t status; \ + optype temp; \ FP_ENTER(); \ - FPU_check_pending_exceptions(); \ + FPU_check_pending_exceptions(); \ fetch_ea_##a_size(fetchdat); \ SEG_CHECK_READ(cpu_state.ea_seg); \ - load_var = rw; \ - if (cpu_state.abrt) \ - return 1;\ - clear_C1(); \ - if (IS_TAG_EMPTY(0)) { \ - FPU_stack_underflow(fetchdat, 0, 0); \ - goto next_ins; \ - } \ - status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ - a = FPU_read_regi(0); \ - if (!is_nan) \ - result = floatx80_sub(use_var, a, &status); \ - \ - if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ - FPU_save_regi(result, 0); \ - \ -next_ins: \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1; \ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_stack_underflow(fetchdat, 0, 0); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (!is_nan) \ + result = floatx80_sub(use_var, a, &status); \ + \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_save_regi(result, 0); \ + \ +next_ins: \ CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi)); \ CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \ return 0; \ @@ -198,10 +198,9 @@ sf_FPU(il, uint32_t, 16, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i #ifndef FPU_8087 sf_FPU(il, uint32_t, 32, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i32) #endif -// clang-format on + // clang-format on -static int -sf_FADD_st0_stj(uint32_t fetchdat) + static int sf_FADD_st0_stj(uint32_t fetchdat) { floatx80 a; floatx80 b; @@ -217,8 +216,8 @@ sf_FADD_st0_stj(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); result = floatx80_add(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) @@ -246,8 +245,8 @@ sf_FADD_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(fetchdat & 7); - b = FPU_read_regi(0); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); result = floatx80_add(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) @@ -276,8 +275,8 @@ sf_FADDP_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(fetchdat & 7); - b = FPU_read_regi(0); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); result = floatx80_add(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -307,8 +306,8 @@ sf_FDIV_st0_stj(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); result = floatx80_div(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) @@ -336,8 +335,8 @@ sf_FDIV_sti_st0(uint32_t fetchdat) goto next_ins; } status = 
i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(fetchdat & 7); - b = FPU_read_regi(0); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); result = floatx80_div(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) @@ -364,8 +363,8 @@ sf_FDIVP_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(fetchdat & 7); - b = FPU_read_regi(0); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); result = floatx80_div(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -395,8 +394,8 @@ sf_FDIVR_st0_stj(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(fetchdat & 7); - b = FPU_read_regi(0); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); result = floatx80_div(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) @@ -423,8 +422,8 @@ sf_FDIVR_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); result = floatx80_div(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) @@ -451,8 +450,8 @@ sf_FDIVRP_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); result = floatx80_div(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -482,8 +481,8 @@ sf_FMUL_st0_stj(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); result = floatx80_mul(a, 
b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -511,8 +510,8 @@ sf_FMUL_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); result = floatx80_mul(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -540,8 +539,8 @@ sf_FMULP_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(fetchdat & 7); - b = FPU_read_regi(0); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); result = floatx80_mul(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -571,8 +570,8 @@ sf_FSUB_st0_stj(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); result = floatx80_sub(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -600,8 +599,8 @@ sf_FSUB_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(fetchdat & 7); - b = FPU_read_regi(0); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); result = floatx80_sub(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -629,8 +628,8 @@ sf_FSUBP_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(fetchdat & 7); - b = FPU_read_regi(0); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); result = floatx80_sub(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -660,8 +659,8 @@ sf_FSUBR_st0_stj(uint32_t fetchdat) goto next_ins; } status = 
i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(fetchdat & 7); - b = FPU_read_regi(0); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); result = floatx80_sub(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -689,8 +688,8 @@ sf_FSUBR_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); result = floatx80_sub(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -718,8 +717,8 @@ sf_FSUBRP_sti_st0(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); result = floatx80_sub(a, b, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { @@ -736,7 +735,7 @@ next_ins: static int sf_FSQRT(uint32_t fetchdat) { - floatx80 result; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -762,7 +761,7 @@ next_ins: static int sf_FRNDINT(uint32_t fetchdat) { - floatx80 result; + floatx80 result; struct float_status_t status; FP_ENTER(); diff --git a/src/cpu/x87_ops_sf_compare.h b/src/cpu/x87_ops_sf_compare.h index 0dde17a05..6b4c1cb62 100644 --- a/src/cpu/x87_ops_sf_compare.h +++ b/src/cpu/x87_ops_sf_compare.h @@ -1,76 +1,76 @@ -#define cmp_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix) \ - static int sf_FCOM##name##_a##a_size(uint32_t fetchdat) \ +#define cmp_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix) \ + static int sf_FCOM##name##_a##a_size(uint32_t fetchdat) \ { \ - floatx80 a; \ - int rc; \ - struct float_status_t status; \ - optype temp; \ + floatx80 a; \ + int rc; \ + struct float_status_t status; \ + optype temp; \ FP_ENTER(); \ fetch_ea_##a_size(fetchdat); 
\ SEG_CHECK_READ(cpu_state.ea_seg); \ - load_var = rw; \ - if (cpu_state.abrt) \ - return 1;\ - clear_C1(); \ - if (IS_TAG_EMPTY(0)) { \ - FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \ - setcc(C0 | C2 | C3); \ - goto next_ins; \ - } \ - status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ - a = FPU_read_regi(0); \ - if (is_nan) { \ - rc = float_relation_unordered; \ - float_raise(&status, float_flag_invalid); \ - } else { \ - rc = floatx80_compare_two(a, use_var, &status); \ - } \ - setcc(FPU_status_word_flags_fpu_compare(rc)); \ - FPU_exception(fetchdat, status.float_exception_flags, 0); \ - \ -next_ins: \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1; \ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \ + setcc(C0 | C2 | C3); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (is_nan) { \ + rc = float_relation_unordered; \ + float_raise(&status, float_flag_invalid); \ + } else { \ + rc = floatx80_compare_two(a, use_var, &status); \ + } \ + setcc(FPU_status_word_flags_fpu_compare(rc)); \ + FPU_exception(fetchdat, status.float_exception_flags, 0); \ + \ +next_ins: \ CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom##cycle_postfix) : ((x87_timings.fcom##cycle_postfix) * cpu_multi)); \ CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fcom##cycle_postfix) : ((x87_concurrency.fcom##cycle_postfix) * cpu_multi)); \ return 0; \ } \ - static int sf_FCOMP##name##_a##a_size(uint32_t fetchdat) \ + static int sf_FCOMP##name##_a##a_size(uint32_t fetchdat) \ { \ - floatx80 a; \ - int rc; \ - struct float_status_t status; \ - optype temp; \ + floatx80 a; \ + int rc; \ + struct float_status_t status; \ + optype temp; \ FP_ENTER(); \ fetch_ea_##a_size(fetchdat); \ SEG_CHECK_READ(cpu_state.ea_seg); \ - load_var = rw; \ - if (cpu_state.abrt) \ - return 1;\ - clear_C1(); \ - if (IS_TAG_EMPTY(0)) { \ - FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \ - setcc(C0 | C2 | C3); \ - if (is_IA_masked()) \ - FPU_pop(); \ - \ - goto next_ins; \ - } \ - status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ - a = FPU_read_regi(0); \ - if (is_nan) { \ - rc = float_relation_unordered; \ - float_raise(&status, float_flag_invalid); \ - } else { \ - rc = floatx80_compare_two(a, use_var, &status); \ - } \ - setcc(FPU_status_word_flags_fpu_compare(rc)); \ - if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ - FPU_pop(); \ - \ -next_ins: \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1; \ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \ + setcc(C0 | C2 | C3); \ + if (is_IA_masked()) \ + FPU_pop(); \ + \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (is_nan) { \ + rc = float_relation_unordered; \ + float_raise(&status, float_flag_invalid); \ + } else { \ + rc = floatx80_compare_two(a, use_var, &status); \ + } \ + setcc(FPU_status_word_flags_fpu_compare(rc)); \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_pop(); \ + \ +next_ins: \ CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom##cycle_postfix) : ((x87_timings.fcom##cycle_postfix) * cpu_multi)); \ CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fcom##cycle_postfix) : ((x87_concurrency.fcom##cycle_postfix) * cpu_multi)); \ return 0; \ - } \ + } // clang-format off cmp_FPU(s, float32, 16, temp, geteal(), float32_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float32_is_nan(temp), _32) @@ -90,15 +90,14 @@ cmp_FPU(il, int32_t, 16, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i #ifndef FPU_8087 cmp_FPU(il, int32_t, 32, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i32) #endif -// clang-format on + // clang-format on -static int -sf_FCOM_sti(uint32_t fetchdat) + static int sf_FCOM_sti(uint32_t fetchdat) { floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -109,9 +108,9 @@ sf_FCOM_sti(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); - rc = floatx80_compare_two(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_two(a, b, &status); setcc(FPU_status_word_flags_fpu_compare(rc)); FPU_exception(fetchdat, status.float_exception_flags, 0); @@ -127,7 +126,7 @@ sf_FCOMP_sti(uint32_t fetchdat) floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -141,9 +140,9 @@ sf_FCOMP_sti(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); - rc = floatx80_compare_two(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_two(a, b, &status); setcc(FPU_status_word_flags_fpu_compare(rc)); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_pop(); @@ -161,7 +160,7 @@ sf_FCOMPP(uint32_t fetchdat) floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -176,9 +175,9 @@ sf_FCOMPP(uint32_t fetchdat) 
goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(1); - rc = floatx80_compare_two(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(1); + rc = floatx80_compare_two(a, b, &status); setcc(FPU_status_word_flags_fpu_compare(rc)); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_pop(); @@ -198,7 +197,7 @@ sf_FUCOMPP(uint32_t fetchdat) floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -213,9 +212,9 @@ sf_FUCOMPP(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(1); - rc = floatx80_compare_quiet(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(1); + rc = floatx80_compare_quiet(a, b, &status); setcc(FPU_status_word_flags_fpu_compare(rc)); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_pop(); @@ -228,14 +227,14 @@ next_ins: return 0; } -#ifndef OPS_286_386 +# ifndef OPS_286_386 static int sf_FCOMI_st0_stj(uint32_t fetchdat) { floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -247,9 +246,9 @@ sf_FCOMI_st0_stj(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); - rc = floatx80_compare_two(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_two(a, b, &status); FPU_write_eflags_fpu_compare(rc); FPU_exception(fetchdat, status.float_exception_flags, 0); @@ -264,7 +263,7 @@ sf_FCOMIP_st0_stj(uint32_t fetchdat) floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -279,9 +278,9 @@ sf_FCOMIP_st0_stj(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = 
FPU_read_regi(fetchdat & 7); - rc = floatx80_compare_two(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_two(a, b, &status); FPU_write_eflags_fpu_compare(rc); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_pop(); @@ -292,7 +291,7 @@ next_ins: CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi)); return 0; } -#endif +# endif static int sf_FUCOM_sti(uint32_t fetchdat) @@ -300,7 +299,7 @@ sf_FUCOM_sti(uint32_t fetchdat) floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -311,9 +310,9 @@ sf_FUCOM_sti(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); - rc = floatx80_compare_quiet(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_quiet(a, b, &status); setcc(FPU_status_word_flags_fpu_compare(rc)); FPU_exception(fetchdat, status.float_exception_flags, 0); @@ -329,7 +328,7 @@ sf_FUCOMP_sti(uint32_t fetchdat) floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -343,9 +342,9 @@ sf_FUCOMP_sti(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); - rc = floatx80_compare_quiet(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_quiet(a, b, &status); setcc(FPU_status_word_flags_fpu_compare(rc)); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) FPU_pop(); @@ -356,14 +355,14 @@ next_ins: return 0; } -#ifndef OPS_286_386 +# ifndef OPS_286_386 static int sf_FUCOMI_st0_stj(uint32_t fetchdat) { floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -375,9 +374,9 @@ 
sf_FUCOMI_st0_stj(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); - rc = floatx80_compare_quiet(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_quiet(a, b, &status); FPU_write_eflags_fpu_compare(rc); FPU_exception(fetchdat, status.float_exception_flags, 0); @@ -392,7 +391,7 @@ sf_FUCOMIP_st0_stj(uint32_t fetchdat) floatx80 a; floatx80 b; struct float_status_t status; - int rc; + int rc; FP_ENTER(); cpu_state.pc++; @@ -407,9 +406,9 @@ sf_FUCOMIP_st0_stj(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(fetchdat & 7); - rc = floatx80_compare_quiet(a, b, &status); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_quiet(a, b, &status); FPU_write_eflags_fpu_compare(rc); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) FPU_pop(); @@ -419,13 +418,13 @@ next_ins: CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi)); return 0; } -#endif +# endif #endif static int sf_FTST(uint32_t fetchdat) { - int rc; + int rc; struct float_status_t status; FP_ENTER(); @@ -436,7 +435,7 @@ sf_FTST(uint32_t fetchdat) setcc(C0 | C2 | C3); } else { status = i387cw_to_softfloat_status_word(i387_get_control_word()); - rc = floatx80_compare_two(FPU_read_regi(0), Const_Z, &status); + rc = floatx80_compare_two(FPU_read_regi(0), Const_Z, &status); setcc(FPU_status_word_flags_fpu_compare(rc)); FPU_exception(fetchdat, status.float_exception_flags, 0); } @@ -448,19 +447,19 @@ sf_FTST(uint32_t fetchdat) static int sf_FXAM(uint32_t fetchdat) { - floatx80 reg; - int sign; + floatx80 reg; + int sign; float_class_t aClass; FP_ENTER(); cpu_state.pc++; - reg = FPU_read_regi(0); + reg = FPU_read_regi(0); sign = floatx80_sign(reg); - /* - * Examine the contents of the ST(0) register and sets the condition - * code flags C0, C2 and C3 in the FPU status word to indicate the - * class of value or number in the register. - */ + /* + * Examine the contents of the ST(0) register and sets the condition + * code flags C0, C2 and C3 in the FPU status word to indicate the + * class of value or number in the register. + */ if (IS_TAG_EMPTY(0)) { setcc(C3 | C1 | C0); } else { @@ -490,10 +489,10 @@ sf_FXAM(uint32_t fetchdat) break; } } - /* - * The C1 flag is set to the sign of the value in ST(0), regardless - * of whether the register is empty or full. - */ + /* + * The C1 flag is set to the sign of the value in ST(0), regardless + * of whether the register is empty or full. + */ if (!sign) clear_C1(); diff --git a/src/cpu/x87_ops_sf_const.h b/src/cpu/x87_ops_sf_const.h index 708c6ff7a..0808cbae8 100644 --- a/src/cpu/x87_ops_sf_const.h +++ b/src/cpu/x87_ops_sf_const.h @@ -1,14 +1,14 @@ /* A fast way to find out whether x is one of RC_DOWN or RC_CHOP (and not one of RC_RND or RC_UP). 
*/ -#define DOWN_OR_CHOP() (fpu_state.cwd & FPU_CW_RC & FPU_RC_DOWN) +#define DOWN_OR_CHOP() (fpu_state.cwd & FPU_CW_RC & FPU_RC_DOWN) static __inline floatx80 FPU_round_const(const floatx80 a, int adj) { - floatx80 result = a; - result.fraction += adj; - return result; + floatx80 result = a; + result.fraction += adj; + return result; } static int diff --git a/src/cpu/x87_ops_sf_load_store.h b/src/cpu/x87_ops_sf_load_store.h index 4d0fb1281..383a7ee52 100644 --- a/src/cpu/x87_ops_sf_load_store.h +++ b/src/cpu/x87_ops_sf_load_store.h @@ -17,13 +17,18 @@ * Copyright 2016-2019 Miran Grca. */ -#define swap_values16u(a, b) { uint16_t tmp = a; a = b; b = tmp; } +#define swap_values16u(a, b) \ + { \ + uint16_t tmp = a; \ + a = b; \ + b = tmp; \ + } static int sf_FILDiw_a16(uint32_t fetchdat) { floatx80 result; - int16_t temp; + int16_t temp; FP_ENTER(); FPU_check_pending_exceptions(); @@ -49,7 +54,7 @@ static int sf_FILDiw_a32(uint32_t fetchdat) { floatx80 result; - int16_t temp; + int16_t temp; FP_ENTER(); FPU_check_pending_exceptions(); @@ -76,7 +81,7 @@ static int sf_FILDil_a16(uint32_t fetchdat) { floatx80 result; - int32_t templ; + int32_t templ; FP_ENTER(); FPU_check_pending_exceptions(); @@ -102,7 +107,7 @@ static int sf_FILDil_a32(uint32_t fetchdat) { floatx80 result; - int32_t templ; + int32_t templ; FP_ENTER(); FPU_check_pending_exceptions(); @@ -129,7 +134,7 @@ static int sf_FILDiq_a16(uint32_t fetchdat) { floatx80 result; - int64_t temp64; + int64_t temp64; FP_ENTER(); FPU_check_pending_exceptions(); @@ -155,7 +160,7 @@ static int sf_FILDiq_a32(uint32_t fetchdat) { floatx80 result; - int64_t temp64; + int64_t temp64; FP_ENTER(); FPU_check_pending_exceptions(); @@ -182,9 +187,9 @@ static int sf_FLDs_a16(uint32_t fetchdat) { struct float_status_t status; - floatx80 result; - float32 load_reg; - unsigned unmasked; + floatx80 result; + float32 load_reg; + unsigned unmasked; FP_ENTER(); FPU_check_pending_exceptions(); @@ -198,8 +203,8 @@ sf_FLDs_a16(uint32_t 
fetchdat) FPU_stack_overflow(fetchdat); goto next_ins; } - status = i387cw_to_softfloat_status_word(i387_get_control_word()); - result = float32_to_floatx80(load_reg, &status); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = float32_to_floatx80(load_reg, &status); unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0); if (!(unmasked & FPU_CW_Invalid)) { FPU_push(); @@ -216,9 +221,9 @@ static int sf_FLDs_a32(uint32_t fetchdat) { struct float_status_t status; - floatx80 result; - float32 load_reg; - unsigned unmasked; + floatx80 result; + float32 load_reg; + unsigned unmasked; FP_ENTER(); FPU_check_pending_exceptions(); @@ -232,8 +237,8 @@ sf_FLDs_a32(uint32_t fetchdat) FPU_stack_overflow(fetchdat); goto next_ins; } - status = i387cw_to_softfloat_status_word(i387_get_control_word()); - result = float32_to_floatx80(load_reg, &status); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = float32_to_floatx80(load_reg, &status); unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0); if (!(unmasked & FPU_CW_Invalid)) { FPU_push(); @@ -251,9 +256,9 @@ static int sf_FLDd_a16(uint32_t fetchdat) { struct float_status_t status; - floatx80 result; - float64 load_reg; - unsigned unmasked; + floatx80 result; + float64 load_reg; + unsigned unmasked; FP_ENTER(); FPU_check_pending_exceptions(); @@ -267,8 +272,8 @@ sf_FLDd_a16(uint32_t fetchdat) FPU_stack_overflow(fetchdat); goto next_ins; } - status = i387cw_to_softfloat_status_word(i387_get_control_word()); - result = float64_to_floatx80(load_reg, &status); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = float64_to_floatx80(load_reg, &status); unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0); if (!(unmasked & FPU_CW_Invalid)) { FPU_push(); @@ -285,9 +290,9 @@ static int sf_FLDd_a32(uint32_t fetchdat) { struct float_status_t status; - floatx80 result; - float64 load_reg; - unsigned unmasked; + 
floatx80 result; + float64 load_reg; + unsigned unmasked; FP_ENTER(); FPU_check_pending_exceptions(); @@ -301,8 +306,8 @@ sf_FLDd_a32(uint32_t fetchdat) FPU_stack_overflow(fetchdat); goto next_ins; } - status = i387cw_to_softfloat_status_word(i387_get_control_word()); - result = float64_to_floatx80(load_reg, &status); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = float64_to_floatx80(load_reg, &status); unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0); if (!(unmasked & FPU_CW_Invalid)) { FPU_push(); @@ -326,7 +331,7 @@ sf_FLDe_a16(uint32_t fetchdat) fetch_ea_16(fetchdat); SEG_CHECK_READ(cpu_state.ea_seg); result.fraction = readmemq(easeg, cpu_state.eaaddr); - result.exp = readmemw(easeg, cpu_state.eaaddr + 8); + result.exp = readmemw(easeg, cpu_state.eaaddr + 8); if (cpu_state.abrt) return 1; clear_C1(); @@ -351,7 +356,7 @@ sf_FLDe_a32(uint32_t fetchdat) fetch_ea_32(fetchdat); SEG_CHECK_READ(cpu_state.ea_seg); result.fraction = readmemq(easeg, cpu_state.eaaddr); - result.exp = readmemw(easeg, cpu_state.eaaddr + 8); + result.exp = readmemw(easeg, cpu_state.eaaddr + 8); if (cpu_state.abrt) return 1; clear_C1(); @@ -371,7 +376,7 @@ static int sf_FLD_sti(uint32_t fetchdat) { const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); - floatx80 sti_reg; + floatx80 sti_reg; FP_ENTER(); FPU_check_pending_exceptions(); @@ -403,8 +408,8 @@ static int sf_FISTiw_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int16_t save_reg = int16_indefinite; + uint16_t sw = fpu_state.swd; + int16_t save_reg = int16_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -417,7 +422,7 @@ sf_FISTiw_a16(uint32_t fetchdat) goto next_ins; } } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int16(FPU_read_regi(0), &status); if 
(FPU_exception(fetchdat, status.float_exception_flags, 1)) { goto next_ins; @@ -438,8 +443,8 @@ static int sf_FISTiw_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int16_t save_reg = int16_indefinite; + uint16_t sw = fpu_state.swd; + int16_t save_reg = int16_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -451,7 +456,7 @@ sf_FISTiw_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int16(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) goto next_ins; @@ -472,8 +477,8 @@ static int sf_FISTPiw_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int16_t save_reg = int16_indefinite; + uint16_t sw = fpu_state.swd; + int16_t save_reg = int16_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -485,7 +490,7 @@ sf_FISTPiw_a16(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int16(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { goto next_ins; @@ -509,8 +514,8 @@ static int sf_FISTPiw_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int16_t save_reg = int16_indefinite; + uint16_t sw = fpu_state.swd; + int16_t save_reg = int16_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -522,7 +527,7 @@ sf_FISTPiw_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int16(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, 
status.float_exception_flags, 1)) goto next_ins; @@ -546,8 +551,8 @@ static int sf_FISTil_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int32_t save_reg = int32_indefinite; + uint16_t sw = fpu_state.swd; + int32_t save_reg = int32_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -559,7 +564,7 @@ sf_FISTil_a16(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int32(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { goto next_ins; @@ -580,8 +585,8 @@ static int sf_FISTil_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int32_t save_reg = int32_indefinite; + uint16_t sw = fpu_state.swd; + int32_t save_reg = int32_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -593,7 +598,7 @@ sf_FISTil_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int32(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) goto next_ins; @@ -614,8 +619,8 @@ static int sf_FISTPil_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int32_t save_reg = int32_indefinite; + uint16_t sw = fpu_state.swd; + int32_t save_reg = int32_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -627,7 +632,7 @@ sf_FISTPil_a16(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int32(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { goto 
next_ins; @@ -651,8 +656,8 @@ static int sf_FISTPil_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int32_t save_reg = int32_indefinite; + uint16_t sw = fpu_state.swd; + int32_t save_reg = int32_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -664,7 +669,7 @@ sf_FISTPil_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int32(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) goto next_ins; @@ -688,8 +693,8 @@ static int sf_FISTPiq_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int64_t save_reg = int64_indefinite; + uint16_t sw = fpu_state.swd; + int64_t save_reg = int64_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -701,9 +706,9 @@ sf_FISTPiq_a16(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int64(FPU_read_regi(0), &status); - if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { goto next_ins; } } @@ -725,8 +730,8 @@ static int sf_FISTPiq_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - int64_t save_reg = int64_indefinite; + uint16_t sw = fpu_state.swd; + int64_t save_reg = int64_indefinite; FP_ENTER(); FPU_check_pending_exceptions(); @@ -738,7 +743,7 @@ sf_FISTPiq_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_int64(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, 
status.float_exception_flags, 1)) goto next_ins; @@ -762,12 +767,12 @@ static int sf_FBSTP_PACKED_BCD_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - uint16_t save_reg_hi = 0xffff; - uint64_t save_reg_lo = BX_CONST64(0xC000000000000000); - floatx80 reg; - int64_t save_val; - int sign; + uint16_t sw = fpu_state.swd; + uint16_t save_reg_hi = 0xffff; + uint64_t save_reg_lo = BX_CONST64(0xC000000000000000); + floatx80 reg; + int64_t save_val; + int sign; FP_ENTER(); FPU_check_pending_exceptions(); @@ -779,10 +784,10 @@ sf_FBSTP_PACKED_BCD_a16(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); - reg = FPU_read_regi(0); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + reg = FPU_read_regi(0); save_val = floatx80_to_int64(reg, &status); - sign = (reg.exp & 0x8000) != 0; + sign = (reg.exp & 0x8000) != 0; if (sign) save_val = -save_val; @@ -793,12 +798,12 @@ sf_FBSTP_PACKED_BCD_a16(uint32_t fetchdat) save_reg_hi = sign ? 
0x8000 : 0; save_reg_lo = 0; for (int i = 0; i < 16; i++) { - save_reg_lo += ((uint64_t)(save_val % 10)) << (4 * i); + save_reg_lo += ((uint64_t) (save_val % 10)) << (4 * i); save_val /= 10; } - save_reg_hi += (uint16_t)(save_val % 10); + save_reg_hi += (uint16_t) (save_val % 10); save_val /= 10; - save_reg_hi += (uint16_t)(save_val % 10) << 4; + save_reg_hi += (uint16_t) (save_val % 10) << 4; } /* check for fpu arithmetic exceptions */ if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { @@ -826,12 +831,12 @@ static int sf_FBSTP_PACKED_BCD_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - uint16_t save_reg_hi = 0xffff; - uint64_t save_reg_lo = BX_CONST64(0xC000000000000000); - floatx80 reg; - int64_t save_val; - int sign; + uint16_t sw = fpu_state.swd; + uint16_t save_reg_hi = 0xffff; + uint64_t save_reg_lo = BX_CONST64(0xC000000000000000); + floatx80 reg; + int64_t save_val; + int sign; FP_ENTER(); FPU_check_pending_exceptions(); @@ -843,10 +848,10 @@ sf_FBSTP_PACKED_BCD_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); - reg = FPU_read_regi(0); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + reg = FPU_read_regi(0); save_val = floatx80_to_int64(reg, &status); - sign = (reg.exp & 0x8000) != 0; + sign = (reg.exp & 0x8000) != 0; if (sign) save_val = -save_val; @@ -857,12 +862,12 @@ sf_FBSTP_PACKED_BCD_a32(uint32_t fetchdat) save_reg_hi = sign ? 
0x8000 : 0; save_reg_lo = 0; for (int i = 0; i < 16; i++) { - save_reg_lo += ((uint64_t)(save_val % 10)) << (4 * i); + save_reg_lo += ((uint64_t) (save_val % 10)) << (4 * i); save_val /= 10; } - save_reg_hi += (uint16_t)(save_val % 10); + save_reg_hi += (uint16_t) (save_val % 10); save_val /= 10; - save_reg_hi += (uint16_t)(save_val % 10) << 4; + save_reg_hi += (uint16_t) (save_val % 10) << 4; } /* check for fpu arithmetic exceptions */ if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { @@ -891,8 +896,8 @@ static int sf_FSTs_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - float32 save_reg = float32_default_nan; + uint16_t sw = fpu_state.swd; + float32 save_reg = float32_default_nan; FP_ENTER(); FPU_check_pending_exceptions(); @@ -904,7 +909,7 @@ sf_FSTs_a16(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_float32(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { goto next_ins; @@ -925,8 +930,8 @@ static int sf_FSTs_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - float32 save_reg = float32_default_nan; + uint16_t sw = fpu_state.swd; + float32 save_reg = float32_default_nan; FP_ENTER(); FPU_check_pending_exceptions(); @@ -938,7 +943,7 @@ sf_FSTs_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_float32(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) goto next_ins; @@ -959,8 +964,8 @@ static int sf_FSTPs_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - float32 save_reg = float32_default_nan; + uint16_t sw = 
fpu_state.swd; + float32 save_reg = float32_default_nan; FP_ENTER(); FPU_check_pending_exceptions(); @@ -972,7 +977,7 @@ sf_FSTPs_a16(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_float32(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { goto next_ins; @@ -997,8 +1002,8 @@ static int sf_FSTPs_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - float32 save_reg = float32_default_nan; + uint16_t sw = fpu_state.swd; + float32 save_reg = float32_default_nan; FP_ENTER(); FPU_check_pending_exceptions(); @@ -1010,7 +1015,7 @@ sf_FSTPs_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_float32(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) goto next_ins; @@ -1034,8 +1039,8 @@ static int sf_FSTd_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - float64 save_reg = float64_default_nan; + uint16_t sw = fpu_state.swd; + float64 save_reg = float64_default_nan; FP_ENTER(); FPU_check_pending_exceptions(); @@ -1047,7 +1052,7 @@ sf_FSTd_a16(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_float64(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { goto next_ins; @@ -1068,8 +1073,8 @@ static int sf_FSTd_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - float64 save_reg = float64_default_nan; + uint16_t sw = fpu_state.swd; + 
float64 save_reg = float64_default_nan; FP_ENTER(); FPU_check_pending_exceptions(); @@ -1081,7 +1086,7 @@ sf_FSTd_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_float64(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) goto next_ins; @@ -1102,8 +1107,8 @@ static int sf_FSTPd_a16(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - float64 save_reg = float64_default_nan; + uint16_t sw = fpu_state.swd; + float64 save_reg = float64_default_nan; FP_ENTER(); FPU_check_pending_exceptions(); @@ -1116,7 +1121,7 @@ sf_FSTPd_a16(uint32_t fetchdat) goto next_ins; } } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_float64(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { goto next_ins; @@ -1140,8 +1145,8 @@ static int sf_FSTPd_a32(uint32_t fetchdat) { struct float_status_t status; - uint16_t sw = fpu_state.swd; - float64 save_reg = float64_default_nan; + uint16_t sw = fpu_state.swd; + float64 save_reg = float64_default_nan; FP_ENTER(); FPU_check_pending_exceptions(); @@ -1153,7 +1158,7 @@ sf_FSTPd_a32(uint32_t fetchdat) if (!is_IA_masked()) goto next_ins; } else { - status = i387cw_to_softfloat_status_word(i387_get_control_word()); + status = i387cw_to_softfloat_status_word(i387_get_control_word()); save_reg = floatx80_to_float64(FPU_read_regi(0), &status); if (FPU_exception(fetchdat, status.float_exception_flags, 1)) goto next_ins; @@ -1177,7 +1182,7 @@ static int sf_FSTPe_a16(uint32_t fetchdat) { const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); - floatx80 save_reg; + floatx80 save_reg; FP_ENTER(); 
FPU_check_pending_exceptions(); @@ -1209,7 +1214,7 @@ static int sf_FSTPe_a32(uint32_t fetchdat) { const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); - floatx80 save_reg; + floatx80 save_reg; FP_ENTER(); FPU_check_pending_exceptions(); @@ -1279,26 +1284,26 @@ sf_FSTP_sti(uint32_t fetchdat) } #ifndef FPU_8087 -#ifndef OPS_286_386 -# define sf_FCMOV(condition) \ - static int sf_FCMOV##condition(uint32_t fetchdat) \ - { \ - FP_ENTER(); \ - FPU_check_pending_exceptions(); \ - cpu_state.pc++; \ - if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) \ - FPU_stack_underflow(fetchdat, 0, 0); \ - else { \ - if (cond_##condition) { \ - FPU_save_regi(FPU_read_regi(fetchdat & 7), 0); \ - } \ - } \ - CLOCK_CYCLES_FPU(4); \ - return 0; \ - } +# ifndef OPS_286_386 +# define sf_FCMOV(condition) \ + static int sf_FCMOV##condition(uint32_t fetchdat) \ + { \ + FP_ENTER(); \ + FPU_check_pending_exceptions(); \ + cpu_state.pc++; \ + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) \ + FPU_stack_underflow(fetchdat, 0, 0); \ + else { \ + if (cond_##condition) { \ + FPU_save_regi(FPU_read_regi(fetchdat & 7), 0); \ + } \ + } \ + CLOCK_CYCLES_FPU(4); \ + return 0; \ + } -# define cond_U (PF_SET()) -# define cond_NU (!PF_SET()) +# define cond_U (PF_SET()) +# define cond_NU (!PF_SET()) // clang-format off sf_FCMOV(B) @@ -1310,5 +1315,5 @@ sf_FCMOV(NE) sf_FCMOV(NBE) sf_FCMOV(NU) // clang-format on -#endif +# endif #endif diff --git a/src/cpu/x87_ops_sf_misc.h b/src/cpu/x87_ops_sf_misc.h index 3b468cbf6..85f42e6d5 100644 --- a/src/cpu/x87_ops_sf_misc.h +++ b/src/cpu/x87_ops_sf_misc.h @@ -2,10 +2,10 @@ static int sf_FXCH_sti(uint32_t fetchdat) { const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); - floatx80 st0_reg; - floatx80 sti_reg; - int st0_tag; - int sti_tag; + floatx80 st0_reg; + floatx80 sti_reg; + int st0_tag; + int sti_tag; FP_ENTER(); FPU_check_pending_exceptions(); 
@@ -50,7 +50,7 @@ sf_FCHS(uint32_t fetchdat) else { clear_C1(); st0_reg = FPU_read_regi(0); - result = floatx80_chs(st0_reg); + result = floatx80_chs(st0_reg); FPU_save_regi(result, 0); } CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fchs) : (x87_timings.fchs * cpu_multi)); @@ -72,7 +72,7 @@ sf_FABS(uint32_t fetchdat) else { clear_C1(); st0_reg = FPU_read_regi(0); - result = floatx80_abs(st0_reg); + result = floatx80_abs(st0_reg); FPU_save_regi(result, 0); } CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fabs) : (x87_timings.fabs * cpu_multi)); diff --git a/src/cpu/x87_ops_sf_trans.h b/src/cpu/x87_ops_sf_trans.h index 8f28104bd..5a99abb4c 100644 --- a/src/cpu/x87_ops_sf_trans.h +++ b/src/cpu/x87_ops_sf_trans.h @@ -1,7 +1,7 @@ static int sf_F2XM1(uint32_t fetchdat) { - floatx80 result; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -13,7 +13,7 @@ sf_F2XM1(uint32_t fetchdat) } status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); result = f2xm1(FPU_read_regi(0), &status); - if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) FPU_save_regi(result, 0); next_ins: @@ -25,7 +25,7 @@ next_ins: static int sf_FYL2X(uint32_t fetchdat) { - floatx80 result; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -37,7 +37,7 @@ sf_FYL2X(uint32_t fetchdat) } status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); result = fyl2x(FPU_read_regi(0), FPU_read_regi(1), &status); - if (! 
FPU_exception(fetchdat, status.float_exception_flags, 0)) { + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_pop(); FPU_save_regi(result, 0); } @@ -51,8 +51,8 @@ next_ins: static int sf_FPTAN(uint32_t fetchdat) { - const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); - floatx80 y; + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + floatx80 y; struct float_status_t status; FP_ENTER(); @@ -74,14 +74,14 @@ sf_FPTAN(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); - y = FPU_read_regi(0); + y = FPU_read_regi(0); if (ftan(&y, &status) == -1) { fpu_state.swd |= C2; goto next_ins; } if (floatx80_is_nan(y)) { - if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_save_regi(y, 0); FPU_push(); FPU_save_regi(y, 0); @@ -89,7 +89,7 @@ sf_FPTAN(uint32_t fetchdat) goto next_ins; } - if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_save_regi(y, 0); FPU_push(); FPU_save_regi(Const_1, 0); @@ -115,11 +115,11 @@ sf_FPATAN(uint32_t fetchdat) FPU_stack_underflow(fetchdat, 1, 1); goto next_ins; } - a = FPU_read_regi(0); - b = FPU_read_regi(1); + a = FPU_read_regi(0); + b = FPU_read_regi(1); status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); result = fpatan(a, b, &status); - if (! 
FPU_exception(fetchdat, status.float_exception_flags, 0)) { + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_pop(); FPU_save_regi(result, 0); } @@ -141,7 +141,7 @@ sf_FXTRACT(uint32_t fetchdat) cpu_state.pc++; clear_C1(); -#if 0 //TODO +#if 0 // TODO if ((IS_TAG_EMPTY(0) || IS_TAG_EMPTY(-1))) { if (IS_TAG_EMPTY(0)) FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); @@ -159,15 +159,15 @@ sf_FXTRACT(uint32_t fetchdat) #endif status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = floatx80_extract(&a, &status); + a = FPU_read_regi(0); + b = floatx80_extract(&a, &status); if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_save_regi(b, 0); // exponent FPU_push(); FPU_save_regi(a, 0); // fraction } -#if 0 //TODO. +#if 0 // TODO. next_ins: #endif CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxtract) : (x87_timings.fxtract * cpu_multi)); @@ -195,10 +195,10 @@ sf_FPREM1(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(1); - flags = floatx80_ieee754_remainder(a, b, &result, &quotient, &status); - if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + a = FPU_read_regi(0); + b = FPU_read_regi(1); + flags = floatx80_ieee754_remainder(a, b, &result, &quotient, &status); + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { if (flags >= 0) { cc = 0; if (flags) @@ -242,11 +242,11 @@ sf_FPREM(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word()); - a = FPU_read_regi(0); - b = FPU_read_regi(1); + a = FPU_read_regi(0); + b = FPU_read_regi(1); // handle unsupported extended double-precision floating encodings flags = floatx80_remainder(a, b, &result, &quotient, &status); - if (!
FPU_exception(fetchdat, status.float_exception_flags, 0)) { + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { if (flags >= 0) { cc = 0; if (flags) @@ -273,7 +273,7 @@ next_ins: static int sf_FYL2XP1(uint32_t fetchdat) { - floatx80 result; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -285,7 +285,7 @@ sf_FYL2XP1(uint32_t fetchdat) } status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); result = fyl2xp1(FPU_read_regi(0), FPU_read_regi(1), &status); - if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_save_regi(result, 1); FPU_pop(); } @@ -325,12 +325,12 @@ sf_FSINCOS(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); - y = FPU_read_regi(0); + y = FPU_read_regi(0); if (fsincos(y, &sin_y, &cos_y, &status) == -1) { fpu_state.swd |= C2; goto next_ins; } - if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { FPU_save_regi(sin_y, 0); FPU_push(); FPU_save_regi(cos_y, 0); @@ -346,7 +346,7 @@ next_ins: static int sf_FSCALE(uint32_t fetchdat) { - floatx80 result; + floatx80 result; struct float_status_t status; FP_ENTER(); @@ -358,7 +358,7 @@ sf_FSCALE(uint32_t fetchdat) } status = i387cw_to_softfloat_status_word(i387_get_control_word()); result = floatx80_scale(FPU_read_regi(0), FPU_read_regi(1), &status); - if (! 
FPU_exception(fetchdat, status.float_exception_flags, 0)) + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) FPU_save_regi(result, 0); next_ins: @@ -371,7 +371,7 @@ next_ins: static int sf_FSIN(uint32_t fetchdat) { - floatx80 y; + floatx80 y; struct float_status_t status; FP_ENTER(); @@ -383,12 +383,12 @@ sf_FSIN(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); - y = FPU_read_regi(0); + y = FPU_read_regi(0); if (fsin(&y, &status) == -1) { fpu_state.swd |= C2; goto next_ins; } - if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) FPU_save_regi(y, 0); next_ins: @@ -400,7 +400,7 @@ next_ins: static int sf_FCOS(uint32_t fetchdat) { - floatx80 y; + floatx80 y; struct float_status_t status; FP_ENTER(); @@ -412,12 +412,12 @@ sf_FCOS(uint32_t fetchdat) goto next_ins; } status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); - y = FPU_read_regi(0); + y = FPU_read_regi(0); if (fcos(&y, &status) == -1) { fpu_state.swd |= C2; goto next_ins; } - if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) FPU_save_regi(y, 0); next_ins: From 6ceb63aacca53403c2a7e9cb4799e30fba8dfc3e Mon Sep 17 00:00:00 2001 From: OBattler Date: Sat, 12 Aug 2023 01:15:06 +0200 Subject: [PATCH 3/4] Do not use atomic_load() with variables that are not atomic. 
--- src/device/mouse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/device/mouse.c b/src/device/mouse.c index 3921b6eb6..4ca2d0a6f 100644 --- a/src/device/mouse.c +++ b/src/device/mouse.c @@ -423,8 +423,8 @@ mouse_set_buttons(int buttons) void mouse_get_abs_coords(double *x_abs, double *y_abs) { - *x_abs = atomic_load(&mouse_x_abs); - *y_abs = atomic_load(&mouse_y_abs); + *x_abs = mouse_x_abs; + *y_abs = mouse_y_abs; } void From 33471594f4bb6ee0eeb577d2fb4ab83d3ca7e9d7 Mon Sep 17 00:00:00 2001 From: OBattler Date: Sat, 12 Aug 2023 01:16:49 +0200 Subject: [PATCH 4/4] First batch of video fixes, also fixes the MDA timings. --- src/include/86box/vid_ega.h | 2 +- src/pit.c | 12 ++++++++ src/video/vid_ega.c | 57 ++++++++++++++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/src/include/86box/vid_ega.h b/src/include/86box/vid_ega.h index 97a08b583..a5ca1114c 100644 --- a/src/include/86box/vid_ega.h +++ b/src/include/86box/vid_ega.h @@ -44,9 +44,9 @@ typedef struct ega_t { uint8_t colournocare; uint8_t scrblank; uint8_t plane_mask; + uint8_t ctl_mode; uint8_t pad; uint8_t pad0; - uint8_t pad1; uint8_t crtc[32]; uint8_t gdcreg[16]; uint8_t attrregs[32]; diff --git a/src/pit.c b/src/pit.c index 648e0018b..de7cedc49 100644 --- a/src/pit.c +++ b/src/pit.c @@ -1014,7 +1014,11 @@ pit_set_clock(int clock) PITCONSTD = (cpuclock / 1193182.0); PITCONST = (uint64_t) (PITCONSTD * (double) (1ULL << 32)); +#ifdef IMPRECISE_CGACONST CGACONST = (uint64_t) ((cpuclock / (19687503.0 / 11.0)) * (double) (1ULL << 32)); +#else + CGACONST = (uint64_t) ((cpuclock / (157500000.0 / 88.0)) * (double) (1ULL << 32)); +#endif ISACONST = (uint64_t) ((cpuclock / (double) cpu_isa_speed) * (double) (1ULL << 32)); xt_cpu_multi = 1ULL; } else { @@ -1064,7 +1068,11 @@ pit_set_clock(int clock) } else if (cpuclock != 14318184.0) { PITCONSTD = (cpuclock / 1193182.0); PITCONST = (uint64_t) (PITCONSTD * (double) (1ULL << 32)); +#ifdef 
IMPRECISE_CGACONST CGACONST = (uint64_t) ((cpuclock / (19687503.0 / 11.0)) * (double) (1ULL << 32)); +#else + CGACONST = (uint64_t) ((cpuclock / (157500000.0 / 88.0)) * (double) (1ULL << 32)); +#endif } ISACONST = (1ULL << 32ULL); @@ -1074,7 +1082,11 @@ pit_set_clock(int clock) /* Delay for empty I/O ports. */ io_delay = (int) round(((double) cpu_s->rspeed) / 3000000.0); +#ifdef WRONG_MDACONST MDACONST = (uint64_t) (cpuclock / 2032125.0 * (double) (1ULL << 32)); +#else + MDACONST = (uint64_t) (cpuclock / (16257000.0 / 9.0) * (double) (1ULL << 32)); +#endif HERCCONST = MDACONST; VGACONST1 = (uint64_t) (cpuclock / 25175000.0 * (double) (1ULL << 32)); VGACONST2 = (uint64_t) (cpuclock / 28322000.0 * (double) (1ULL << 32)); diff --git a/src/video/vid_ega.c b/src/video/vid_ega.c index babd45756..67ca78821 100644 --- a/src/video/vid_ega.c +++ b/src/video/vid_ega.c @@ -188,6 +188,10 @@ ega_out(uint16_t addr, uint8_t val, void *priv) break; } break; + case 0x3c6: + if (ega_type == 2) + ega->ctl_mode = val; + break; case 0x3ce: ega->gdcaddr = val; break; @@ -308,6 +312,10 @@ ega_in(uint16_t addr, void *priv) if (ega_type) ret = ega->seqregs[ega->seqaddr & 0xf]; break; + case 0x3c6: + if (ega_type == 2) + ret = ega->ctl_mode; + break; case 0x3c8: if (ega_type) ret = 2; @@ -341,7 +349,7 @@ ega_in(uint16_t addr, void *priv) case 0x10: case 0x11: - // TODO: Return light pen address once implemented + /* TODO: Return light pen address once implemented. */ if (ega_type) ret = ega->crtc[ega->crtcreg]; break; @@ -353,9 +361,23 @@ ega_in(uint16_t addr, void *priv) break; case 0x3da: ega->attrff = 0; - ega->stat ^= 0x30; /*Fools IBM EGA video BIOS self-test*/ + ega->stat ^= 0x30; /* Fools IBM EGA video BIOS self-test. */ ret = ega->stat; break; + case 0x7c6: + ret = 0xfd; /* EGA mode supported. */ + break; + case 0xbc6: + /* 0000 = None; + 0001 = Compaq Dual-Mode (DM) Monitor; + 0010 = RGBI Color Monitor; + 0011 = COMPAQ Color Monitor (RrGgBb) or Compatible; + 0100 - 1111 = Reserved.
*/ + ret = 0x01; + break; + case 0xfc6: + ret = 0xfd; + break; default: break; @@ -368,6 +390,7 @@ void ega_recalctimings(ega_t *ega) { int clksel; + int color; double _dispontime; double _dispofftime; @@ -411,7 +434,26 @@ ega_recalctimings(ega_t *ega) ega->linedbl = ega->crtc[9] & 0x80; ega->rowcount = ega->crtc[9] & 0x1f; - if (ega->eeprom) { + if (ega_type == 2) { + color = (ega->miscout & 1); + clksel = ((ega->miscout & 0xc) >> 2) | ((ega->regs[0xbe] & 0x10) ? 4 : 0); + + if (color) { + if (clksel) + crtcconst = (cpuclock / 16257000.0 * (double) (1ULL << 32)); + else + crtcconst = (cpuclock / (157500000.0 / 11.0) * (double) (1ULL << 32)); + } else { + if (clksel) + crtcconst = (cpuclock / 18981000.0 * (double) (1ULL << 32)); + else + crtcconst = (cpuclock / 16872000.0 * (double) (1ULL << 32)); + } + if (!(ega->seqregs[1] & 1)) + crtcconst *= 9.0; + else + crtcconst *= 8.0; + } else if (ega->eeprom) { clksel = ((ega->miscout & 0xc) >> 2) | ((ega->regs[0xbe] & 0x10) ? 4 : 0); switch (clksel) { @@ -1229,7 +1271,7 @@ ega_standalone_init(const device_t *info) ega_t *ega = malloc(sizeof(ega_t)); int monitor_type; - memset(ega, 0, sizeof(ega_t)); + memset(ega, 0x00, sizeof(ega_t)); video_inform(VIDEO_FLAG_TYPE_SPECIAL, &timing_ega); @@ -1240,6 +1282,8 @@ ega_standalone_init(const device_t *info) if ((info->local == EGA_IBM) || (info->local == EGA_ISKRA) || (info->local == EGA_TSENG)) ega_type = 0; + else if (info->local == EGA_COMPAQ) + ega_type = 2; else ega_type = 1; @@ -1250,6 +1294,7 @@ ega_standalone_init(const device_t *info) 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL); break; case EGA_COMPAQ: + ega->ctl_mode = 0x21; rom_init(&ega->bios_rom, BIOS_CPQ_PATH, 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL); break; @@ -1293,6 +1338,10 @@ ega_standalone_init(const device_t *info) ega->eeprom = malloc(sizeof(ati_eeprom_t)); memset(ega->eeprom, 0, sizeof(ati_eeprom_t)); ati_eeprom_load((ati_eeprom_t *) ega->eeprom, "egawonder800.nvr", 0); + } else if 
(info->local == EGA_COMPAQ) { + io_sethandler(0x07c6, 0x0001, ega_in, NULL, NULL, ega_out, NULL, NULL, ega); + io_sethandler(0x0bc6, 0x0001, ega_in, NULL, NULL, ega_out, NULL, NULL, ega); + io_sethandler(0x0fc6, 0x0001, ega_in, NULL, NULL, ega_out, NULL, NULL, ega); } return ega;