/*Elements taken into account : - U/V integer pairing - FPU/FXCH pairing - Prefix decode delay (including shadowing) - FPU latencies - AGI stalls Elements not taken into account : - Branch prediction (beyond most simplistic approximation) - PMMX decode queue - MMX latencies */ #include #include <86box/86box.h> #include "cpu.h" #include "x86.h" #include "x86_ops.h" #include "x87.h" #include <86box/mem.h> #include "codegen.h" #include "codegen_ops.h" #include "codegen_timing_common.h" /*Instruction has different execution time for 16 and 32 bit data. Does not pair */ #define CYCLES_HAS_MULTI (1 << 28) #define CYCLES_MULTI(c16, c32) (CYCLES_HAS_MULTI | c16 | (c32 << 8)) /*Instruction lasts given number of cycles. Does not pair*/ #define CYCLES(c) (c | PAIR_NP) static int pair_timings[4][4] = { /* Reg RM RMW Branch*/ /*Reg*/ {1, 2, 3, 2}, /*RM*/ {2, 2, 3, 3}, /*RMW*/ {3, 4, 5, 4}, /*Branch*/ {-1, -1, -1, -1} }; /*Instruction follows either register timing, read-modify, or read-modify-write. May be pairable*/ #define CYCLES_REG (0ull << 0) #define CYCLES_RM (1ull << 0) #define CYCLES_RMW (2ull << 0) #define CYCLES_BRANCH (3ull << 0) /*Instruction has immediate data. Can only be used with PAIR_U/PAIR_V/PAIR_UV*/ #define CYCLES_HASIMM (3ull << 2) #define CYCLES_IMM8 (1ull << 2) #define CYCLES_IMM1632 (2ull << 2) #define CYCLES_MASK ((1ull << 7) - 1) /*Instruction does not pair*/ #define PAIR_NP (0ull << 29) /*Instruction pairs in U pipe only*/ #define PAIR_U (1ull << 29) /*Instruction pairs in V pipe only*/ #define PAIR_V (2ull << 29) /*Instruction pairs in both U and V pipes*/ #define PAIR_UV (3ull << 29) /*Instruction pairs in U pipe only and only with FXCH*/ #define PAIR_FX (5ull << 29) /*Instruction is FXCH and only pairs in V pipe with FX pairable instruction*/ #define PAIR_FXCH (6ull << 29) #define PAIR_FPU (4ull << 29) #define PAIR_MASK (7ull << 29) /*comp_time = cycles until instruction complete i_overlap = cycles that overlap with integer f_overlap = cycles that overlap with subsequent FPU*/ #define FPU_CYCLES(comp_time, i_overlap, f_overlap) ((uint64_t)comp_time) | ((uint64_t)i_overlap << 41) | ((uint64_t)f_overlap << 49) | PAIR_FPU #define FPU_COMP_TIME(timing) (timing & 0xff) #define FPU_I_OVERLAP(timing) ((timing >> 41) & 0xff) #define FPU_F_OVERLAP(timing) ((timing >> 49) & 0xff) #define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing)) #define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing)) #define FPU_RESULT_LATENCY(timing) ((timing >> 41) & 0xff) #define INVALID 0 static int u_pipe_full; static uint32_t u_pipe_opcode; static uint64_t *u_pipe_timings; static uint32_t u_pipe_op_32; static uint32_t u_pipe_regmask; static uint32_t u_pipe_fetchdat; static int u_pipe_decode_delay_offset; static uint64_t *u_pipe_deps; static uint32_t regmask_modified; static uint32_t addr_regmask; static int fpu_latency; static int fpu_st_latency[8]; static uint64_t opcode_timings[256] = { /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, /* ADD ADD PUSH ES POP ES*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), /* OR OR OR OR*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, /* OR OR PUSH CS */ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), INVALID, /* ADC ADC ADC ADC*/ /*10*/ PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, /* ADC ADC PUSH SS POP SS*/ PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), /* SBB SBB SBB SBB*/ PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, /* SBB SBB PUSH DS POP DS*/ PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), /* AND AND AND AND*/ /*20*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, /* AND AND DAA*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), /* SUB SUB SUB SUB*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, /* SUB SUB DAS*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), /* XOR XOR XOR XOR*/ /*30*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, /* XOR XOR AAA*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), /* CMP CMP CMP CMP*/ PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, /* CMP CMP AAS*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* INC ESP INC EBP INC ESI INC EDI*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* DEC EAX DEC ECX DEC EDX DEC EBX*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* DEC ESP DEC EBP DEC ESI DEC EDI*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ /*50*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* POP EAX POP ECX POP EDX POP EBX*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* POP ESP POP EBP POP ESI POP EDI*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* PUSHA POPA BOUND ARPL*/ /*60*/ PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(8), PAIR_NP | CYCLES(7), INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), /* INSB INSW OUTSB OUTSW*/ PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(13), /* Jxx*/ /*70*/ PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, /*80*/ INVALID, INVALID, INVALID, INVALID, /* TEST TEST XCHG XCHG*/ PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MOV MOV MOV MOV*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV, /* MOV from seg LEA MOV to seg POP*/ PAIR_NP | CYCLES(1), PAIR_UV | CYCLES_REG, CYCLES(3), PAIR_NP | CYCLES(3), /* NOP XCHG XCHG XCHG*/ /*90*/ PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), /* XCHG XCHG XCHG XCHG*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), /* CBW CWD CALL far WAIT*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(1), /* PUSHF POPF SAHF LAHF*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), /* MOV MOV MOV MOV*/ /*a0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* MOVSB MOVSW CMPSB CMPSW*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), /* TEST TEST STOSB STOSW*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* LODSB LODSW SCASB SCASW*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), /* MOV*/ /*b0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* RET imm RET*/ /*c0*/ INVALID, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), /* LES LDS MOV MOV*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* ENTER LEAVE RETF RETF*/ PAIR_NP | CYCLES(15), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), /* INT3 INT INTO IRET*/ PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ /*e0*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), /* IN AL IN AX OUT_AL OUT_AX*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12), /* CALL JMP JMP JMP*/ PAIR_V | CYCLES_REG, PAIR_V | CYCLES_REG, PAIR_NP | CYCLES(0), PAIR_V | CYCLES_REG, /* IN AL IN AX OUT_AL OUT_AX*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12), /* REPNE REPE*/ /*f0*/ INVALID, INVALID, PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), /* HLT CMC*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), INVALID, INVALID, /* CLC STC CLI STI*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), /* CLD STD INCDEC*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_RMW, INVALID }; static uint64_t opcode_timings_mod3[256] = { /* ADD ADD ADD ADD*/ /*00*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* ADD ADD PUSH ES POP ES*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), /* OR OR OR OR*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* OR OR PUSH CS */ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), INVALID, /* ADC ADC ADC ADC*/ /*10*/ PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, /* ADC ADC PUSH SS POP SS*/ PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), /* SBB SBB SBB SBB*/ PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, /* SBB SBB PUSH DS POP DS*/ PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3), /* AND AND AND AND*/ /*20*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* AND AND DAA*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), /* SUB SUB SUB SUB*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* SUB SUB DAS*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), /* XOR XOR XOR XOR*/ /*30*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* XOR XOR AAA*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), /* CMP CMP CMP CMP*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* CMP CMP AAS*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), /* INC EAX INC ECX INC EDX INC EBX*/ /*40*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* INC ESP INC EBP INC ESI INC EDI*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* DEC EAX DEC ECX DEC EDX DEC EBX*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* DEC ESP DEC EBP DEC ESI DEC EDI*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/ /*50*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* POP EAX POP ECX POP EDX POP EBX*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* POP ESP POP EBP POP ESI POP EDI*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* PUSHA POPA BOUND ARPL*/ /*60*/ PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(8), PAIR_NP | CYCLES(7), INVALID, INVALID, INVALID, INVALID, /* PUSH imm IMUL PUSH imm IMUL*/ PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), /* INSB INSW OUTSB OUTSW*/ PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(13), /* Jxx*/ /*70*/ PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, /*80*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* TEST TEST XCHG XCHG*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MOV MOV MOV MOV*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* MOV from seg LEA MOV to seg POP*/ PAIR_NP | CYCLES(1), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* NOP XCHG XCHG XCHG*/ /*90*/ PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), /* XCHG XCHG XCHG XCHG*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), /* CBW CWD CALL far WAIT*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(1), /* PUSHF POPF SAHF LAHF*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), /* MOV MOV MOV MOV*/ /*a0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* MOVSB MOVSW CMPSB CMPSW*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), /* TEST TEST STOSB STOSW*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* LODSB LODSW SCASB SCASW*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), /* MOV*/ /*b0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* RET imm RET*/ /*c0*/ INVALID, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), /* LES LDS MOV MOV*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /* ENTER LEAVE RETF RETF*/ PAIR_NP | CYCLES(15), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), /* INT3 INT INTO IRET*/ PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), /*d0*/ INVALID, INVALID, INVALID, INVALID, /* AAM AAD SETALC XLAT*/ PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* LOOPNE LOOPE LOOP JCXZ*/ /*e0*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), /* IN AL IN AX OUT_AL OUT_AX*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12), /* CALL JMP JMP JMP*/ PAIR_V | CYCLES_REG, PAIR_V | CYCLES_REG, PAIR_NP | CYCLES(0), PAIR_V | CYCLES_REG, /* IN AL IN AX OUT_AL OUT_AX*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12), /* REPNE REPE*/ /*f0*/ INVALID, INVALID, PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0), /* HLT CMC*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), INVALID, INVALID, /* CLC STC CLI STI*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), /* CLD STD INCDEC*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_REG, INVALID }; static uint64_t opcode_timings_0f[256] = { /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*20*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*30*/ PAIR_NP | CYCLES(9), CYCLES(1), PAIR_NP | CYCLES(9), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*40*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*50*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*60*/ PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, /*70*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_NP | CYCLES(100), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, /*80*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), /*90*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /*a0*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(8), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), INVALID, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(10), /*b0*/ PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, INVALID, PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /*c0*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), /*d0*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, /*e0*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, /*f0*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, }; static uint64_t opcode_timings_0f_mod3[256] = { /*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10), INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID, PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*10*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*20*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*30*/ PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(9), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*40*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*50*/ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*60*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /*70*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(100), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, /*80*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), /*90*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /*a0*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(8), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), INVALID, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(10), /*b0*/ PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, INVALID, PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /*c0*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), /*d0*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, /*e0*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, /*f0*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, }; static uint64_t opcode_timings_shift[8] = { PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, }; static uint64_t opcode_timings_shift_mod3[8] = { PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, }; static uint64_t opcode_timings_f6[8] = { /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) }; static uint64_t opcode_timings_f6_mod3[8] = { /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(17), PAIR_NP | CYCLES(22) }; static uint64_t opcode_timings_f7[8] = { /* TST NOT NEG*/ PAIR_UV | CYCLES_RM, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) }; static uint64_t opcode_timings_f7_mod3[8] = { /* TST NOT NEG*/ PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), /* MUL IMUL DIV IDIV*/ PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(11,10), PAIR_NP | CYCLES_MULTI(25,41), PAIR_NP | CYCLES_MULTI(30,46) }; static uint64_t opcode_timings_ff[8] = { /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), /* JMP JMP far PUSH*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID }; static uint64_t opcode_timings_ff_mod3[8] = { /* INC DEC CALL CALL far*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0), /* JMP JMP far PUSH*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(2), INVALID }; static uint64_t opcode_timings_d8[8] = { /* FADDs FMULs FCOMs FCOMPs*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /* FSUBs FSUBRs FDIVs FDIVRs*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) }; static uint64_t opcode_timings_d8_mod3[8] = { /* FADD FMUL FCOM FCOMP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /* FSUB FSUBR FDIV FDIVR*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) }; static uint64_t opcode_timings_d9[8] = { /* FLDs FSTs FSTPs*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), /* FLDENV FLDCW FSTENV FSTCW*/ PAIR_NP | FPU_CYCLES(32,0,0), PAIR_NP | FPU_CYCLES(8,0,0), PAIR_NP | FPU_CYCLES(48,0,0), PAIR_NP | FPU_CYCLES(2,0,0) }; static uint64_t opcode_timings_d9_mod3[64] = { /*FLD*/ PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /*FXCH*/ PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), /*FNOP*/ PAIR_NP | FPU_CYCLES(3,0,0), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /*FSTP*/ PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), /* opFCHS opFABS*/ PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), INVALID, INVALID, /* opFTST opFXAM*/ PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(21,4,0), INVALID, INVALID, /* opFLD1 opFLDL2T opFLDL2E opFLDPI*/ PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(5,2,2), /* opFLDEG2 opFLDLN2 opFLDZ*/ PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(2,0,0), INVALID, /* opF2XM1 opFYL2X opFPTAN opFPATAN*/ PAIR_NP | FPU_CYCLES(53,2,2), PAIR_NP | FPU_CYCLES(103,2,2), PAIR_NP | FPU_CYCLES(120,36,0), PAIR_NP | FPU_CYCLES(112,2,2), /* opFDECSTP opFINCSTP,*/ INVALID, INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), /* opFPREM opFSQRT opFSINCOS*/ PAIR_NP | FPU_CYCLES(64,2,2), INVALID, PAIR_NP | FPU_CYCLES(70,69,2), PAIR_NP | FPU_CYCLES(89,2,2), /* opFRNDINT opFSCALE opFSIN opFCOS*/ PAIR_NP | FPU_CYCLES(9,0,0), PAIR_NP | FPU_CYCLES(20,5,0), PAIR_NP | FPU_CYCLES(65,2,2), PAIR_NP | FPU_CYCLES(65,2,2) }; static uint64_t opcode_timings_da[8] = { /* FIADDl FIMULl FICOMl FICOMPl*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), /* FISUBl FISUBRl FIDIVl FIDIVRl*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) }; static uint64_t opcode_timings_da_mod3[8] = { INVALID, INVALID, INVALID, INVALID, /* FCOMPP*/ INVALID, PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID }; static uint64_t opcode_timings_db[8] = { /* FLDil FSTil FSTPil*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), /* FLDe FSTPe*/ INVALID, PAIR_NP | FPU_CYCLES(3,0,0), INVALID, PAIR_NP | FPU_CYCLES(3,0,0) }; static uint64_t opcode_timings_db_mod3[64] = { INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, /* opFNOP opFCLEX opFINIT*/ INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(7,0,0), PAIR_NP | FPU_CYCLES(17,0,0), /* opFNOP opFNOP*/ PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, }; static uint64_t opcode_timings_dc[8] = { /* FADDd FMULd FCOMd FCOMPd*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), /* FSUBd FSUBRd FDIVd FDIVRd*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) }; static uint64_t opcode_timings_dc_mod3[8] = { /* opFADDr opFMULr*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, INVALID, /* opFSUBRr opFSUBr opFDIVRr opFDIVr*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) }; static uint64_t opcode_timings_dd[8] = { /* FLDd FSTd FSTPd*/ PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0), /* FRSTOR FSAVE FSTSW*/ PAIR_NP | FPU_CYCLES(70,0,0), INVALID, PAIR_NP | FPU_CYCLES(127,0,0), PAIR_NP | FPU_CYCLES(6,0,0) }; static uint64_t opcode_timings_dd_mod3[8] = { /* FFFREE FST FSTP*/ PAIR_NP | FPU_CYCLES(2,0,0), INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), /* FUCOM FUCOMP*/ PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID }; static uint64_t opcode_timings_de[8] = { /* FIADDw FIMULw FICOMw FICOMPw*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0), /* FISUBw FISUBRw FIDIVw FIDIVRw*/ PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2) }; static uint64_t opcode_timings_de_mod3[8] = { /* FADDP FMULP FCOMPP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, PAIR_FX | FPU_CYCLES(1,0,0), /* FSUBP FSUBRP FDIVP FDIVRP*/ PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2) }; static uint64_t opcode_timings_df[8] = { /* FILDiw FISTiw FISTPiw*/ PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0), /* FILDiq FBSTP FISTPiq*/ INVALID, PAIR_NP | FPU_CYCLES(3,2,2), PAIR_NP | FPU_CYCLES(148,0,0), PAIR_NP | FPU_CYCLES(6,0,0) }; static uint64_t opcode_timings_df_mod3[8] = { INVALID, INVALID, INVALID, INVALID, /* FSTSW AX*/ PAIR_NP | FPU_CYCLES(6,0,0), INVALID, INVALID, INVALID }; static uint64_t opcode_timings_81[8] = { PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RM | CYCLES_IMM1632 }; static uint64_t opcode_timings_81_mod3[8] = { PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG }; static uint64_t opcode_timings_8x[8] = { PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RM | CYCLES_IMM8 }; static uint64_t opcode_timings_8x_mod3[8] = { PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG }; static int decode_delay, decode_delay_offset; static uint8_t last_prefix; static int prefixes; static inline int COUNT(uint64_t timings, uint64_t deps, int op_32) { if ((timings & PAIR_FPU) && !(deps & FPU_FXCH)) return FPU_I_LATENCY(timings); if (timings & CYCLES_HAS_MULTI) { if (op_32 & 0x100) return ((uintptr_t)timings >> 8) & 0xff; return (uintptr_t)timings & 0xff; } if (!(timings & PAIR_MASK)) return timings & 0xffff; if ((timings & PAIR_MASK) == PAIR_FX) return timings & 0xffff; if ((timings & PAIR_MASK) == PAIR_FXCH) return timings & 0xffff; if ((timings & PAIR_UV) && !(timings & PAIR_FPU)) timings &= 3; switch (timings & CYCLES_MASK) { case CYCLES_REG: return 1; case CYCLES_RM: return 2; case CYCLES_RMW: return 3; case CYCLES_BRANCH: return cpu_has_feature(CPU_FEATURE_MMX) ? 1 : 2; } fatal("Illegal COUNT %016llx\n", timings); return timings; } static int codegen_fpu_latencies(uint64_t deps, int reg) { int latency = fpu_latency; if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency) latency = fpu_st_latency[0]; if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency) latency = fpu_st_latency[1]; if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency) latency = fpu_st_latency[reg]; return latency; } #define SUB_AND_CLAMP(latency, count) \ latency -= count; \ if (latency < 0) \ latency = 0 static void codegen_fpu_latency_clock(int count) { SUB_AND_CLAMP(fpu_latency, count); SUB_AND_CLAMP(fpu_st_latency[0], count); SUB_AND_CLAMP(fpu_st_latency[1], count); SUB_AND_CLAMP(fpu_st_latency[2], count); SUB_AND_CLAMP(fpu_st_latency[3], count); SUB_AND_CLAMP(fpu_st_latency[4], count); SUB_AND_CLAMP(fpu_st_latency[5], count); SUB_AND_CLAMP(fpu_st_latency[6], count); SUB_AND_CLAMP(fpu_st_latency[7], count); } static inline int codegen_timing_has_displacement(uint32_t fetchdat, int op_32) { if (op_32 & 0x200) { if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { /*Has SIB*/ if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0x700) == 0x500) return 1; } else { if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x05) return 1; } } else { if ((fetchdat & 0xc0) == 0x40 || (fetchdat & 0xc0) == 0x80 || (fetchdat & 0xc7) == 0x06) return 1; } return 0; } /*The instruction is only of interest here if it's longer than 7 bytes, as that's the limit on Pentium MMX parallel decoding*/ static inline int codegen_timing_instr_length(uint64_t timing, uint32_t fetchdat, int op_32) { int len = prefixes; if ((timing & CYCLES_MASK) == CYCLES_RM || (timing & CYCLES_MASK) == CYCLES_RMW) { len += 2; /*Opcode + ModR/M*/ if ((timing & CYCLES_HASIMM) == CYCLES_IMM8) len++; if ((timing & CYCLES_HASIMM) == CYCLES_IMM1632) len += (op_32 & 0x100) ? 4 : 2; if (op_32 & 0x200) { if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) { /* Has SIB*/ len++; if ((fetchdat & 0xc0) == 0x40) len++; else if ((fetchdat & 0xc0) == 0x80) len += 4; else if ((fetchdat & 0x700) == 0x500) len += 4; } else { if ((fetchdat & 0xc0) == 0x40) len++; else if ((fetchdat & 0xc0) == 0x80) len += 4; else if ((fetchdat & 0xc7) == 0x05) len += 4; } } else { if ((fetchdat & 0xc0) == 0x40) len++; else if ((fetchdat & 0xc0) == 0x80) len += 2; else if ((fetchdat & 0xc7) == 0x06) len += 2; } } return len; } void codegen_timing_pentium_block_start() { u_pipe_full = decode_delay = decode_delay_offset = 0; } void codegen_timing_pentium_start() { // decode_delay = 0; last_prefix = 0; prefixes = 0; } void codegen_timing_pentium_prefix(uint8_t prefix, uint32_t fetchdat) { prefixes++; if ((prefix & 0xf8) == 0xd8) { last_prefix = prefix; return; } if (cpu_has_feature(CPU_FEATURE_MMX) && prefix == 0x0f) { /*On Pentium MMX 0fh prefix is 'free'*/ last_prefix = prefix; return; } if (cpu_has_feature(CPU_FEATURE_MMX) && (prefix == 0x66 || prefix == 0x67)) { /*On Pentium MMX 66h and 67h prefixes take 2 clocks*/ decode_delay_offset += 2; last_prefix = prefix; return; } if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) { /*On Pentium 0fh prefix is 'free' when used on conditional jumps*/ last_prefix = prefix; return; } /*On Pentium all prefixes take 1 cycle to decode. Decode may be shadowed by execution of previous instructions*/ decode_delay_offset++; last_prefix = prefix; } static int check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32) { uint32_t addr_regmask = get_addr_regmask(deps[opcode], fetchdat, op_32); /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not cause AGIs with each other, but do with instructions that use it explicitly*/ if ((addr_regmask & REGMASK_IMPL_ESP) && (regmask_modified & (1 << REG_ESP)) && !(regmask_modified & REGMASK_IMPL_ESP)) addr_regmask |= (1 << REG_ESP); return (regmask_modified & addr_regmask) & ~REGMASK_IMPL_ESP; } static void codegen_instruction(uint64_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay) { int instr_cycles, latency = 0; if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7); else { /* if (timings[opcode] & FPU_WRITE_ST0) fatal("FPU_WRITE_ST0\n"); if (timings[opcode] & FPU_WRITE_ST1) fatal("FPU_WRITE_ST1\n"); if (timings[opcode] & FPU_WRITE_STREG) fatal("FPU_WRITE_STREG\n");*/ instr_cycles = 0; } if ((decode_delay + decode_delay_offset) > 0) codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles); else codegen_fpu_latency_clock(instr_cycles); instr_cycles += COUNT(timings[opcode], deps[opcode], op_32); instr_cycles += exec_delay; if ((decode_delay + decode_delay_offset) > 0) codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset; else codegen_block_cycles += instr_cycles; decode_delay = (-instr_cycles) + 1; if (deps[opcode] & FPU_POP) { int c; for (c = 0; c < 7; c++) fpu_st_latency[c] = fpu_st_latency[c+1]; fpu_st_latency[7] = 0; } if (deps[opcode] & FPU_POP2) { int c; for (c = 0; c < 6; c++) fpu_st_latency[c] = fpu_st_latency[c+2]; fpu_st_latency[6] = fpu_st_latency[7] = 0; } if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) { /* if (fpu_latency) fatal("Bad latency FPU\n");*/ fpu_latency = FPU_F_LATENCY(timings[opcode]); } if (deps[opcode] & FPU_PUSH) { int c; for (c = 0; c < 7; c++) fpu_st_latency[c+1] = fpu_st_latency[c]; fpu_st_latency[0] = 0; } if (deps[opcode] & FPU_WRITE_ST0) { /* if (fpu_st_latency[0]) fatal("Bad latency ST0\n");*/ fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]); } if (deps[opcode] & FPU_WRITE_ST1) { /* if (fpu_st_latency[1]) fatal("Bad latency ST1\n");*/ fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]); } if (deps[opcode] & FPU_WRITE_STREG) { int reg = fetchdat & 7; if (deps[opcode] & FPU_POP) reg--; if (reg >= 0 && !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) { fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]); } } } void codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc) { uint64_t *timings; uint64_t *deps; int mod3 = ((fetchdat & 0xc0) == 0xc0); int bit8 = !(opcode & 1); int agi_stall = 0; switch (last_prefix) { case 0x0f: timings = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f; deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f; break; case 0xd8: timings = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8; deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8; opcode = (opcode >> 3) & 7; break; case 0xd9: timings = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9; deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xda: timings = mod3 ? opcode_timings_da_mod3 : opcode_timings_da; deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da; opcode = (opcode >> 3) & 7; break; case 0xdb: timings = mod3 ? opcode_timings_db_mod3 : opcode_timings_db; deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db; opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7; break; case 0xdc: timings = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc; deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc; opcode = (opcode >> 3) & 7; break; case 0xdd: timings = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd; deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd; opcode = (opcode >> 3) & 7; break; case 0xde: timings = mod3 ? opcode_timings_de_mod3 : opcode_timings_de; deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de; opcode = (opcode >> 3) & 7; break; case 0xdf: timings = mod3 ? opcode_timings_df_mod3 : opcode_timings_df; deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df; opcode = (opcode >> 3) & 7; break; default: switch (opcode) { case 0x80: case 0x82: case 0x83: timings = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x; deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x; opcode = (fetchdat >> 3) & 7; break; case 0x81: timings = mod3 ? opcode_timings_81_mod3 : opcode_timings_81; deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81; opcode = (fetchdat >> 3) & 7; break; case 0xc0: case 0xc1: case 0xd0: case 0xd1: timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift; opcode = (fetchdat >> 3) & 7; break; case 0xd2: case 0xd3: timings = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift; deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl; opcode = (fetchdat >> 3) & 7; break; case 0xf6: timings = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6; deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6; opcode = (fetchdat >> 3) & 7; break; case 0xf7: timings = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7; deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7; opcode = (fetchdat >> 3) & 7; break; case 0xff: timings = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff; deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff; opcode = (fetchdat >> 3) & 7; break; default: timings = mod3 ? opcode_timings_mod3 : opcode_timings; deps = mod3 ? opcode_deps_mod3 : opcode_deps; break; } } if (u_pipe_full) { uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32); if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && (timings[opcode] & PAIR_MASK) != PAIR_FXCH) goto nopair; if ((timings[opcode] & PAIR_MASK) == PAIR_FXCH && (u_pipe_timings[u_pipe_opcode] & PAIR_MASK) != PAIR_FX) goto nopair; if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && (timings[opcode] & PAIR_MASK) == PAIR_FXCH) { int temp; if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) agi_stall = 1; codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); temp = fpu_st_latency[fetchdat & 7]; fpu_st_latency[fetchdat & 7] = fpu_st_latency[0]; fpu_st_latency[0] = temp; u_pipe_full = 0; decode_delay_offset = 0; regmask_modified = u_pipe_regmask; addr_regmask = 0; return; } if ((timings[opcode] & PAIR_V) && !(u_pipe_regmask & regmask) && (decode_delay+decode_delay_offset+u_pipe_decode_delay_offset) <= 0) { int has_displacement; if (timings[opcode] & CYCLES_HASIMM) has_displacement = codegen_timing_has_displacement(fetchdat, op_32); else has_displacement = 0; if (!has_displacement && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) { int t1 = u_pipe_timings[u_pipe_opcode] & CYCLES_MASK; int t2 = timings[opcode] & CYCLES_MASK; int t_pair; uint64_t temp_timing; uint64_t temp_deps = 0; if (!(u_pipe_timings[u_pipe_opcode] & PAIR_FPU)) t1 &= 3; if (!(timings[opcode] & PAIR_FPU)) t2 &= 3; if (t1 < 0 || t2 < 0 || t1 > CYCLES_BRANCH || t2 > CYCLES_BRANCH) fatal("Pair out of range\n"); t_pair = pair_timings[t1][t2]; if (t_pair < 1) fatal("Illegal pair timings : t1=%i t2=%i u_opcode=%02x v_opcode=%02x\n", t1, t2, u_pipe_opcode, opcode); /*Instruction can pair with previous*/ temp_timing = t_pair; if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) agi_stall = 1; codegen_instruction(&temp_timing, &temp_deps, 0, 0, 0, 0, agi_stall); u_pipe_full = 0; decode_delay_offset = 0; regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask; addr_regmask = 0; return; } } nopair: /*Instruction can not pair with previous*/ /*Run previous now*/ if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) agi_stall = 1; codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall); u_pipe_full = 0; regmask_modified = u_pipe_regmask; addr_regmask = 0; } if ((timings[opcode] & PAIR_U) && (decode_delay + decode_delay_offset) <= 0) { int has_displacement; if (timings[opcode] & CYCLES_HASIMM) has_displacement = codegen_timing_has_displacement(fetchdat, op_32); else has_displacement = 0; if ((!has_displacement || cpu_has_feature(CPU_FEATURE_MMX)) && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) { /*Instruction might pair with next*/ u_pipe_full = 1; u_pipe_opcode = opcode; u_pipe_timings = timings; u_pipe_op_32 = op_32; u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8); u_pipe_fetchdat = fetchdat; u_pipe_decode_delay_offset = decode_delay_offset; u_pipe_deps = deps; decode_delay_offset = 0; return; } } /*Instruction can not pair and must run now*/ if (check_agi(deps, opcode, fetchdat, op_32)) agi_stall = 1; codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall); decode_delay_offset = 0; regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8); addr_regmask = 0; } void codegen_timing_pentium_block_end() { if (u_pipe_full) { /*Run previous now*/ if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32)) codegen_block_cycles++; codegen_block_cycles += COUNT(u_pipe_timings[u_pipe_opcode], u_pipe_deps[u_pipe_opcode], u_pipe_op_32) + decode_delay + decode_delay_offset; u_pipe_full = 0; } } int codegen_timing_pentium_jump_cycles() { return 0; } codegen_timing_t codegen_timing_pentium = { codegen_timing_pentium_start, codegen_timing_pentium_prefix, codegen_timing_pentium_opcode, codegen_timing_pentium_block_start, codegen_timing_pentium_block_end, codegen_timing_pentium_jump_cycles };