Files
86Box/src/cpu/codegen_timing_pentium.c
Jasmine Iwanek ad3eaf17a9 More unique names
2024-08-12 20:02:19 -04:00

1329 lines
82 KiB
C

/*Elements taken into account:
        - U/V integer pairing
        - FPU/FXCH pairing
        - Prefix decode delay (including shadowing)
        - FPU latencies
        - AGI stalls
  Elements not taken into account:
        - Branch prediction (beyond the most simplistic approximation)
        - PMMX decode queue
        - MMX latencies
*/
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <wchar.h>
#include <86box/86box.h>
#include "cpu.h"
#include <86box/mem.h>
#include <86box/plat_unused.h>
#include <inttypes.h>
#include "x86.h"
#include "x86_ops.h"
#include "x87_sf.h"
#include "x87.h"
#include "codegen.h"
#include "codegen_ops.h"
#include "codegen_timing_common.h"
/*Instruction has different execution time for 16 and 32 bit data. Does not pair.
  CYCLES_HAS_MULTI (bit 28) flags such an entry; the 16-bit cycle count sits in
  the low bits and the 32-bit cycle count is placed from bit 8 upwards.
  Arguments are parenthesised so that expression arguments (e.g. `a | b`) are
  evaluated as a unit before being shifted or OR-ed into the entry. */
#define CYCLES_HAS_MULTI       (1 << 28)
#define CYCLES_MULTI(c16, c32) (CYCLES_HAS_MULTI | (c16) | ((c32) << 8))
/*Instruction lasts given number of cycles. Does not pair*/
#define CYCLES(c) ((c) | PAIR_NP)
/* 4x4 cycle table whose rows and columns are both ordered Reg, RM, RMW,
   Branch (the same order as the CYCLES_REG..CYCLES_BRANCH class values).
   NOTE(review): presumably indexed [first instruction class][second
   instruction class] when costing a pair, with the -1 row marking
   combinations that are never looked up - confirm at the lookup site. */
static int pair_timings[4][4] = {
    /*              Reg  RM  RMW  Branch */
    /* Reg    */ [0] = {  1,  2,  3,  2 },
    /* RM     */ [1] = {  2,  2,  3,  3 },
    /* RMW    */ [2] = {  3,  4,  5,  4 },
    /* Branch */ [3] = { -1, -1, -1, -1 },
};
/*Instruction follows either register timing, read-modify, or read-modify-write.
  May be pairable*/
#define CYCLES_REG    (0ull << 0)
#define CYCLES_RM     (1ull << 0)
#define CYCLES_RMW    (2ull << 0)
#define CYCLES_BRANCH (3ull << 0)
/*Instruction has immediate data. Can only be used with PAIR_U/PAIR_V/PAIR_UV*/
#define CYCLES_HASIMM  (3ull << 2)
#define CYCLES_IMM8    (1ull << 2)
#define CYCLES_IMM1632 (2ull << 2)
/*Low seven bits of a timing entry*/
#define CYCLES_MASK ((1ull << 7) - 1)
/*Pairing class occupies bits 29-31 of a timing entry (see PAIR_MASK)*/
/*Instruction does not pair*/
#define PAIR_NP (0ull << 29)
/*Instruction pairs in U pipe only*/
#define PAIR_U (1ull << 29)
/*Instruction pairs in V pipe only*/
#define PAIR_V (2ull << 29)
/*Instruction pairs in both U and V pipes*/
#define PAIR_UV (3ull << 29)
/*Instruction pairs in U pipe only and only with FXCH*/
#define PAIR_FX (5ull << 29)
/*Instruction is FXCH and only pairs in V pipe with FX pairable instruction*/
#define PAIR_FXCH (6ull << 29)
/*Bit 31: set by FPU_CYCLES() on every entry it builds, and also contained in
  PAIR_FX and PAIR_FXCH*/
#define PAIR_FPU  (4ull << 29)
#define PAIR_MASK (7ull << 29)
/*comp_time = cycles until instruction complete
  i_overlap = cycles that overlap with integer
  f_overlap = cycles that overlap with subsequent FPU
  Layout: comp_time in bits 0-7, i_overlap from bit 41, f_overlap from bit 49.
  The expansion and every argument are parenthesised so the macro behaves as a
  single operand in any expression (e.g. FPU_COMP_TIME(FPU_CYCLES(...)) or
  FPU_CYCLES(...) & PAIR_MASK), not only when OR-ed into a table entry. */
#define FPU_CYCLES(comp_time, i_overlap, f_overlap) \
    (((uint64_t) (comp_time)) | ((uint64_t) (i_overlap) << 41) | ((uint64_t) (f_overlap) << 49) | PAIR_FPU)
/*Field accessors for entries built with FPU_CYCLES()*/
#define FPU_COMP_TIME(timing) ((timing) & 0xff)
#define FPU_I_OVERLAP(timing) (((timing) >> 41) & 0xff)
#define FPU_F_OVERLAP(timing) (((timing) >> 49) & 0xff)
/*comp_time minus the cycles that overlap with integer*/
#define FPU_I_LATENCY(timing) (FPU_COMP_TIME(timing) - FPU_I_OVERLAP(timing))
/*i_overlap minus the cycles that overlap with subsequent FPU*/
#define FPU_F_LATENCY(timing) (FPU_I_OVERLAP(timing) - FPU_F_OVERLAP(timing))
/*Reads the same field as FPU_I_OVERLAP*/
#define FPU_RESULT_LATENCY(timing) (((timing) >> 41) & 0xff)
/*Table filler for slots that carry no timing entry*/
#define INVALID 0
/* File-scope scheduling state. All of it is zero-initialised (static storage).
   NOTE(review): the uses of these variables are outside this excerpt; the
   descriptions below are inferred from names and types only - confirm against
   the code that reads and writes them. */
/* presumably non-zero while an instruction is held waiting for a pairing partner */
static int u_pipe_full;
/* presumably a snapshot of the held instruction: its opcode, the timing table
   it was looked up in, operand-size flags, register mask and fetched bytes */
static uint32_t u_pipe_opcode;
static uint64_t *u_pipe_timings;
static uint32_t u_pipe_op_32;
static uint32_t u_pipe_regmask;
static uint32_t u_pipe_fetchdat;
static int u_pipe_decode_delay_offset;
static uint64_t *u_pipe_deps;
/* presumably register bitmasks used for the AGI-stall check listed in the file header */
static uint32_t regmask_modified;
static uint32_t addr_regmask;
/* presumably outstanding FPU latency overall and per stack register ST(0)-ST(7) */
static int fpu_latency;
static int fpu_st_latency[8];
/* Timing/pairing descriptor for every one-byte opcode 0x00-0xFF; the row
   markers give the opcode of the first entry of each group of sixteen.
   Each entry ORs a PAIR_* class with either a CYCLES_* timing class or a fixed
   CYCLES(n) count. INVALID (0) fills slots that carry no entry in this table.
   Entries 0x8B (MOV) and 0x8E (MOV to seg) are spelled out in full like their
   neighbours; the values are unchanged because CYCLES_REG and PAIR_NP are
   both zero.
   NOTE(review): presumably the variant used when the ModR/M byte selects a
   memory operand, with opcode_timings_p6_mod3 as the register-operand
   counterpart - confirm at the lookup site. */
static uint64_t opcode_timings_p6[256] = {
// clang-format off
/* ADD ADD ADD ADD*/
/*00*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM,
/* ADD ADD PUSH ES POP ES*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3),
/* OR OR OR OR*/
PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM,
/* OR OR PUSH CS */
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), INVALID,
/* ADC ADC ADC ADC*/
/*10*/ PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
/* ADC ADC PUSH SS POP SS*/
PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3),
/* SBB SBB SBB SBB*/
PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RMW, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
/* SBB SBB PUSH DS POP DS*/
PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3),
/* AND AND AND AND*/
/*20*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM,
/* AND AND DAA*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3),
/* SUB SUB SUB SUB*/
PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM,
/* SUB SUB DAS*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3),
/* XOR XOR XOR XOR*/
/*30*/ PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RMW, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM,
/* XOR XOR AAA*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3),
/* CMP CMP CMP CMP*/
PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM,
/* CMP CMP AAS*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3),
/* INC EAX INC ECX INC EDX INC EBX*/
/*40*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* INC ESP INC EBP INC ESI INC EDI*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* DEC EAX DEC ECX DEC EDX DEC EBX*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* DEC ESP DEC EBP DEC ESI DEC EDI*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/
/*50*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* POP EAX POP ECX POP EDX POP EBX*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* POP ESP POP EBP POP ESI POP EDI*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* PUSHA POPA BOUND ARPL*/
/*60*/ PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(8), PAIR_NP | CYCLES(7),
INVALID, INVALID, INVALID, INVALID,
/* PUSH imm IMUL PUSH imm IMUL*/
PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10),
/* INSB INSW OUTSB OUTSW*/
PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(13),
/* Jxx*/
/*70*/ PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH,
PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH,
PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH,
PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH,
/*80*/ INVALID, INVALID, INVALID, INVALID,
/* TEST TEST XCHG XCHG*/
PAIR_UV | CYCLES_RM, PAIR_UV | CYCLES_RM, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/* MOV MOV MOV MOV*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* MOV from seg LEA MOV to seg POP*/
PAIR_NP | CYCLES(1), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/* NOP XCHG XCHG XCHG*/
/*90*/ PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
/* XCHG XCHG XCHG XCHG*/
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
/* CBW CWD CALL far WAIT*/
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(1),
/* PUSHF POPF SAHF LAHF*/
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
/* MOV MOV MOV MOV*/
/*a0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* MOVSB MOVSW CMPSB CMPSW*/
PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5),
/* TEST TEST STOSB STOSW*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/* LODSB LODSW SCASB SCASW*/
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4),
/* MOV*/
/*b0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* RET imm RET*/
/*c0*/ INVALID, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2),
/* LES LDS MOV MOV*/
PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* ENTER LEAVE RETF RETF*/
PAIR_NP | CYCLES(15), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0),
/* INT3 INT INTO IRET*/
PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0),
/*d0*/ INVALID, INVALID, INVALID, INVALID,
/* AAM AAD SETALC XLAT*/
PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4),
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/* LOOPNE LOOPE LOOP JCXZ*/
/*e0*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5),
/* IN AL IN AX OUT_AL OUT_AX*/
PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12),
/* CALL JMP JMP JMP*/
PAIR_V | CYCLES_REG, PAIR_V | CYCLES_REG, PAIR_NP | CYCLES(0), PAIR_V | CYCLES_REG,
/* IN AL IN AX OUT_AL OUT_AX*/
PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12),
/* REPNE REPE*/
/*f0*/ INVALID, INVALID, PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0),
/* HLT CMC*/
PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), INVALID, INVALID,
/* CLC STC CLI STI*/
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7),
/* CLD STD INCDEC*/
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_RMW, INVALID
// clang-format on
};
/* Timing/pairing descriptor for every one-byte opcode 0x00-0xFF; the row
   markers give the opcode of the first entry of each group of sixteen.
   Compared with opcode_timings_p6, the ALU/MOV/TEST entries here use
   CYCLES_REG instead of CYCLES_RM/CYCLES_RMW, and row 0x80-0x83 is populated.
   INVALID (0) fills slots that carry no entry in this table.
   NOTE(review): presumably selected when the ModR/M mod field is 3 (register
   operand) - confirm at the lookup site. */
static uint64_t opcode_timings_p6_mod3[256] = {
// clang-format off
/* ADD ADD ADD ADD*/
/*00*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* ADD ADD PUSH ES POP ES*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3),
/* OR OR OR OR*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* OR OR PUSH CS */
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(1), INVALID,
/* ADC ADC ADC ADC*/
/*10*/ PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG,
/* ADC ADC PUSH SS POP SS*/
PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3),
/* SBB SBB SBB SBB*/
PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG,
/* SBB SBB PUSH DS POP DS*/
PAIR_U | CYCLES_REG, PAIR_U | CYCLES_REG, PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(3),
/* AND AND AND AND*/
/*20*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* AND AND DAA*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3),
/* SUB SUB SUB SUB*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* SUB SUB DAS*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3),
/* XOR XOR XOR XOR*/
/*30*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* XOR XOR AAA*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3),
/* CMP CMP CMP CMP*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* CMP CMP AAS*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_NP | CYCLES(3),
/* INC EAX INC ECX INC EDX INC EBX*/
/*40*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* INC ESP INC EBP INC ESI INC EDI*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* DEC EAX DEC ECX DEC EDX DEC EBX*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* DEC ESP DEC EBP DEC ESI DEC EDI*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/
/*50*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* POP EAX POP ECX POP EDX POP EBX*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* POP ESP POP EBP POP ESI POP EDI*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* PUSHA POPA BOUND ARPL*/
/*60*/ PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(8), PAIR_NP | CYCLES(7),
INVALID, INVALID, INVALID, INVALID,
/* PUSH imm IMUL PUSH imm IMUL*/
PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(10),
/* INSB INSW OUTSB OUTSW*/
PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(13), PAIR_NP | CYCLES(13),
/* Jxx*/
/*70*/ PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH,
PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH,
PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH,
PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH, PAIR_V | CYCLES_BRANCH,
/*80*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* TEST TEST XCHG XCHG*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/* MOV MOV MOV MOV*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* MOV from seg LEA MOV to seg POP*/
PAIR_NP | CYCLES(1), PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/* NOP XCHG XCHG XCHG*/
/*90*/ PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
/* XCHG XCHG XCHG XCHG*/
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
/* CBW CWD CALL far WAIT*/
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(1),
/* PUSHF POPF SAHF LAHF*/
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
/* MOV MOV MOV MOV*/
/*a0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* MOVSB MOVSW CMPSB CMPSW*/
PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5),
/* TEST TEST STOSB STOSW*/
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/* LODSB LODSW SCASB SCASW*/
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4),
/* MOV*/
/*b0*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* RET imm RET*/
/*c0*/ INVALID, INVALID, PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(2),
/* LES LDS MOV MOV*/
PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/* ENTER LEAVE RETF RETF*/
PAIR_NP | CYCLES(15), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0),
/* INT3 INT INTO IRET*/
PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(0),
/*d0*/ INVALID, INVALID, INVALID, INVALID,
/* AAM AAD SETALC XLAT*/
PAIR_NP | CYCLES(18), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(4),
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/* LOOPNE LOOPE LOOP JCXZ*/
/*e0*/ PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(5), PAIR_NP | CYCLES(5),
/* IN AL IN AX OUT_AL OUT_AX*/
PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12),
/* CALL JMP JMP JMP*/
PAIR_V | CYCLES_REG, PAIR_V | CYCLES_REG, PAIR_NP | CYCLES(0), PAIR_V | CYCLES_REG,
/* IN AL IN AX OUT_AL OUT_AX*/
PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(12), PAIR_NP | CYCLES(12),
/* REPNE REPE*/
/*f0*/ INVALID, INVALID, PAIR_NP | CYCLES(0), PAIR_NP | CYCLES(0),
/* HLT CMC*/
PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(2), INVALID, INVALID,
/* CLC STC CLI STI*/
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7),
/* CLD STD INCDEC*/
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_UV | CYCLES_REG, INVALID
// clang-format on
};
/* Timing/pairing descriptor for the 256 slots of the table named after the
   0F opcode prefix; the row markers give the index of the first entry of each
   group of sixteen. INVALID (0) fills slots that carry no entry.
   Entry 0x31 is spelled `PAIR_NP | CYCLES(1)` like every other fixed-count
   entry; the value is unchanged because PAIR_NP is zero.
   NOTE(review): presumably indexed by the second opcode byte when ModR/M
   selects a memory operand - confirm at the lookup site.
   NOTE(review): counts such as 195, 1000 and 10000 do not fit in the seven
   bits of CYCLES_MASK; confirm the decoder reads non-pairing counts with a
   wider mask. */
static uint64_t opcode_timings_p6_0f[256] = {
// clang-format off
/*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10),
INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID,
PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*10*/ INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*20*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6),
PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*30*/ PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(9), INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*40*/ INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*50*/ INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*60*/ PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
/*70*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_NP | CYCLES(100),
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
/*80*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
/*90*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/*a0*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(8),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), INVALID, INVALID,
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(13),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(10),
/*b0*/ PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13),
PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
INVALID, INVALID, PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13),
PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/*c0*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1),
PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1),
/*d0*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID,
PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM,
PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM,
/*e0*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID,
INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID,
PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM,
PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID, PAIR_U | CYCLES_RM,
/*f0*/ INVALID, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM,
INVALID, PAIR_U | CYCLES_RM, INVALID, INVALID,
PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID,
PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, PAIR_U | CYCLES_RM, INVALID,
// clang-format on
};
/* Timing/pairing descriptor for the 256 slots of the table named after the
   0F opcode prefix; the row markers give the index of the first entry of each
   group of sixteen. Compared with opcode_timings_p6_0f, rows 0x60-0x7f and
   0xd0-0xff use PAIR_UV | CYCLES_REG where that table uses PAIR_U | CYCLES_RM.
   INVALID (0) fills slots that carry no entry.
   NOTE(review): presumably selected when the ModR/M mod field is 3 (register
   operand) - confirm at the lookup site.
   NOTE(review): counts such as 195, 1000 and 10000 do not fit in the seven
   bits of CYCLES_MASK; confirm the decoder reads non-pairing counts with a
   wider mask. */
static uint64_t opcode_timings_p6_0f_mod3[256] = {
// clang-format off
/*00*/ PAIR_NP | CYCLES(20), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(11), PAIR_NP | CYCLES(10),
INVALID, PAIR_NP | CYCLES(195), PAIR_NP | CYCLES(7), INVALID,
PAIR_NP | CYCLES(1000), PAIR_NP | CYCLES(10000), INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*10*/ INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*20*/ PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6),
PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*30*/ PAIR_NP | CYCLES(9), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(9), INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*40*/ INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*50*/ INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*60*/ PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/*70*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_NP | CYCLES(100),
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
/*80*/ PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2), PAIR_NP | CYCLES(2),
/*90*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/*a0*/ PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(14), PAIR_NP | CYCLES(8),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(4), INVALID, INVALID,
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(13),
PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3), INVALID, PAIR_NP | CYCLES(10),
/*b0*/ PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(10), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13),
PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
INVALID, INVALID, PAIR_NP | CYCLES(6), PAIR_NP | CYCLES(13),
PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(7), PAIR_NP | CYCLES(3), PAIR_NP | CYCLES(3),
/*c0*/ PAIR_NP | CYCLES(4), PAIR_NP | CYCLES(4), INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1),
PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1), PAIR_NP | CYCLES(1),
/*d0*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG,
/*e0*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID,
INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID, PAIR_UV | CYCLES_REG,
/*f0*/ INVALID, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
INVALID, PAIR_UV | CYCLES_REG, INVALID, INVALID,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, INVALID,
// clang-format on
};
/* Eight identical U-pipe read-modify-write entries for the shift group.
   NOTE(review): presumably indexed by the ModR/M reg field - confirm at the
   lookup site. */
static uint64_t opcode_timings_p6_shift[8] = {
    // clang-format off
    [0] = PAIR_U | CYCLES_RMW,
    [1] = PAIR_U | CYCLES_RMW,
    [2] = PAIR_U | CYCLES_RMW,
    [3] = PAIR_U | CYCLES_RMW,
    [4] = PAIR_U | CYCLES_RMW,
    [5] = PAIR_U | CYCLES_RMW,
    [6] = PAIR_U | CYCLES_RMW,
    [7] = PAIR_U | CYCLES_RMW,
    // clang-format on
};
/* Eight identical U-pipe register-timing entries for the shift group.
   NOTE(review): presumably the register-operand (mod == 3) variant, indexed
   by the ModR/M reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_shift_mod3[8] = {
    // clang-format off
    [0] = PAIR_U | CYCLES_REG,
    [1] = PAIR_U | CYCLES_REG,
    [2] = PAIR_U | CYCLES_REG,
    [3] = PAIR_U | CYCLES_REG,
    [4] = PAIR_U | CYCLES_REG,
    [5] = PAIR_U | CYCLES_REG,
    [6] = PAIR_U | CYCLES_REG,
    [7] = PAIR_U | CYCLES_REG,
    // clang-format on
};
/* Entries for the eight sub-operations of opcode group F6.
   NOTE(review): presumably the memory-operand variant, indexed by the ModR/M
   reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_f6[8] = {
    // clang-format off
    [0] = PAIR_UV | CYCLES_RM,   /* TST */
    [1] = INVALID,
    [2] = PAIR_NP | CYCLES(3),   /* NOT */
    [3] = PAIR_NP | CYCLES(3),   /* NEG */
    [4] = PAIR_NP | CYCLES(11),  /* MUL */
    [5] = PAIR_NP | CYCLES(11),  /* IMUL */
    [6] = PAIR_NP | CYCLES(17),  /* DIV */
    [7] = PAIR_NP | CYCLES(22)   /* IDIV */
    // clang-format on
};
/* Entries for the eight sub-operations of opcode group F6; TST uses register
   timing here.
   NOTE(review): presumably the register-operand (mod == 3) variant, indexed
   by the ModR/M reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_f6_mod3[8] = {
    // clang-format off
    [0] = PAIR_UV | CYCLES_REG,  /* TST */
    [1] = INVALID,
    [2] = PAIR_NP | CYCLES(3),   /* NOT */
    [3] = PAIR_NP | CYCLES(3),   /* NEG */
    [4] = PAIR_NP | CYCLES(11),  /* MUL */
    [5] = PAIR_NP | CYCLES(11),  /* IMUL */
    [6] = PAIR_NP | CYCLES(17),  /* DIV */
    [7] = PAIR_NP | CYCLES(22)   /* IDIV */
    // clang-format on
};
/* Entries for the eight sub-operations of opcode group F7. MUL/IMUL/DIV/IDIV
   carry separate 16-bit and 32-bit counts via CYCLES_MULTI(c16, c32).
   NOTE(review): presumably the memory-operand variant, indexed by the ModR/M
   reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_f7[8] = {
    // clang-format off
    [0] = PAIR_UV | CYCLES_RM,             /* TST */
    [1] = INVALID,
    [2] = PAIR_NP | CYCLES(3),             /* NOT */
    [3] = PAIR_NP | CYCLES(3),             /* NEG */
    [4] = PAIR_NP | CYCLES_MULTI(11,10),   /* MUL */
    [5] = PAIR_NP | CYCLES_MULTI(11,10),   /* IMUL */
    [6] = PAIR_NP | CYCLES_MULTI(25,41),   /* DIV */
    [7] = PAIR_NP | CYCLES_MULTI(30,46)    /* IDIV */
    // clang-format on
};
/* Entries for the eight sub-operations of opcode group F7; TST uses register
   timing here. MUL/IMUL/DIV/IDIV carry separate 16-bit and 32-bit counts via
   CYCLES_MULTI(c16, c32).
   NOTE(review): presumably the register-operand (mod == 3) variant, indexed
   by the ModR/M reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_f7_mod3[8] = {
    // clang-format off
    [0] = PAIR_UV | CYCLES_REG,            /* TST */
    [1] = INVALID,
    [2] = PAIR_NP | CYCLES(3),             /* NOT */
    [3] = PAIR_NP | CYCLES(3),             /* NEG */
    [4] = PAIR_NP | CYCLES_MULTI(11,10),   /* MUL */
    [5] = PAIR_NP | CYCLES_MULTI(11,10),   /* IMUL */
    [6] = PAIR_NP | CYCLES_MULTI(25,41),   /* DIV */
    [7] = PAIR_NP | CYCLES_MULTI(30,46)    /* IDIV */
    // clang-format on
};
/* Entries for the eight sub-operations of opcode group FF.
   NOTE(review): presumably the memory-operand variant, indexed by the ModR/M
   reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_ff[8] = {
    // clang-format off
    [0] = PAIR_UV | CYCLES_RMW,  /* INC */
    [1] = PAIR_UV | CYCLES_RMW,  /* DEC */
    [2] = PAIR_NP | CYCLES(4),   /* CALL */
    [3] = PAIR_NP | CYCLES(0),   /* CALL far */
    [4] = PAIR_NP | CYCLES(2),   /* JMP */
    [5] = PAIR_NP | CYCLES(0),   /* JMP far */
    [6] = PAIR_NP | CYCLES(2),   /* PUSH */
    [7] = INVALID
    // clang-format on
};
/* Entries for the eight sub-operations of opcode group FF; INC/DEC use
   register timing here.
   NOTE(review): presumably the register-operand (mod == 3) variant, indexed
   by the ModR/M reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_ff_mod3[8] = {
    // clang-format off
    [0] = PAIR_UV | CYCLES_REG,  /* INC */
    [1] = PAIR_UV | CYCLES_REG,  /* DEC */
    [2] = PAIR_NP | CYCLES(4),   /* CALL */
    [3] = PAIR_NP | CYCLES(0),   /* CALL far */
    [4] = PAIR_NP | CYCLES(2),   /* JMP */
    [5] = PAIR_NP | CYCLES(0),   /* JMP far */
    [6] = PAIR_NP | CYCLES(2),   /* PUSH */
    [7] = INVALID
    // clang-format on
};
/* FPU entries for escape opcode D8 (the "s"-suffixed forms in the labels).
   Each entry is FPU_CYCLES(comp_time, i_overlap, f_overlap) OR-ed with a
   PAIR_* class.
   NOTE(review): presumably the memory-operand variant, indexed by the ModR/M
   reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_d8[8] = {
    // clang-format off
    [0] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FADDs */
    [1] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FMULs */
    [2] = PAIR_FX | FPU_CYCLES(1,0,0),     /* FCOMs */
    [3] = PAIR_FX | FPU_CYCLES(1,0,0),     /* FCOMPs */
    [4] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FSUBs */
    [5] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FSUBRs */
    [6] = PAIR_FX | FPU_CYCLES(39,38,2),   /* FDIVs */
    [7] = PAIR_FX | FPU_CYCLES(39,38,2)    /* FDIVRs */
    // clang-format on
};
/* FPU entries for escape opcode D8, register-stack forms. Each entry is
   FPU_CYCLES(comp_time, i_overlap, f_overlap) OR-ed with a PAIR_* class.
   NOTE(review): presumably the mod == 3 variant, indexed by the ModR/M reg
   field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_d8_mod3[8] = {
    // clang-format off
    [0] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FADD */
    [1] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FMUL */
    [2] = PAIR_FX | FPU_CYCLES(1,0,0),     /* FCOM */
    [3] = PAIR_FX | FPU_CYCLES(1,0,0),     /* FCOMP */
    [4] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FSUB */
    [5] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FSUBR */
    [6] = PAIR_FX | FPU_CYCLES(39,38,2),   /* FDIV */
    [7] = PAIR_FX | FPU_CYCLES(39,38,2)    /* FDIVR */
    // clang-format on
};
/* FPU entries for escape opcode D9 (the "s"-suffixed loads/stores and the
   environment/control-word operations named in the labels).
   NOTE(review): presumably the memory-operand variant, indexed by the ModR/M
   reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_d9[8] = {
    // clang-format off
    [0] = PAIR_FX | FPU_CYCLES(1,0,0),    /* FLDs */
    [1] = INVALID,
    [2] = PAIR_NP | FPU_CYCLES(2,0,0),    /* FSTs */
    [3] = PAIR_NP | FPU_CYCLES(2,0,0),    /* FSTPs */
    [4] = PAIR_NP | FPU_CYCLES(32,0,0),   /* FLDENV */
    [5] = PAIR_NP | FPU_CYCLES(8,0,0),    /* FLDCW */
    [6] = PAIR_NP | FPU_CYCLES(48,0,0),   /* FSTENV */
    [7] = PAIR_NP | FPU_CYCLES(2,0,0)     /* FSTCW */
    // clang-format on
};
/* FPU entries for escape opcode D9, 64 slots laid out as eight groups of
   eight (FLD x8, FXCH x8, FNOP group, FSTP x8, then four rows of individual
   operations). FXCH entries use PAIR_FXCH | CYCLES(0) rather than
   FPU_CYCLES(). INVALID (0) fills slots that carry no entry.
   NOTE(review): presumably indexed by the low six bits of the ModR/M byte
   when mod == 3 - confirm at the lookup site. */
static uint64_t opcode_timings_p6_d9_mod3[64] = {
// clang-format off
/*FLD*/
PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0),
PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0),
/*FXCH*/
PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0),
PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0), PAIR_FXCH | CYCLES(0),
/*FNOP*/
PAIR_NP | FPU_CYCLES(3,0,0), INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/*FSTP*/
PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0),
PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0),
/* opFCHS opFABS*/
PAIR_FX | FPU_CYCLES(1,0,0), PAIR_FX | FPU_CYCLES(1,0,0), INVALID, INVALID,
/* opFTST opFXAM*/
PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(21,4,0), INVALID, INVALID,
/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/
PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(5,2,2),
/* opFLDLG2 opFLDLN2 opFLDZ*/
PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(5,2,2), PAIR_NP | FPU_CYCLES(2,0,0), INVALID,
/* opF2XM1 opFYL2X opFPTAN opFPATAN*/
PAIR_NP | FPU_CYCLES(53,2,2), PAIR_NP | FPU_CYCLES(103,2,2), PAIR_NP | FPU_CYCLES(120,36,0), PAIR_NP | FPU_CYCLES(112,2,2),
/* (two empty slots) opFDECSTP opFINCSTP*/
INVALID, INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0),
/* opFPREM (empty slot) opFSQRT opFSINCOS*/
PAIR_NP | FPU_CYCLES(64,2,2), INVALID, PAIR_NP | FPU_CYCLES(70,69,2), PAIR_NP | FPU_CYCLES(89,2,2),
/* opFRNDINT opFSCALE opFSIN opFCOS*/
PAIR_NP | FPU_CYCLES(9,0,0), PAIR_NP | FPU_CYCLES(20,5,0), PAIR_NP | FPU_CYCLES(65,2,2), PAIR_NP | FPU_CYCLES(65,2,2)
// clang-format on
};
/* FPU entries for escape opcode DA (the "l"-suffixed integer forms in the
   labels); none of them pair.
   NOTE(review): presumably the memory-operand variant, indexed by the ModR/M
   reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_da[8] = {
    // clang-format off
    [0] = PAIR_NP | FPU_CYCLES(6,2,2),     /* FIADDl */
    [1] = PAIR_NP | FPU_CYCLES(6,2,2),     /* FIMULl */
    [2] = PAIR_NP | FPU_CYCLES(4,0,0),     /* FICOMl */
    [3] = PAIR_NP | FPU_CYCLES(4,0,0),     /* FICOMPl */
    [4] = PAIR_NP | FPU_CYCLES(6,2,2),     /* FISUBl */
    [5] = PAIR_NP | FPU_CYCLES(6,2,2),     /* FISUBRl */
    [6] = PAIR_NP | FPU_CYCLES(42,38,2),   /* FIDIVl */
    [7] = PAIR_NP | FPU_CYCLES(42,38,2)    /* FIDIVRl */
    // clang-format on
};
/* FPU entries for escape opcode DA, register forms: only slot 5 is populated.
   NOTE(review): presumably the mod == 3 variant, indexed by the ModR/M reg
   field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_da_mod3[8] = {
    // clang-format off
    [0] = INVALID,
    [1] = INVALID,
    [2] = INVALID,
    [3] = INVALID,
    [4] = INVALID,
    [5] = PAIR_NP | FPU_CYCLES(1,0,0),  /* FCOMPP */
    [6] = INVALID,
    [7] = INVALID
    // clang-format on
};
/* FPU entries for escape opcode DB (the "il" integer and "e" extended forms
   in the labels); none of them pair.
   NOTE(review): presumably the memory-operand variant, indexed by the ModR/M
   reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_db[8] = {
    // clang-format off
    [0] = PAIR_NP | FPU_CYCLES(3,2,2),  /* FLDil */
    [1] = INVALID,
    [2] = PAIR_NP | FPU_CYCLES(6,0,0),  /* FSTil */
    [3] = PAIR_NP | FPU_CYCLES(6,0,0),  /* FSTPil */
    [4] = INVALID,
    [5] = PAIR_NP | FPU_CYCLES(3,0,0),  /* FLDe */
    [6] = INVALID,
    [7] = PAIR_NP | FPU_CYCLES(3,0,0)   /* FSTPe */
    // clang-format on
};
/* FPU entries for escape opcode DB, 64 slots. Only slots 33-37 are populated
   (FNOP, FCLEX, FINIT and two further FNOP entries per the labels); every
   other slot is INVALID (0).
   NOTE(review): presumably indexed by the low six bits of the ModR/M byte
   when mod == 3 - confirm at the lookup site. */
static uint64_t opcode_timings_p6_db_mod3[64] = {
// clang-format off
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
/* (empty slot) opFNOP opFCLEX opFINIT*/
INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(7,0,0), PAIR_NP | FPU_CYCLES(17,0,0),
/* opFNOP opFNOP*/
PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
INVALID, INVALID, INVALID, INVALID,
// clang-format on
};
/* FPU entries for escape opcode DC (the "d"-suffixed forms in the labels).
   Each entry is FPU_CYCLES(comp_time, i_overlap, f_overlap) OR-ed with a
   PAIR_* class.
   NOTE(review): presumably the memory-operand variant, indexed by the ModR/M
   reg field - confirm at the lookup site. */
static uint64_t opcode_timings_p6_dc[8] = {
    // clang-format off
    [0] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FADDd */
    [1] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FMULd */
    [2] = PAIR_FX | FPU_CYCLES(1,0,0),     /* FCOMd */
    [3] = PAIR_FX | FPU_CYCLES(1,0,0),     /* FCOMPd */
    [4] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FSUBd */
    [5] = PAIR_FX | FPU_CYCLES(3,2,2),     /* FSUBRd */
    [6] = PAIR_FX | FPU_CYCLES(39,38,2),   /* FDIVd */
    [7] = PAIR_FX | FPU_CYCLES(39,38,2)    /* FDIVRd */
    // clang-format on
};
/*Timings for FPU escape 0xDC when the ModR/M mod field is 3.
  Indexed with (opcode >> 3) & 7 by codegen_timing_pentium_opcode().*/
static uint64_t opcode_timings_p6_dc_mod3[8] = {
// clang-format off
/* Slots 0-1: opFADDr opFMULr (slots 2-3 are invalid)*/
PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, INVALID,
/* Slots 4-7: opFSUBRr opFSUBr opFDIVRr opFDIVr*/
PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2)
// clang-format on
};
/*Timings for FPU escape 0xDD when the ModR/M mod field is not 3.
  Indexed with (opcode >> 3) & 7 by codegen_timing_pentium_opcode().*/
static uint64_t opcode_timings_p6_dd[8] = {
// clang-format off
/* Slot 0: FLDd, slot 2: FSTd, slot 3: FSTPd*/
PAIR_FX | FPU_CYCLES(1,0,0), INVALID, PAIR_NP | FPU_CYCLES(2,0,0), PAIR_NP | FPU_CYCLES(2,0,0),
/* Slot 4: FRSTOR, slot 6: FSAVE, slot 7: FSTSW*/
PAIR_NP | FPU_CYCLES(70,0,0), INVALID, PAIR_NP | FPU_CYCLES(127,0,0), PAIR_NP | FPU_CYCLES(6,0,0)
// clang-format on
};
/*Timings for FPU escape 0xDD when the ModR/M mod field is 3.
  Indexed with (opcode >> 3) & 7 by codegen_timing_pentium_opcode().*/
static uint64_t opcode_timings_p6_dd_mod3[8] = {
// clang-format off
/* Slot 0: FFFREE, slot 2: FST, slot 3: FSTP*/
PAIR_NP | FPU_CYCLES(2,0,0), INVALID, PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0),
/* Slot 4: FUCOM, slot 5: FUCOMP*/
PAIR_NP | FPU_CYCLES(1,0,0), PAIR_NP | FPU_CYCLES(1,0,0), INVALID, INVALID
// clang-format on
};
/*Timings for FPU escape 0xDE when the ModR/M mod field is not 3.
  Indexed with (opcode >> 3) & 7 by codegen_timing_pentium_opcode().*/
static uint64_t opcode_timings_p6_de[8] = {
// clang-format off
/* Slots 0-3: FIADDw FIMULw FICOMw FICOMPw*/
PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(4,0,0), PAIR_NP | FPU_CYCLES(4,0,0),
/* Slots 4-7: FISUBw FISUBRw FIDIVw FIDIVRw*/
PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(6,2,2), PAIR_NP | FPU_CYCLES(42,38,2), PAIR_NP | FPU_CYCLES(42,38,2)
// clang-format on
};
/*Timings for FPU escape 0xDE when the ModR/M mod field is 3.
  Indexed with (opcode >> 3) & 7 by codegen_timing_pentium_opcode().*/
static uint64_t opcode_timings_p6_de_mod3[8] = {
// clang-format off
/* Slot 0: FADDP, slot 1: FMULP, slot 3: FCOMPP*/
PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), INVALID, PAIR_FX | FPU_CYCLES(1,0,0),
/* Slots 4-7: FSUBP FSUBRP FDIVP FDIVRP*/
PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(3,2,2), PAIR_FX | FPU_CYCLES(39,38,2), PAIR_FX | FPU_CYCLES(39,38,2)
// clang-format on
};
/*Timings for FPU escape 0xDF when the ModR/M mod field is not 3.
  Indexed with (opcode >> 3) & 7 by codegen_timing_pentium_opcode().*/
static uint64_t opcode_timings_p6_df[8] = {
// clang-format off
/* Slot 0: FILDiw, slot 2: FISTiw, slot 3: FISTPiw*/
PAIR_NP | FPU_CYCLES(3,2,2), INVALID, PAIR_NP | FPU_CYCLES(6,0,0), PAIR_NP | FPU_CYCLES(6,0,0),
/* Slot 5: FILDiq, slot 6: FBSTP, slot 7: FISTPiq (slot 4 is invalid)*/
INVALID, PAIR_NP | FPU_CYCLES(3,2,2), PAIR_NP | FPU_CYCLES(148,0,0), PAIR_NP | FPU_CYCLES(6,0,0)
// clang-format on
};
/*Timings for FPU escape 0xDF when the ModR/M mod field is 3.
  Indexed with (opcode >> 3) & 7 by codegen_timing_pentium_opcode();
  only slot 4 holds a timing.*/
static uint64_t opcode_timings_p6_df_mod3[8] = {
// clang-format off
INVALID, INVALID, INVALID, INVALID,
/* Slot 4: FSTSW AX*/
PAIR_NP | FPU_CYCLES(6,0,0), INVALID, INVALID, INVALID
// clang-format on
};
/*Timings for opcode 0x81 when the ModR/M mod field is not 3.
  codegen_timing_pentium_opcode() indexes this table with (fetchdat >> 3) & 7.
  Every entry carries a 16/32-bit immediate (CYCLES_IMM1632); slots 0-6 are
  read-modify-write and slot 7 is read-modify only.*/
static uint64_t opcode_timings_p6_81[8] = {
// clang-format off
PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632,
PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RMW | CYCLES_IMM1632, PAIR_UV | CYCLES_RM | CYCLES_IMM1632
// clang-format on
};
/*Timings for opcode 0x81 when the ModR/M mod field is 3.
  Indexed with (fetchdat >> 3) & 7 by codegen_timing_pentium_opcode().
  All slots use register timing and pair in either pipe.*/
static uint64_t opcode_timings_p6_81_mod3[8] = {
// clang-format off
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG
// clang-format on
};
/*Timings for opcodes 0x80, 0x82 and 0x83 when the ModR/M mod field is not 3.
  codegen_timing_pentium_opcode() indexes this table with (fetchdat >> 3) & 7.
  Every entry carries an 8-bit immediate (CYCLES_IMM8); slots 0-6 are
  read-modify-write and slot 7 is read-modify only.*/
static uint64_t opcode_timings_p6_8x[8] = {
// clang-format off
PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8,
PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RMW | CYCLES_IMM8, PAIR_UV | CYCLES_RM | CYCLES_IMM8
// clang-format on
};
/*Timings for opcodes 0x80, 0x82 and 0x83 when the ModR/M mod field is 3.
  Indexed with (fetchdat >> 3) & 7 by codegen_timing_pentium_opcode().
  All slots use register timing and pair in either pipe.*/
static uint64_t opcode_timings_p6_8x_mod3[8] = {
// clang-format off
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG,
PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG, PAIR_UV | CYCLES_REG
// clang-format on
};
/*Decode bookkeeping. codegen_instruction() sets this to (-instr_cycles) + 1
  after every instruction; cleared at block start.*/
static int decode_delay;
/*Prefix decode cycles accumulated by codegen_timing_pentium_prefix() for the
  instruction currently being decoded; zeroed once that instruction has been
  issued or parked in the U pipe.*/
static int decode_delay_offset;
/*Most recent prefix byte seen; selects the timing table in
  codegen_timing_pentium_opcode(). Reset to 0 by codegen_timing_pentium_start().*/
static uint8_t last_prefix;
/*Number of prefix bytes seen since codegen_timing_pentium_start().*/
static int prefixes;
/*Extract the cycle count from a timing table entry.
  timings : entry from one of the opcode_timings_* tables
  deps    : matching dependency entry (only FPU_FXCH is consulted)
  op_32   : bit 0x100 selects the 32-bit count of a CYCLES_MULTI entry
  Calls fatal() if the entry cannot be decoded.*/
static inline int
COUNT(uint64_t timings, uint64_t deps, int op_32)
{
    const uint64_t pair_class = timings & PAIR_MASK;
    const int      is_fpu     = (timings & PAIR_FPU) != 0;

    /*FPU instructions other than FXCH report their integer latency*/
    if (is_fpu && !(deps & FPU_FXCH))
        return FPU_I_LATENCY(timings);

    /*Separate counts for 16-bit (low byte) and 32-bit (next byte) data*/
    if (timings & CYCLES_HAS_MULTI) {
        const int shift = (op_32 & 0x100) ? 8 : 0;

        return ((uintptr_t) timings >> shift) & 0xff;
    }

    /*Non-pairing, FX and FXCH entries store the count directly*/
    if (pair_class == 0 || pair_class == PAIR_FX || pair_class == PAIR_FXCH)
        return timings & 0xffff;

    /*Pairable integer entries: drop the immediate flags, keep the timing class*/
    if ((timings & PAIR_UV) && !is_fpu)
        timings &= 3;

    switch (timings & CYCLES_MASK) {
        case CYCLES_REG:
            return 1;
        case CYCLES_RM:
            return 2;
        case CYCLES_RMW:
            return 3;
        case CYCLES_BRANCH:
            if (cpu_has_feature(CPU_FEATURE_MMX))
                return 1;
            return 2;
    }

    fatal("Illegal COUNT %016" PRIu64 "\n", timings);

    return timings;
}
static int
codegen_fpu_latencies(uint64_t deps, int reg)
{
int latency = fpu_latency;
if ((deps & FPU_RW_ST0) && fpu_st_latency[0] && fpu_st_latency[0] > latency)
latency = fpu_st_latency[0];
if ((deps & FPU_RW_ST1) && fpu_st_latency[1] && fpu_st_latency[1] > latency)
latency = fpu_st_latency[1];
if ((deps & FPU_RW_STREG) && fpu_st_latency[reg] && fpu_st_latency[reg] > latency)
latency = fpu_st_latency[reg];
return latency;
}
/*Subtract count from latency and clamp the result at zero.
  latency must be a modifiable lvalue; it is evaluated more than once, so it
  must not have side effects. The body is wrapped in do/while(0) so the macro
  behaves as a single statement (e.g. under an unbraced if/else).*/
#define SUB_AND_CLAMP(latency, count) \
    do {                              \
        (latency) -= (count);         \
        if ((latency) < 0)            \
            (latency) = 0;            \
    } while (0)
/*Advance the FPU latency trackers by count cycles: the global fpu_latency and
  all eight fpu_st_latency slots are reduced by count, never dropping below 0.*/
static void
codegen_fpu_latency_clock(int count)
{
    SUB_AND_CLAMP(fpu_latency, count);

    for (int slot = 0; slot < 8; slot++) {
        SUB_AND_CLAMP(fpu_st_latency[slot], count);
    }
}
/*Report whether the ModR/M encoding in fetchdat carries a displacement.
  fetchdat : ModR/M byte in bits 0-7, following byte (SIB, if any) in bits 8-15
  op_32    : bit 0x200 selects 32-bit addressing, otherwise 16-bit
  Returns 1 if a displacement is present, 0 otherwise.*/
static inline int
codegen_timing_has_displacement(uint32_t fetchdat, int op_32)
{
    const uint32_t mod    = fetchdat & 0xc0;
    const uint32_t mod_rm = fetchdat & 0xc7;

    /*mod 1 and mod 2 always carry a displacement*/
    if (mod == 0x40 || mod == 0x80)
        return 1;

    /*Register form never has one*/
    if (mod == 0xc0)
        return 0;

    /*mod 0: only the direct-address encodings have a displacement*/
    if (!(op_32 & 0x200))
        return mod_rm == 0x06;

    /*32-bit addressing with SIB: displacement only when the SIB base is 5*/
    if ((fetchdat & 7) == 4)
        return (fetchdat & 0x700) == 0x500;

    return mod_rm == 0x05;
}
/*The instruction is only of interest here if it's longer than 7 bytes, as that's the
limit on Pentium MMX parallel decoding*/
static inline int
codegen_timing_instr_length(uint64_t timing, uint32_t fetchdat, int op_32)
{
int len = prefixes;
if ((timing & CYCLES_MASK) == CYCLES_RM || (timing & CYCLES_MASK) == CYCLES_RMW) {
len += 2; /*Opcode + ModR/M*/
if ((timing & CYCLES_HASIMM) == CYCLES_IMM8)
len++;
if ((timing & CYCLES_HASIMM) == CYCLES_IMM1632)
len += (op_32 & 0x100) ? 4 : 2;
if (op_32 & 0x200) {
if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) {
/* Has SIB*/
len++;
if ((fetchdat & 0xc0) == 0x40)
len++;
else if ((fetchdat & 0xc0) == 0x80)
len += 4;
else if ((fetchdat & 0x700) == 0x500)
len += 4;
} else {
if ((fetchdat & 0xc0) == 0x40)
len++;
else if ((fetchdat & 0xc0) == 0x80)
len += 4;
else if ((fetchdat & 0xc7) == 0x05)
len += 4;
}
} else {
if ((fetchdat & 0xc0) == 0x40)
len++;
else if ((fetchdat & 0xc0) == 0x80)
len += 2;
else if ((fetchdat & 0xc7) == 0x06)
len += 2;
}
}
return len;
}
/*Reset per-block pairing state: nothing parked in the U pipe and no
  outstanding decode delay.*/
void
codegen_timing_pentium_block_start(void)
{
    decode_delay_offset = 0;
    decode_delay        = 0;
    u_pipe_full         = 0;
}
/*Reset per-instruction prefix tracking: no prefixes counted and no
  prefix byte remembered.*/
void
codegen_timing_pentium_start(void)
{
    prefixes    = 0;
    last_prefix = 0;
}
/*Account for one prefix byte.
  prefix   : the prefix (or FPU escape / 0x0f) byte
  fetchdat : bytes following the prefix; the low byte is inspected for 0x0f
  Counts the prefix, remembers it in last_prefix and adds its decode cost to
  decode_delay_offset.*/
void
codegen_timing_pentium_prefix(uint8_t prefix, uint32_t fetchdat)
{
    /*On Pentium all prefixes take 1 cycle to decode. Decode may be shadowed
      by execution of previous instructions*/
    int decode_cost = 1;

    prefixes++;
    last_prefix = prefix;

    /*Bytes 0xd8-0xdf add no decode cost*/
    if ((prefix & 0xf8) == 0xd8)
        return;

    if (cpu_has_feature(CPU_FEATURE_MMX)) {
        if (prefix == 0x0f) {
            /*On Pentium MMX 0fh prefix is 'free'*/
            decode_cost = 0;
        } else if (prefix == 0x66 || prefix == 0x67) {
            /*On Pentium MMX 66h and 67h prefixes take 2 clocks*/
            decode_cost = 2;
        }
    } else if (prefix == 0x0f && (fetchdat & 0xf0) == 0x80) {
        /*On Pentium 0fh prefix is 'free' when used on conditional jumps*/
        decode_cost = 0;
    }

    decode_delay_offset += decode_cost;
}
/*Check for an address generation interlock: returns non-zero when a register
  used by this instruction for addressing is in regmask_modified.*/
static int
check_agi(uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int op_32)
{
    uint32_t  used_for_addr  = get_addr_regmask(deps[opcode], fetchdat, op_32);
    const int uses_impl_esp  = (used_for_addr & REGMASK_IMPL_ESP) != 0;
    const int esp_was_written = (regmask_modified & (1 << REG_ESP)) != 0;
    const int written_impl   = (regmask_modified & REGMASK_IMPL_ESP) != 0;

    /*Instructions that use ESP implicitly (eg PUSH, POP, CALL etc) do not
      cause AGIs with each other, but do with instructions that use it explicitly*/
    if (uses_impl_esp && esp_was_written && !written_impl)
        used_for_addr |= (1 << REG_ESP);

    return (regmask_modified & used_for_addr) & ~REGMASK_IMPL_ESP;
}
static void
codegen_instruction(uint64_t *timings, uint64_t *deps, uint8_t opcode, uint32_t fetchdat, int decode_delay_offset, int op_32, int exec_delay)
{
int instr_cycles;
int latency = 0;
if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH))
instr_cycles = latency = codegen_fpu_latencies(deps[opcode], fetchdat & 7);
else {
#if 0
if (timings[opcode] & FPU_WRITE_ST0)
fatal("FPU_WRITE_ST0\n");
if (timings[opcode] & FPU_WRITE_ST1)
fatal("FPU_WRITE_ST1\n");
if (timings[opcode] & FPU_WRITE_STREG)
fatal("FPU_WRITE_STREG\n");*/
#endif
instr_cycles = 0;
}
if ((decode_delay + decode_delay_offset) > 0)
codegen_fpu_latency_clock(decode_delay + decode_delay_offset + instr_cycles);
else
codegen_fpu_latency_clock(instr_cycles);
instr_cycles += COUNT(timings[opcode], deps[opcode], op_32);
instr_cycles += exec_delay;
if ((decode_delay + decode_delay_offset) > 0)
codegen_block_cycles += instr_cycles + decode_delay + decode_delay_offset;
else
codegen_block_cycles += instr_cycles;
decode_delay = (-instr_cycles) + 1;
if (deps[opcode] & FPU_POP) {
for (uint8_t c = 0; c < 7; c++)
fpu_st_latency[c] = fpu_st_latency[c + 1];
fpu_st_latency[7] = 0;
}
if (deps[opcode] & FPU_POP2) {
for (uint8_t c = 0; c < 6; c++)
fpu_st_latency[c] = fpu_st_latency[c + 2];
fpu_st_latency[6] = fpu_st_latency[7] = 0;
}
if ((timings[opcode] & PAIR_FPU) && !(deps[opcode] & FPU_FXCH)) {
fpu_latency = FPU_F_LATENCY(timings[opcode]);
}
if (deps[opcode] & FPU_PUSH) {
for (uint8_t c = 0; c < 7; c++)
fpu_st_latency[c + 1] = fpu_st_latency[c];
fpu_st_latency[0] = 0;
}
if (deps[opcode] & FPU_WRITE_ST0) {
#if 0
if (fpu_st_latency[0])
fatal("Bad latency ST0\n");*/
#endif
fpu_st_latency[0] = FPU_RESULT_LATENCY(timings[opcode]);
}
if (deps[opcode] & FPU_WRITE_ST1) {
#if 0
if (fpu_st_latency[1])
fatal("Bad latency ST1\n");*/
#endif
fpu_st_latency[1] = FPU_RESULT_LATENCY(timings[opcode]);
}
if (deps[opcode] & FPU_WRITE_STREG) {
int reg = fetchdat & 7;
if (deps[opcode] & FPU_POP)
reg--;
if (reg >= 0 && !(reg == 0 && (deps[opcode] & FPU_WRITE_ST0)) && !(reg == 1 && (deps[opcode] & FPU_WRITE_ST1))) {
fpu_st_latency[reg] = FPU_RESULT_LATENCY(timings[opcode]);
}
}
}
/*Account for one decoded opcode in the Pentium U/V pairing model.
  opcode   : opcode byte; replaced below by a table index for FPU escapes and
             for the group opcodes
  fetchdat : bytes following the opcode (ModR/M in the low byte)
  op_32    : operand/address size flags
  The routine selects the timing and dependency tables, then either pairs the
  instruction with the one parked in the U pipe, parks it in the U pipe, or
  issues it immediately through codegen_instruction().*/
void
codegen_timing_pentium_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, UNUSED(uint32_t op_pc))
{
    uint64_t *timings;
    uint64_t *deps;
    int mod3 = ((fetchdat & 0xc0) == 0xc0);
    int bit8 = !(opcode & 1);
    int agi_stall = 0;
    /*Select the timing/dependency tables from the last prefix byte seen*/
    switch (last_prefix) {
        case 0x0f:
            timings = mod3 ? opcode_timings_p6_0f_mod3 : opcode_timings_p6_0f;
            deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f;
            break;
        case 0xd8:
            timings = mod3 ? opcode_timings_p6_d8_mod3 : opcode_timings_p6_d8;
            deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8;
            opcode = (opcode >> 3) & 7;
            break;
        case 0xd9:
            timings = mod3 ? opcode_timings_p6_d9_mod3 : opcode_timings_p6_d9;
            deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9;
            opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7;
            break;
        case 0xda:
            timings = mod3 ? opcode_timings_p6_da_mod3 : opcode_timings_p6_da;
            deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da;
            opcode = (opcode >> 3) & 7;
            break;
        case 0xdb:
            timings = mod3 ? opcode_timings_p6_db_mod3 : opcode_timings_p6_db;
            deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db;
            opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7;
            break;
        case 0xdc:
            timings = mod3 ? opcode_timings_p6_dc_mod3 : opcode_timings_p6_dc;
            deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc;
            opcode = (opcode >> 3) & 7;
            break;
        case 0xdd:
            timings = mod3 ? opcode_timings_p6_dd_mod3 : opcode_timings_p6_dd;
            deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd;
            opcode = (opcode >> 3) & 7;
            break;
        case 0xde:
            timings = mod3 ? opcode_timings_p6_de_mod3 : opcode_timings_p6_de;
            deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de;
            opcode = (opcode >> 3) & 7;
            break;
        case 0xdf:
            timings = mod3 ? opcode_timings_p6_df_mod3 : opcode_timings_p6_df;
            deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df;
            opcode = (opcode >> 3) & 7;
            break;
        default:
            /*No table-selecting prefix: group opcodes index their own table
              with bits 3-5 of fetchdat, everything else uses the main table*/
            switch (opcode) {
                case 0x80:
                case 0x82:
                case 0x83:
                    timings = mod3 ? opcode_timings_p6_8x_mod3 : opcode_timings_p6_8x;
                    deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
                    opcode = (fetchdat >> 3) & 7;
                    break;
                case 0x81:
                    timings = mod3 ? opcode_timings_p6_81_mod3 : opcode_timings_p6_81;
                    deps = mod3 ? opcode_deps_81_mod3 : opcode_deps_81;
                    opcode = (fetchdat >> 3) & 7;
                    break;
                case 0xc0:
                case 0xc1:
                case 0xd0:
                case 0xd1:
                    timings = mod3 ? opcode_timings_p6_shift_mod3 : opcode_timings_p6_shift;
                    deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift;
                    opcode = (fetchdat >> 3) & 7;
                    break;
                case 0xd2:
                case 0xd3:
                    timings = mod3 ? opcode_timings_p6_shift_mod3 : opcode_timings_p6_shift;
                    deps = mod3 ? opcode_deps_shift_cl_mod3 : opcode_deps_shift_cl;
                    opcode = (fetchdat >> 3) & 7;
                    break;
                case 0xf6:
                    timings = mod3 ? opcode_timings_p6_f6_mod3 : opcode_timings_p6_f6;
                    deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6;
                    opcode = (fetchdat >> 3) & 7;
                    break;
                case 0xf7:
                    timings = mod3 ? opcode_timings_p6_f7_mod3 : opcode_timings_p6_f7;
                    deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7;
                    opcode = (fetchdat >> 3) & 7;
                    break;
                case 0xff:
                    timings = mod3 ? opcode_timings_p6_ff_mod3 : opcode_timings_p6_ff;
                    deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff;
                    opcode = (fetchdat >> 3) & 7;
                    break;
                default:
                    timings = mod3 ? opcode_timings_p6_mod3 : opcode_timings_p6;
                    deps = mod3 ? opcode_deps_mod3 : opcode_deps;
                    break;
            }
    }
    /*An instruction is parked in the U pipe: try to pair this one with it*/
    if (u_pipe_full) {
        /*NOTE(review): this computes the source mask of the CURRENT instruction
          but passes u_pipe_op_32 rather than op_32, and stores the result in a
          uint8_t - confirm both are intended*/
        uint8_t regmask = get_srcdep_mask(deps[opcode], fetchdat, bit8, u_pipe_op_32);
        /*A PAIR_FX instruction in the U pipe pairs only with FXCH, and FXCH
          pairs only with a PAIR_FX instruction*/
        if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && (timings[opcode] & PAIR_MASK) != PAIR_FXCH)
            goto nopair;
        if ((timings[opcode] & PAIR_MASK) == PAIR_FXCH && (u_pipe_timings[u_pipe_opcode] & PAIR_MASK) != PAIR_FX)
            goto nopair;
        if ((u_pipe_timings[u_pipe_opcode] & PAIR_MASK) == PAIR_FX && (timings[opcode] & PAIR_MASK) == PAIR_FXCH) {
            int temp;
            /*FX + FXCH pair: issue the U pipe instruction, then apply the
              exchange by swapping the latency of ST(0) and ST(i)*/
            if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32))
                agi_stall = 1;
            codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall);
            temp = fpu_st_latency[fetchdat & 7];
            fpu_st_latency[fetchdat & 7] = fpu_st_latency[0];
            fpu_st_latency[0] = temp;
            u_pipe_full = 0;
            decode_delay_offset = 0;
            regmask_modified = u_pipe_regmask;
            addr_regmask = 0;
            return;
        }
        /*Integer pairing: needs a V-pairable instruction, no dependency on
          the U pipe instruction's destinations, and no pending decode delay*/
        if ((timings[opcode] & PAIR_V) && !(u_pipe_regmask & regmask) && (decode_delay + decode_delay_offset + u_pipe_decode_delay_offset) <= 0) {
            int has_displacement;
            /*The displacement check only applies to instructions that also
              carry an immediate*/
            if (timings[opcode] & CYCLES_HASIMM)
                has_displacement = codegen_timing_has_displacement(fetchdat, op_32);
            else
                has_displacement = 0;
            if (!has_displacement && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) {
                int t1 = u_pipe_timings[u_pipe_opcode] & CYCLES_MASK;
                int t2 = timings[opcode] & CYCLES_MASK;
                int t_pair;
                uint64_t temp_timing;
                uint64_t temp_deps = 0;
                /*Reduce integer entries to their timing class (drops the
                  immediate flags) before the pair_timings lookup*/
                if (!(u_pipe_timings[u_pipe_opcode] & PAIR_FPU))
                    t1 &= 3;
                if (!(timings[opcode] & PAIR_FPU))
                    t2 &= 3;
                if (t1 < 0 || t2 < 0 || t1 > CYCLES_BRANCH || t2 > CYCLES_BRANCH)
                    fatal("Pair out of range\n");
                t_pair = pair_timings[t1][t2];
                if (t_pair < 1)
                    fatal("Illegal pair timings : t1=%i t2=%i u_opcode=%02x v_opcode=%02x\n", t1, t2, u_pipe_opcode, opcode);
                /*Instruction can pair with previous*/
                /*The pair is issued as one synthetic instruction whose timing
                  entry is the combined cycle count*/
                temp_timing = t_pair;
                if (check_agi(deps, opcode, fetchdat, op_32) || check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32))
                    agi_stall = 1;
                codegen_instruction(&temp_timing, &temp_deps, 0, 0, 0, 0, agi_stall);
                u_pipe_full = 0;
                decode_delay_offset = 0;
                regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8) | u_pipe_regmask;
                addr_regmask = 0;
                return;
            }
        }
nopair:
        /*Instruction can not pair with previous*/
        /*Run previous now*/
        if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32))
            agi_stall = 1;
        codegen_instruction(u_pipe_timings, u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_decode_delay_offset, u_pipe_op_32, agi_stall);
        u_pipe_full = 0;
        regmask_modified = u_pipe_regmask;
        addr_regmask = 0;
    }
    /*U pipe is now empty: park this instruction there if it is U-pairable and
      has no pending decode delay*/
    if ((timings[opcode] & PAIR_U) && (decode_delay + decode_delay_offset) <= 0) {
        int has_displacement;
        if (timings[opcode] & CYCLES_HASIMM)
            has_displacement = codegen_timing_has_displacement(fetchdat, op_32);
        else
            has_displacement = 0;
        if ((!has_displacement || cpu_has_feature(CPU_FEATURE_MMX)) && (!cpu_has_feature(CPU_FEATURE_MMX) || codegen_timing_instr_length(timings[opcode], fetchdat, op_32) <= 7)) {
            /*Instruction might pair with next*/
            u_pipe_full = 1;
            u_pipe_opcode = opcode;
            u_pipe_timings = timings;
            u_pipe_op_32 = op_32;
            u_pipe_regmask = get_dstdep_mask(deps[opcode], fetchdat, bit8);
            u_pipe_fetchdat = fetchdat;
            u_pipe_decode_delay_offset = decode_delay_offset;
            u_pipe_deps = deps;
            decode_delay_offset = 0;
            return;
        }
    }
    /*Instruction can not pair and must run now*/
    if (check_agi(deps, opcode, fetchdat, op_32))
        agi_stall = 1;
    codegen_instruction(timings, deps, opcode, fetchdat, decode_delay_offset, op_32, agi_stall);
    decode_delay_offset = 0;
    regmask_modified = get_dstdep_mask(deps[opcode], fetchdat, bit8);
    addr_regmask = 0;
}
/*Finish a block: if an instruction is still parked in the U pipe, charge its
  cycles (plus one for an AGI stall) to codegen_block_cycles and empty the pipe.*/
void
codegen_timing_pentium_block_end(void)
{
    int pending_delay;

    if (!u_pipe_full)
        return;

    /*Run previous now*/
    if (check_agi(u_pipe_deps, u_pipe_opcode, u_pipe_fetchdat, u_pipe_op_32))
        codegen_block_cycles++;

    codegen_block_cycles += COUNT(u_pipe_timings[u_pipe_opcode], u_pipe_deps[u_pipe_opcode], u_pipe_op_32);

    /*decode_delay is normally zero or negative (decode shadowed by earlier
      execution). As in codegen_instruction(), only an unshadowed, positive
      delay costs cycles; a negative sum must not reduce the block total.*/
    pending_delay = decode_delay + decode_delay_offset;
    if (pending_delay > 0)
        codegen_block_cycles += pending_delay;

    u_pipe_full = 0;
}
/*Pentium timing model descriptor: the entry points defined in this file,
  given in the member order of codegen_timing_t. The final member is left NULL.*/
codegen_timing_t codegen_timing_pentium = {
    codegen_timing_pentium_start,
    codegen_timing_pentium_prefix,
    codegen_timing_pentium_opcode,
    codegen_timing_pentium_block_start,
    codegen_timing_pentium_block_end,
    NULL
};