2368 lines
108 KiB
C
2368 lines
108 KiB
C
/*Basic P6 timing model by plant/nerd73. Based on the K6 timing model*/
|
|
/*Some cycle timings come from https://www.agner.org/optimize/instruction_tables.pdf*/
|
|
#include <stdio.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <wchar.h>
|
|
#include <86box/86box.h>
|
|
#include "cpu.h"
|
|
#include <86box/mem.h>
|
|
#include <86box/machine.h>
|
|
|
|
#include "x86.h"
|
|
#include "x86_ops.h"
|
|
#include "x87.h"
|
|
#include "386_common.h"
|
|
#include "codegen.h"
|
|
#include "codegen_ops.h"
|
|
#include "codegen_timing_common.h"
|
|
|
|
typedef enum uop_type_t
|
|
{
|
|
UOP_ALU = 0, /*Executes in Integer X or Y units*/
|
|
UOP_ALUX, /*Executes in Integer X unit*/
|
|
UOP_LOAD, /*Executes in Load unit*/
|
|
UOP_STORED, /*Executes in Data Store unit*/
|
|
UOP_STOREA, /*Executes in Address Store unit*/
|
|
UOP_FLOAD, /*Executes in Load unit*/
|
|
UOP_FSTORED, /*Executes in Data Store unit*/
|
|
UOP_FSTOREA, /*Executes in Address Store unit*/
|
|
UOP_MLOAD, /*Executes in Load unit*/
|
|
UOP_MSTORED, /*Executes in Data Store unit*/
|
|
UOP_MSTOREA, /*Executes in Address Store unit*/
|
|
UOP_FLOAT, /*Executes in Floating Point unit*/
|
|
UOP_MMX, /*Executes in Integer X or Y units as MMX*/
|
|
UOP_MMX_SHIFT, /*Executes in Integer Y unit. Uses MMX shifter*/
|
|
UOP_MMX_MUL, /*Executes in Integer X unit. Uses MMX multiplier*/
|
|
UOP_BRANCH, /*Executes in Branch unit*/
|
|
UOP_LIMM /*Does not require an execution unit*/
|
|
} uop_type_t;
|
|
|
|
typedef enum decode_type_t
|
|
{
|
|
DECODE_SIMPLE,
|
|
DECODE_COMPLEX,
|
|
} decode_type_t;
|
|
|
|
#define MAX_UOPS 10
|
|
|
|
typedef struct risc86_uop_t
|
|
{
|
|
uop_type_t type;
|
|
double latency;
|
|
} risc86_uop_t;
|
|
|
|
typedef struct risc86_instruction_t
|
|
{
|
|
int nr_uops;
|
|
decode_type_t decode_type;
|
|
risc86_uop_t uop[MAX_UOPS];
|
|
} risc86_instruction_t;
|
|
|
|
static const risc86_instruction_t alu_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t alux_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t load_alu_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t load_alux_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t alu_store_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[2] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[3] = {.type = UOP_STOREA, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t alux_store_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[3] = {.type = UOP_STOREA, .latency = 1}
|
|
};
|
|
|
|
static const risc86_instruction_t branch_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_BRANCH, .latency = 1}
|
|
};
|
|
|
|
static const risc86_instruction_t limm_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_LIMM, .latency = 1}
|
|
};
|
|
|
|
static const risc86_instruction_t load_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1}
|
|
};
|
|
|
|
static const risc86_instruction_t store_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[1] = {.type = UOP_STOREA, .latency = 1}
|
|
};
|
|
|
|
|
|
static const risc86_instruction_t bswap_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1},
|
|
};
|
|
static const risc86_instruction_t leave_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t lods_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t loop_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[1] = {.type = UOP_BRANCH, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t mov_reg_seg_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
};
|
|
static const risc86_instruction_t movs_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t pop_reg_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t pop_mem_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t push_imm_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[1] = {.type = UOP_STOREA, .latency = 1},
|
|
};
|
|
static const risc86_instruction_t push_mem_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t push_seg_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t stos_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t test_reg_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t test_reg_b_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t test_mem_imm_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t test_mem_imm_b_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t xchg_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
|
|
|
|
static const risc86_instruction_t mmx_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_MMX, .latency = 1.5}
|
|
};
|
|
static const risc86_instruction_t mmx_mul_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5}
|
|
};
|
|
static const risc86_instruction_t mmx_shift_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_MMX_SHIFT, .latency = 1.5}
|
|
};
|
|
static const risc86_instruction_t load_mmx_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_MMX, .latency = 1.5}
|
|
};
|
|
static const risc86_instruction_t load_mmx_mul_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 3},
|
|
.uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5}
|
|
};
|
|
static const risc86_instruction_t load_mmx_shift_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 3},
|
|
.uop[1] = {.type = UOP_MMX_SHIFT, .latency = 1.5}
|
|
};
|
|
static const risc86_instruction_t mload_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_MLOAD, .latency = 3},
|
|
};
|
|
|
|
static const risc86_instruction_t mstore_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_MSTORED, .latency = 1},
|
|
.uop[1] = {.type = UOP_MSTOREA, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t pmul_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_MMX_MUL, .latency = 1.5}
|
|
};
|
|
static const risc86_instruction_t pmul_mem_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 3},
|
|
.uop[1] = {.type = UOP_MMX_MUL, .latency = 1.5}
|
|
};
|
|
static const risc86_instruction_t float_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t fadd_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 3}
|
|
};
|
|
static const risc86_instruction_t fmul_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_SIMPLE,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 5}
|
|
};
|
|
static const risc86_instruction_t float2_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 1},
|
|
.uop[1] = {.type = UOP_FLOAT, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t fchs_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 2},
|
|
.uop[1] = {.type = UOP_FLOAT, .latency = 2},
|
|
.uop[2] = {.type = UOP_FLOAT, .latency = 2}
|
|
};
|
|
static const risc86_instruction_t load_float_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_FLOAT, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t load_fadd_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_FLOAT, .latency = 3}
|
|
};
|
|
static const risc86_instruction_t load_fmul_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 5}
|
|
};
|
|
static const risc86_instruction_t fstore_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FSTORED, .latency = 1},
|
|
.uop[1] = {.type = UOP_FSTOREA, .latency = 1},
|
|
};
|
|
static const risc86_instruction_t load_fiadd_op =
|
|
{
|
|
.nr_uops = 7,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_FLOAT, .latency = 1},
|
|
.uop[2] = {.type = UOP_FLOAT, .latency = 1},
|
|
.uop[3] = {.type = UOP_FLOAT, .latency = 1},
|
|
.uop[4] = {.type = UOP_FLOAT, .latency = 1},
|
|
.uop[5] = {.type = UOP_FLOAT, .latency = 1},
|
|
.uop[6] = {.type = UOP_FLOAT, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t fdiv_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 32}
|
|
};
|
|
static const risc86_instruction_t fdiv_mem_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_FLOAT, .latency = 38}
|
|
};
|
|
static const risc86_instruction_t fsin_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 60}
|
|
};
|
|
static const risc86_instruction_t fsqrt_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 69}
|
|
};
|
|
|
|
static const risc86_instruction_t complex_fldcw_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 10}
|
|
};
|
|
static const risc86_instruction_t complex_float_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_float_l_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 50}
|
|
};
|
|
static const risc86_instruction_t complex_flde_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_FLOAD, .latency = 1},
|
|
.uop[2] = {.type = UOP_FLOAT, .latency = 2}
|
|
};
|
|
static const risc86_instruction_t complex_fste_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_FLOAT, .latency = 2},
|
|
.uop[1] = {.type = UOP_FSTORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_FSTOREA, .latency = 1}
|
|
};
|
|
|
|
static const risc86_instruction_t complex_alu1_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_alu2_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_alu3_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_alu6_op =
|
|
{
|
|
.nr_uops = 6,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[4] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[5] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_alux1_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_alux3_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_alux6_op =
|
|
{
|
|
.nr_uops = 6,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[4] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[5] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_alu_store_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[2] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[3] = {.type = UOP_STOREA, .latency = 1},
|
|
};
|
|
static const risc86_instruction_t complex_alux_store_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[3] = {.type = UOP_STOREA, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_arpl_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 3},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 3}
|
|
};
|
|
static const risc86_instruction_t complex_bound_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_bsx_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 10}
|
|
};
|
|
static const risc86_instruction_t complex_call_far_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 3},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[3] = {.type = UOP_BRANCH, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_cli_sti_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 7}
|
|
};
|
|
static const risc86_instruction_t complex_cmps_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_cmpsb_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_cmpxchg_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[2] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[3] = {.type = UOP_STOREA, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_cmpxchg_b_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[3] = {.type = UOP_STOREA, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_cpuid_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 23}
|
|
};
|
|
static const risc86_instruction_t complex_div16_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 10}
|
|
};
|
|
static const risc86_instruction_t complex_div16_mem_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 10}
|
|
};
|
|
static const risc86_instruction_t complex_div32_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 18}
|
|
};
|
|
static const risc86_instruction_t complex_div32_mem_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 18}
|
|
};
|
|
static const risc86_instruction_t complex_emms_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 25}
|
|
};
|
|
static const risc86_instruction_t complex_enter_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[1] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 10}
|
|
};
|
|
static const risc86_instruction_t complex_femms_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 6}
|
|
};
|
|
static const risc86_instruction_t complex_in_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 10}
|
|
};
|
|
static const risc86_instruction_t complex_ins_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 10},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_int_op =
|
|
{
|
|
.nr_uops = 8,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 20},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[3] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[4] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[5] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[6] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[7] = {.type = UOP_BRANCH, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_iret_op =
|
|
{
|
|
.nr_uops = 5,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 3},
|
|
.uop[1] = {.type = UOP_LOAD, .latency = 3},
|
|
.uop[2] = {.type = UOP_LOAD, .latency = 3},
|
|
.uop[3] = {.type = UOP_ALU, .latency = 20},
|
|
.uop[4] = {.type = UOP_BRANCH, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_invd_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 500}
|
|
};
|
|
static const risc86_instruction_t complex_jmp_far_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 3},
|
|
.uop[1] = {.type = UOP_BRANCH, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_load_alu_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_load_alux_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_loop_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[1] = {.type = UOP_BRANCH, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_lss_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 3}
|
|
};
|
|
static const risc86_instruction_t complex_mov_mem_seg_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1},
|
|
};
|
|
static const risc86_instruction_t complex_mov_seg_mem_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 3}
|
|
};
|
|
static const risc86_instruction_t complex_mov_seg_reg_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 3}
|
|
};
|
|
static const risc86_instruction_t complex_mul_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_mul_mem_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_mul64_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_mul64_mem_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_out_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_STORED, .latency = 10},
|
|
.uop[1] = {.type = UOP_STOREA, .latency = 10},
|
|
};
|
|
static const risc86_instruction_t complex_outs_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 10},
|
|
.uop[1] = {.type = UOP_STOREA, .latency = 10},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_pusha_op =
|
|
{
|
|
.nr_uops = 8,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_STORED, .latency = 2},
|
|
.uop[1] = {.type = UOP_STOREA, .latency = 2},
|
|
.uop[2] = {.type = UOP_STORED, .latency = 2},
|
|
.uop[3] = {.type = UOP_STOREA, .latency = 2},
|
|
.uop[4] = {.type = UOP_STORED, .latency = 2},
|
|
.uop[5] = {.type = UOP_STOREA, .latency = 2},
|
|
.uop[6] = {.type = UOP_STORED, .latency = 2},
|
|
.uop[7] = {.type = UOP_STOREA, .latency = 2}
|
|
};
|
|
static const risc86_instruction_t complex_popa_op =
|
|
{
|
|
.nr_uops = 8,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[2] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[3] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[4] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[5] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[6] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[7] = {.type = UOP_LOAD, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_popf_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 3},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 17}
|
|
};
|
|
static const risc86_instruction_t complex_push_mem_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[1] = {.type = UOP_STOREA, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_pushf_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_ret_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_BRANCH, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_retf_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 3},
|
|
.uop[2] = {.type = UOP_BRANCH, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_scas_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_scasb_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_setcc_mem_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_FSTORED, .latency = 1},
|
|
.uop[3] = {.type = UOP_FSTOREA, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_setcc_reg_op =
|
|
{
|
|
.nr_uops = 3,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1},
|
|
.uop[2] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_test_mem_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_test_mem_b_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_ALUX, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_xchg_mem_op =
|
|
{
|
|
.nr_uops = 4,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_LOAD, .latency = 1},
|
|
.uop[1] = {.type = UOP_STORED, .latency = 1},
|
|
.uop[2] = {.type = UOP_STOREA, .latency = 1},
|
|
.uop[3] = {.type = UOP_ALU, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_xlat_op =
|
|
{
|
|
.nr_uops = 2,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 1},
|
|
.uop[1] = {.type = UOP_LOAD, .latency = 1}
|
|
};
|
|
static const risc86_instruction_t complex_wbinvd_op =
|
|
{
|
|
.nr_uops = 1,
|
|
.decode_type = DECODE_COMPLEX,
|
|
.uop[0] = {.type = UOP_ALU, .latency = 10000}
|
|
};
|
|
|
|
|
|
#define INVALID NULL
|
|
|
|
static const risc86_instruction_t *opcode_timings[256] =
|
|
{
|
|
/* ADD ADD ADD ADD*/
|
|
/*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
|
/* ADD ADD PUSH ES POP ES*/
|
|
&alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op,
|
|
/* OR OR OR OR*/
|
|
&alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
|
/* OR OR PUSH CS */
|
|
&alux_op, &alu_op, &push_seg_op, INVALID,
|
|
|
|
/* ADC ADC ADC ADC*/
|
|
/*10*/ &complex_alux_store_op,&complex_alu_store_op, &complex_load_alux_op,&complex_load_alu_op,
|
|
/* ADC ADC PUSH SS POP SS*/
|
|
&complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op,
|
|
/* SBB SBB SBB SBB*/
|
|
/*10*/ &complex_alux_store_op,&complex_alu_store_op, &complex_load_alux_op,&complex_load_alu_op,
|
|
/* SBB SBB PUSH DS POP DS*/
|
|
&complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op,
|
|
|
|
/* AND AND AND AND*/
|
|
/*20*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
|
/* AND AND DAA*/
|
|
&alux_op, &alu_op, INVALID, &complex_alux1_op,
|
|
/* SUB SUB SUB SUB*/
|
|
&alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
|
/* SUB SUB DAS*/
|
|
&alux_op, &alu_op, INVALID, &complex_alux1_op,
|
|
|
|
/* XOR XOR XOR XOR*/
|
|
/*30*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
|
/* XOR XOR AAA*/
|
|
&alux_op, &alu_op, INVALID, &complex_alux6_op,
|
|
/* CMP CMP CMP CMP*/
|
|
&load_alux_op, &load_alu_op, &load_alux_op, &load_alu_op,
|
|
/* CMP CMP AAS*/
|
|
&alux_op, &alu_op, INVALID, &complex_alux6_op,
|
|
|
|
/* INC EAX INC ECX INC EDX INC EBX*/
|
|
/*40*/ &alu_op, &alu_op, &alu_op, &alu_op,
|
|
/* INC ESP INC EBP INC ESI INC EDI*/
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
|
/* DEC EAX DEC ECX DEC EDX DEC EBX*/
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
|
/* DEC ESP DEC EBP DEC ESI DEC EDI*/
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
|
|
|
/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/
|
|
/*50*/ &store_op, &store_op, &store_op, &store_op,
|
|
/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/
|
|
&store_op, &store_op, &store_op, &store_op,
|
|
/* POP EAX POP ECX POP EDX POP EBX*/
|
|
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op,
|
|
/* POP ESP POP EBP POP ESI POP EDI*/
|
|
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op,
|
|
|
|
/* PUSHA POPA BOUND ARPL*/
|
|
/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
/* PUSH imm IMUL PUSH imm IMUL*/
|
|
&push_imm_op, &complex_mul_op, &push_imm_op, &complex_mul_op,
|
|
/* INSB INSW OUTSB OUTSW*/
|
|
&complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op,
|
|
|
|
/* Jxx*/
|
|
/*70*/ &branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
|
|
/*80*/ INVALID, INVALID, INVALID, INVALID,
|
|
/* TEST TEST XCHG XCHG*/
|
|
&complex_test_mem_b_op, &complex_test_mem_op, &complex_xchg_mem_op, &complex_xchg_mem_op,
|
|
/* MOV MOV MOV MOV*/
|
|
&store_op, &store_op, &load_op, &load_op,
|
|
/* MOV from seg LEA MOV to seg POP*/
|
|
&complex_mov_mem_seg_op, &store_op, &complex_mov_seg_mem_op, &pop_mem_op,
|
|
|
|
/* NOP XCHG XCHG XCHG*/
|
|
/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op,
|
|
/* XCHG XCHG XCHG XCHG*/
|
|
&xchg_op, &xchg_op, &xchg_op, &xchg_op,
|
|
/* CBW CWD CALL far WAIT*/
|
|
&complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op,
|
|
/* PUSHF POPF SAHF LAHF*/
|
|
&complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op,
|
|
|
|
/* MOV MOV MOV MOV*/
|
|
/*a0*/ &load_op, &load_op, &store_op, &store_op,
|
|
/* MOVSB MOVSW CMPSB CMPSW*/
|
|
&movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op,
|
|
/* TEST TEST STOSB STOSW*/
|
|
&test_reg_b_op, &test_reg_op, &stos_op, &stos_op,
|
|
/* LODSB LODSW SCASB SCASW*/
|
|
&lods_op, &lods_op, &complex_scasb_op, &complex_scas_op,
|
|
|
|
/* MOV*/
|
|
/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op,
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
|
|
|
/* RET imm RET*/
|
|
/*c0*/ INVALID, INVALID, &complex_ret_op, &complex_ret_op,
|
|
/* LES LDS MOV MOV*/
|
|
&complex_lss_op, &complex_lss_op, &store_op, &store_op,
|
|
/* ENTER LEAVE RETF RETF*/
|
|
&complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op,
|
|
/* INT3 INT INTO IRET*/
|
|
&complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op,
|
|
|
|
|
|
/*d0*/ INVALID, INVALID, INVALID, INVALID,
|
|
/* AAM AAD SETALC XLAT*/
|
|
&complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
/* LOOPNE LOOPE LOOP JCXZ*/
|
|
/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op,
|
|
/* IN AL IN AX OUT_AL OUT_AX*/
|
|
&complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op,
|
|
/* CALL JMP JMP JMP*/
|
|
&store_op, &branch_op, &complex_jmp_far_op, &branch_op,
|
|
/* IN AL IN AX OUT_AL OUT_AX*/
|
|
&complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op,
|
|
|
|
/* REPNE REPE*/
|
|
/*f0*/ INVALID, INVALID, INVALID, INVALID,
|
|
/* HLT CMC*/
|
|
&complex_alux1_op, &complex_alu2_op, INVALID, INVALID,
|
|
/* CLC STC CLI STI*/
|
|
&complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op,
|
|
/* CLD STD INCDEC*/
|
|
&complex_alu1_op, &complex_alu1_op, &alux_store_op, INVALID
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_mod3[256] =
|
|
{
|
|
/* ADD ADD ADD ADD*/
|
|
/*00*/ &alux_op, &alu_op, &alux_op, &alu_op,
|
|
/* ADD ADD PUSH ES POP ES*/
|
|
&alux_op, &alu_op, &push_seg_op, &complex_mov_seg_mem_op,
|
|
/* OR OR OR OR*/
|
|
&alux_op, &alu_op, &alux_op, &alu_op,
|
|
/* OR OR PUSH CS */
|
|
&alux_op, &alu_op, &push_seg_op, INVALID,
|
|
|
|
/* ADC ADC ADC ADC*/
|
|
/*10*/ &complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op,
|
|
/* ADC ADC PUSH SS POP SS*/
|
|
&complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op,
|
|
/* SBB SBB SBB SBB*/
|
|
&complex_alux1_op, &complex_alu1_op, &complex_alux1_op, &complex_alu1_op,
|
|
/* SBB SBB PUSH DS POP DS*/
|
|
&complex_alux1_op, &complex_alu1_op, &push_seg_op, &complex_mov_seg_mem_op,
|
|
|
|
/* AND AND AND AND*/
|
|
/*20*/ &alux_op, &alu_op, &alux_op, &alu_op,
|
|
/* AND AND DAA*/
|
|
&alux_op, &alu_op, INVALID, &complex_alux1_op,
|
|
/* SUB SUB SUB SUB*/
|
|
&alux_op, &alu_op, &alux_op, &alu_op,
|
|
/* SUB SUB DAS*/
|
|
&alux_op, &alu_op, INVALID, &complex_alux1_op,
|
|
|
|
/* XOR XOR XOR XOR*/
|
|
/*30*/ &alux_op, &alu_op, &alux_op, &alu_op,
|
|
/* XOR XOR AAA*/
|
|
&alux_op, &alu_op, INVALID, &complex_alux6_op,
|
|
/* CMP CMP CMP CMP*/
|
|
&alux_op, &alu_op, &alux_op, &alu_op,
|
|
/* CMP CMP AAS*/
|
|
&alux_op, &alu_op, INVALID, &complex_alux6_op,
|
|
|
|
/* INC EAX INC ECX INC EDX INC EBX*/
|
|
/*40*/ &alu_op, &alu_op, &alu_op, &alu_op,
|
|
/* INC ESP INC EBP INC ESI INC EDI*/
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
|
/* DEC EAX DEC ECX DEC EDX DEC EBX*/
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
|
/* DEC ESP DEC EBP DEC ESI DEC EDI*/
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
|
|
|
/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/
|
|
/*50*/ &store_op, &store_op, &store_op, &store_op,
|
|
/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/
|
|
&store_op, &store_op, &store_op, &store_op,
|
|
/* POP EAX POP ECX POP EDX POP EBX*/
|
|
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op,
|
|
/* POP ESP POP EBP POP ESI POP EDI*/
|
|
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op,
|
|
|
|
/* PUSHA POPA BOUND ARPL*/
|
|
/*60*/ &complex_pusha_op, &complex_popa_op, &complex_bound_op, &complex_arpl_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
/* PUSH imm IMUL PUSH imm IMUL*/
|
|
&push_imm_op, &complex_mul_op, &push_imm_op, &complex_mul_op,
|
|
/* INSB INSW OUTSB OUTSW*/
|
|
&complex_ins_op, &complex_ins_op, &complex_outs_op, &complex_outs_op,
|
|
|
|
/* Jxx*/
|
|
/*70*/ &branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
|
|
/*80*/ INVALID, INVALID, INVALID, INVALID,
|
|
/* TEST TEST XCHG XCHG*/
|
|
&complex_alu1_op, &complex_alu1_op, &complex_alu3_op, &complex_alu3_op,
|
|
/* MOV MOV MOV MOV*/
|
|
&store_op, &store_op, &load_op, &load_op,
|
|
/* MOV from seg LEA MOV to seg POP*/
|
|
&mov_reg_seg_op, &store_op, &complex_mov_seg_reg_op, &pop_reg_op,
|
|
|
|
/* NOP XCHG XCHG XCHG*/
|
|
/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op,
|
|
/* XCHG XCHG XCHG XCHG*/
|
|
&xchg_op, &xchg_op, &xchg_op, &xchg_op,
|
|
/* CBW CWD CALL far WAIT*/
|
|
&complex_alu1_op, &complex_alu1_op, &complex_call_far_op, &limm_op,
|
|
/* PUSHF POPF SAHF LAHF*/
|
|
&complex_pushf_op, &complex_popf_op, &complex_alux1_op, &complex_alux1_op,
|
|
|
|
/* MOV MOV MOV MOV*/
|
|
/*a0*/ &load_op, &load_op, &store_op, &store_op,
|
|
/* MOVSB MOVSW CMPSB CMPSW*/
|
|
&movs_op, &movs_op, &complex_cmpsb_op, &complex_cmps_op,
|
|
/* TEST TEST STOSB STOSW*/
|
|
&test_reg_b_op, &test_reg_op, &stos_op, &stos_op,
|
|
/* LODSB LODSW SCASB SCASW*/
|
|
&lods_op, &lods_op, &complex_scasb_op, &complex_scas_op,
|
|
|
|
/* MOV*/
|
|
/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op,
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
|
|
|
/* RET imm RET*/
|
|
/*c0*/ INVALID, INVALID, &complex_ret_op, &complex_ret_op,
|
|
/* LES LDS MOV MOV*/
|
|
&complex_lss_op, &complex_lss_op, &store_op, &store_op,
|
|
/* ENTER LEAVE RETF RETF*/
|
|
&complex_enter_op, &leave_op, &complex_retf_op, &complex_retf_op,
|
|
/* INT3 INT INTO IRET*/
|
|
&complex_int_op, &complex_int_op, &complex_int_op, &complex_iret_op,
|
|
|
|
|
|
/*d0*/ INVALID, INVALID, INVALID, INVALID,
|
|
/* AAM AAD SETALC XLAT*/
|
|
&complex_alux6_op, &complex_alux3_op, &complex_alux1_op, &complex_xlat_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
/* LOOPNE LOOPE LOOP JCXZ*/
|
|
/*e0*/ &complex_loop_op, &complex_loop_op, &loop_op, &complex_loop_op,
|
|
/* IN AL IN AX OUT_AL OUT_AX*/
|
|
&complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op,
|
|
/* CALL JMP JMP JMP*/
|
|
&store_op, &branch_op, &complex_jmp_far_op, &branch_op,
|
|
/* IN AL IN AX OUT_AL OUT_AX*/
|
|
&complex_in_op, &complex_in_op, &complex_out_op, &complex_out_op,
|
|
|
|
/* REPNE REPE*/
|
|
/*f0*/ INVALID, INVALID, INVALID, INVALID,
|
|
/* HLT CMC*/
|
|
&complex_alux1_op, &complex_alu2_op, INVALID, INVALID,
|
|
/* CLC STC CLI STI*/
|
|
&complex_alu1_op, &complex_alu1_op, &complex_cli_sti_op, &complex_cli_sti_op,
|
|
/* CLD STD INCDEC*/
|
|
&complex_alu1_op, &complex_alu1_op, &complex_alux1_op, INVALID
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_0f[256] =
|
|
{
|
|
/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op,
|
|
INVALID, &complex_alu6_op, &complex_alu6_op, INVALID,
|
|
&complex_invd_op, &complex_wbinvd_op, INVALID, INVALID,
|
|
INVALID, &load_op, &complex_femms_op, INVALID,
|
|
|
|
/*10*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op,
|
|
&complex_alu6_op, &complex_alu6_op, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*40*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*50*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*60*/ &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op,
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op,
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op,
|
|
INVALID, INVALID, &mload_op, &mload_op,
|
|
|
|
/*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op,
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, &complex_emms_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, &mstore_op, &mstore_op,
|
|
|
|
/*80*/ &branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
|
|
/*90*/ &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op,
|
|
&complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op,
|
|
&complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op,
|
|
&complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op, &complex_setcc_reg_op,
|
|
|
|
/*a0*/ &push_seg_op, &complex_mov_seg_mem_op,&complex_cpuid_op, &complex_load_alu_op,
|
|
&complex_alu_store_op, &complex_alu_store_op, INVALID, INVALID,
|
|
&push_seg_op, &complex_mov_seg_mem_op,INVALID, &complex_load_alu_op,
|
|
&complex_alu_store_op, &complex_alu_store_op, INVALID, &complex_mul_op,
|
|
|
|
/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_load_alu_op,
|
|
&complex_lss_op, &complex_lss_op, &load_alux_op, &load_alu_op,
|
|
INVALID, INVALID, &complex_load_alu_op, &complex_load_alu_op,
|
|
&complex_bsx_op, &complex_bsx_op, &load_alux_op, &load_alu_op,
|
|
|
|
/*c0*/ &complex_alux_store_op, &complex_alu_store_op, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, &complex_cmpxchg_op,
|
|
&bswap_op, &bswap_op, &bswap_op, &bswap_op,
|
|
&bswap_op, &bswap_op, &bswap_op, &bswap_op,
|
|
|
|
/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op,
|
|
INVALID, &load_mmx_mul_op, INVALID, INVALID,
|
|
&load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op,
|
|
&load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op,
|
|
|
|
/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID,
|
|
INVALID, &pmul_mem_op, INVALID, INVALID,
|
|
&load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op,
|
|
&load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op,
|
|
|
|
/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op,
|
|
INVALID, &pmul_mem_op, INVALID, INVALID,
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID,
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_0f_mod3[256] =
|
|
{
|
|
/*00*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op,
|
|
INVALID, &complex_alu6_op, &complex_alu6_op, INVALID,
|
|
&complex_invd_op, &complex_wbinvd_op, INVALID, INVALID,
|
|
INVALID, INVALID, &complex_femms_op, INVALID,
|
|
|
|
/*10*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*20*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, &complex_alu6_op,
|
|
&complex_alu6_op, &complex_alu6_op, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*30*/ &complex_alu6_op, &complex_alu6_op, &complex_alu6_op, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*40*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*50*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op,
|
|
&mmx_op, &mmx_op, &mmx_op, &mmx_op,
|
|
&mmx_op, &mmx_op, &mmx_op, &mmx_op,
|
|
INVALID, INVALID, &mmx_op, &mmx_op,
|
|
|
|
/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op,
|
|
&mmx_op, &mmx_op, &mmx_op, &complex_emms_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, &mmx_op, &mmx_op,
|
|
|
|
/*80*/ &branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
|
|
|
/*90*/ &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op,
|
|
&complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op,
|
|
&complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op,
|
|
&complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op, &complex_setcc_mem_op,
|
|
|
|
/*a0*/ &push_seg_op, &complex_mov_seg_mem_op, &complex_cpuid_op, &complex_alu1_op,
|
|
&complex_alu1_op, &complex_alu1_op, INVALID, INVALID,
|
|
&push_seg_op, &complex_mov_seg_mem_op, INVALID, &complex_alu1_op,
|
|
&complex_alu1_op, &complex_alu1_op, INVALID, &complex_mul_op,
|
|
|
|
/*b0*/ &complex_cmpxchg_b_op, &complex_cmpxchg_op, &complex_lss_op, &complex_alu1_op,
|
|
&complex_lss_op, &complex_lss_op, &alux_op, &alu_op,
|
|
INVALID, INVALID, &complex_alu1_op, &complex_alu1_op,
|
|
&complex_bsx_op, &complex_bsx_op, &alux_op, &alu_op,
|
|
|
|
/*c0*/ &complex_alux1_op, &complex_alu1_op, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
&bswap_op, &bswap_op, &bswap_op, &bswap_op,
|
|
&bswap_op, &bswap_op, &bswap_op, &bswap_op,
|
|
|
|
/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op,
|
|
INVALID, &mmx_mul_op, INVALID, INVALID,
|
|
&mmx_op, &mmx_op, INVALID, &mmx_op,
|
|
&mmx_op, &mmx_op, INVALID, &mmx_op,
|
|
|
|
/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID,
|
|
INVALID, &pmul_op, INVALID, INVALID,
|
|
&mmx_op, &mmx_op, INVALID, &mmx_op,
|
|
&mmx_op, &mmx_op, INVALID, &mmx_op,
|
|
|
|
/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op,
|
|
INVALID, &pmul_op, INVALID, INVALID,
|
|
&mmx_op, &mmx_op, &mmx_op, INVALID,
|
|
&mmx_op, &mmx_op, &mmx_op, INVALID,
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_0f0f[256] =
|
|
{
|
|
/*00*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*10*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*20*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*30*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*40*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*50*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*60*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*70*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*80*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*90*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*a0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, &load_mmx_mul_op, &load_mmx_mul_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*b0*/ INVALID, INVALID, INVALID, INVALID,
|
|
&load_mmx_mul_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, &load_mmx_op,
|
|
|
|
/*c0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*d0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*e0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*f0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_0f0f_mod3[256] =
|
|
{
|
|
/*00*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*10*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*20*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*30*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*40*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*50*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*60*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*70*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*80*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*90*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*a0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, &mmx_mul_op, &mmx_mul_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*b0*/ INVALID, INVALID, INVALID, INVALID,
|
|
&mmx_mul_op, INVALID, &mmx_mul_op, &mmx_mul_op,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, &mmx_op,
|
|
|
|
/*c0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*d0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*e0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/*f0*/ INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_shift[8] =
|
|
{
|
|
&complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op,
|
|
&complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op, &complex_alu_store_op
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_shift_b[8] =
|
|
{
|
|
&complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op,
|
|
&complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op, &complex_alux_store_op
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_shift_mod3[8] =
|
|
{
|
|
&complex_alu1_op, &complex_alu1_op, &complex_alu1_op, &complex_alu1_op,
|
|
&alu_op, &alu_op, &alu_op, &alu_op
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_shift_b_mod3[8] =
|
|
{
|
|
&complex_alux1_op, &complex_alux1_op, &complex_alux1_op, &complex_alux1_op,
|
|
&alux_op, &alux_op, &alux_op, &alux_op
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_80[8] =
|
|
{
|
|
&alux_store_op, &alux_store_op, &complex_alux_store_op, &complex_alux_store_op,
|
|
&alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_80_mod3[8] =
|
|
{
|
|
&alux_op, &alux_op, &alux_store_op, &alux_store_op,
|
|
&alux_op, &alux_op, &alux_op, &alux_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_8x[8] =
|
|
{
|
|
&alu_store_op, &alu_store_op, &complex_alu_store_op, &complex_alu_store_op,
|
|
&alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_8x_mod3[8] =
|
|
{
|
|
&alu_op, &alu_op, &alu_store_op, &alu_store_op,
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_f6[8] =
|
|
{
|
|
/* TST NOT NEG*/
|
|
&test_mem_imm_b_op, INVALID, &complex_alux_store_op, &complex_alux_store_op,
|
|
/* MUL IMUL DIV IDIV*/
|
|
&complex_mul_mem_op, &complex_mul_mem_op, &complex_div16_mem_op, &complex_div16_mem_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_f6_mod3[8] =
|
|
{
|
|
/* TST NOT NEG*/
|
|
&test_reg_b_op, INVALID, &alux_op, &alux_op,
|
|
/* MUL IMUL DIV IDIV*/
|
|
&complex_mul_op, &complex_mul_op, &complex_div16_op, &complex_div16_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_f7[8] =
|
|
{
|
|
/* TST NOT NEG*/
|
|
&test_mem_imm_op, INVALID, &complex_alu_store_op, &complex_alu_store_op,
|
|
/* MUL IMUL DIV IDIV*/
|
|
&complex_mul64_mem_op, &complex_mul64_mem_op, &complex_div32_mem_op, &complex_div32_mem_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_f7_mod3[8] =
|
|
{
|
|
/* TST NOT NEG*/
|
|
&test_reg_op, INVALID, &alu_op, &alu_op,
|
|
/* MUL IMUL DIV IDIV*/
|
|
&complex_mul64_op, &complex_mul64_op, &complex_div32_op, &complex_div32_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_ff[8] =
|
|
{
|
|
/* INC DEC CALL CALL far*/
|
|
&alu_store_op, &alu_store_op, &store_op, &complex_call_far_op,
|
|
/* JMP JMP far PUSH*/
|
|
&branch_op, &complex_jmp_far_op, &push_mem_op, INVALID
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_ff_mod3[8] =
|
|
{
|
|
/* INC DEC CALL CALL far*/
|
|
&complex_alu1_op, &complex_alu1_op, &store_op, &complex_call_far_op,
|
|
/* JMP JMP far PUSH*/
|
|
&branch_op, &complex_jmp_far_op, &complex_push_mem_op, INVALID
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_d8[8] =
|
|
{
|
|
/* FADDs FMULs FCOMs FCOMPs*/
|
|
&load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op,
|
|
/* FSUBs FSUBRs FDIVs FDIVRs*/
|
|
&load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_d8_mod3[8] =
|
|
{
|
|
/* FADD FMUL FCOM FCOMP*/
|
|
&fadd_op, &fmul_op, &float_op, &float_op,
|
|
/* FSUB FSUBR FDIV FDIVR*/
|
|
&float_op, &float_op, &fdiv_op, &fdiv_op,
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_d9[8] =
|
|
{
|
|
/* FLDs FSTs FSTPs*/
|
|
&load_float_op, INVALID, &fstore_op, &fstore_op,
|
|
/* FLDENV FLDCW FSTENV FSTCW*/
|
|
&complex_float_l_op, &complex_fldcw_op, &complex_float_l_op, &complex_float_op
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_d9_mod3[64] =
|
|
{
|
|
/*FLD*/
|
|
&float_op, &float_op, &float_op, &float_op,
|
|
&float_op, &float_op, &float_op, &float_op,
|
|
/*FXCH*/
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
|
/*FNOP*/
|
|
&float_op, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
/*FSTP*/
|
|
&float2_op, &float2_op, &float2_op, &float2_op,
|
|
&float2_op, &float2_op, &float2_op, &float2_op,
|
|
/* opFCHS opFABS*/
|
|
&fchs_op, &float_op, INVALID, INVALID,
|
|
/* opFTST opFXAM*/
|
|
&float_op, &float_op, INVALID, INVALID,
|
|
/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/
|
|
&float_op, &float_op, &float_op, &float_op,
|
|
/* opFLDEG2 opFLDLN2 opFLDZ*/
|
|
&float_op, &float_op, &float_op, INVALID,
|
|
/* opF2XM1 opFYL2X opFPTAN opFPATAN*/
|
|
&fsin_op, &fsin_op, &fsin_op, &fsin_op,
|
|
/* opFDECSTP opFINCSTP,*/
|
|
INVALID, INVALID, &float_op, &float_op,
|
|
/* opFPREM opFSQRT opFSINCOS*/
|
|
&fdiv_op, INVALID, &fsqrt_op, &fsin_op,
|
|
/* opFRNDINT opFSCALE opFSIN opFCOS*/
|
|
&float_op, &fdiv_op, &fsin_op, &fsin_op
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_da[8] =
|
|
{
|
|
/* FIADDl FIMULl FICOMl FICOMPl*/
|
|
&load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op,
|
|
/* FISUBl FISUBRl FIDIVl FIDIVRl*/
|
|
&load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_da_mod3[8] =
|
|
{
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
/* FCOMPP*/
|
|
INVALID, &float_op, INVALID, INVALID
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_db[8] =
|
|
{
|
|
/* FLDil FSTil FSTPil*/
|
|
&load_float_op, INVALID, &fstore_op, &fstore_op,
|
|
/* FLDe FSTPe*/
|
|
INVALID, &complex_flde_op, INVALID, &complex_fste_op
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_db_mod3[64] =
|
|
{
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
/* opFNOP opFCLEX opFINIT*/
|
|
INVALID, &float_op, &float_op, &float_op,
|
|
/* opFNOP opFNOP*/
|
|
&float_op, &float_op, INVALID, INVALID,
|
|
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_dc[8] =
|
|
{
|
|
/* FADDd FMULd FCOMd FCOMPd*/
|
|
&load_fadd_op, &load_fmul_op, &load_float_op, &load_float_op,
|
|
/* FSUBd FSUBRd FDIVd FDIVRd*/
|
|
&load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_dc_mod3[8] =
|
|
{
|
|
/* opFADDr opFMULr*/
|
|
&fadd_op, &fmul_op, INVALID, INVALID,
|
|
/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/
|
|
&float_op, &float_op, &fdiv_op, &fdiv_op
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_dd[8] =
|
|
{
|
|
/* FLDd FSTd FSTPd*/
|
|
&load_float_op, INVALID, &fstore_op, &fstore_op,
|
|
/* FRSTOR FSAVE FSTSW*/
|
|
&complex_float_l_op, INVALID, &complex_float_l_op, &complex_float_l_op
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_dd_mod3[8] =
|
|
{
|
|
/* FFFREE FST FSTP*/
|
|
&float_op, INVALID, &float_op, &float_op,
|
|
/* FUCOM FUCOMP*/
|
|
&float_op, &float_op, INVALID, INVALID
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_de[8] =
|
|
{
|
|
/* FIADDw FIMULw FICOMw FICOMPw*/
|
|
&load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op,
|
|
/* FISUBw FISUBRw FIDIVw FIDIVRw*/
|
|
&load_fiadd_op, &load_fiadd_op, &load_fiadd_op, &load_fiadd_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_de_mod3[8] =
|
|
{
|
|
/* FADDP FMULP FCOMPP*/
|
|
&fadd_op, &fmul_op, INVALID, &float_op,
|
|
/* FSUBP FSUBRP FDIVP FDIVRP*/
|
|
&float_op, &float_op, &fdiv_op, &fdiv_op,
|
|
};
|
|
|
|
static const risc86_instruction_t *opcode_timings_df[8] =
|
|
{
|
|
/* FILDiw FISTiw FISTPiw*/
|
|
&load_float_op, INVALID, &fstore_op, &fstore_op,
|
|
/* FILDiq FBSTP FISTPiq*/
|
|
INVALID, &load_float_op, &complex_float_l_op, &fstore_op,
|
|
};
|
|
static const risc86_instruction_t *opcode_timings_df_mod3[8] =
|
|
{
|
|
INVALID, INVALID, INVALID, INVALID,
|
|
/* FSTSW AX*/
|
|
&float_op, INVALID, INVALID, INVALID
|
|
};
|
|
|
|
|
|
static uint8_t last_prefix;
|
|
static int prefixes;
|
|
|
|
static int decode_timestamp;
|
|
static int last_complete_timestamp;
|
|
|
|
typedef struct p6_unit_t
|
|
{
|
|
uint32_t uop_mask;
|
|
double first_available_cycle;
|
|
} p6_unit_t;
|
|
|
|
static int nr_units;
|
|
static p6_unit_t *units;
|
|
|
|
/*Pentium Pro has no MMX*/
|
|
static p6_unit_t ppro_units[] =
|
|
{
|
|
{.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT)}, /*Integer X & Floating point*/
|
|
{.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH)}, /*Integer Y*/
|
|
{.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD)}, /*Load*/
|
|
{.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED)}, /*Data Store*/
|
|
{.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA)}, /*Address Store*/
|
|
};
|
|
#define NR_PPRO_UNITS (sizeof(ppro_units) / sizeof(p6_unit_t))
|
|
|
|
/*Pentium II/Celeron assigns the multiplier to port 0, the shifter to port 1, and shares the MMX ALU*/
|
|
static p6_unit_t p2_units[] =
|
|
{
|
|
{.uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_FLOAT) | /*Integer X & Floating point*/
|
|
(1 << UOP_MMX) | (1 << UOP_MMX_MUL)},
|
|
{.uop_mask = (1 << UOP_ALU) | (1 << UOP_BRANCH) | /*Integer Y*/
|
|
(1 << UOP_MMX) | (1 << UOP_MMX_SHIFT)},
|
|
{.uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD)}, /*Load*/
|
|
{.uop_mask = (1 << UOP_STORED) | (1 << UOP_FSTORED) | (1 << UOP_MSTORED)}, /*Data Store*/
|
|
{.uop_mask = (1 << UOP_STOREA) | (1 << UOP_FSTOREA) | (1 << UOP_MSTOREA)}, /*Address Store*/
|
|
};
|
|
#define NR_P2_UNITS (sizeof(p2_units) / sizeof(p6_unit_t))
|
|
|
|
static int uop_run(const risc86_uop_t *uop, int decode_time)
|
|
{
|
|
int c;
|
|
p6_unit_t *best_unit = NULL;
|
|
double best_start_cycle = 99999;
|
|
|
|
/*UOP_LIMM does not require execution*/
|
|
if (uop->type == UOP_LIMM)
|
|
return decode_time;
|
|
|
|
/*Find execution unit for this uOP*/
|
|
for (c = 0; c < nr_units; c++)
|
|
{
|
|
if (units[c].uop_mask & (1 << uop->type))
|
|
{
|
|
if (units[c].first_available_cycle < best_start_cycle)
|
|
{
|
|
best_unit = &units[c];
|
|
best_start_cycle = units[c].first_available_cycle;
|
|
}
|
|
}
|
|
}
|
|
if (!best_unit)
|
|
fatal("uop_run: can not find execution unit\n");
|
|
|
|
if (best_start_cycle < decode_time)
|
|
best_start_cycle = decode_time;
|
|
best_unit->first_available_cycle = best_start_cycle + uop->latency;
|
|
|
|
|
|
|
|
return best_start_cycle + uop->latency;
|
|
}
|
|
|
|
/*The P6 decoders can decode, per clock :
|
|
- 1 to 3 'simple' instructions, each up to 1 uOP and 7 bytes long
|
|
- 1 'complex' instruction, up to 4 uOPs or 3 per cycle for instructions longer than 4 uOPs
|
|
*/
|
|
static struct
|
|
{
|
|
int nr_uops;
|
|
const risc86_uop_t *uops[6];
|
|
/*Earliest time a uop can start. If the timestamp is -1, then the uop is
|
|
part of a dependency chain and the start time is the completion time of
|
|
the previous uop*/
|
|
int earliest_start[6];
|
|
} decode_buffer;
|
|
|
|
#define NR_OPSEQS 3
|
|
/*Timestamps of when the last three op sequences completed. Technically this is incorrect,
|
|
as the actual size of the opseq buffer is 20 bytes and not 18, but I'm restricted to multiples of 6*/
|
|
static int opseq_completion_timestamp[NR_OPSEQS];
|
|
static int next_opseq = 0;
|
|
|
|
#define NR_REGS 8
|
|
/*Timestamp of when last operation on an integer register completed*/
|
|
static int reg_available_timestamp[NR_REGS];
|
|
/*Timestamp of when last operation on an FPU register completed*/
|
|
static int fpu_st_timestamp[8];
|
|
/*Completion time of the last uop to be processed. Used to calculate timing of
|
|
dependent uop chains*/
|
|
static int last_uop_timestamp = 0;
|
|
|
|
void decode_flush_p6()
|
|
{
|
|
int c;
|
|
int uop_timestamp = 0;
|
|
|
|
/*Decoded opseq can not be submitted if there are no free spaces in the
|
|
opseq buffer*/
|
|
if (decode_timestamp < opseq_completion_timestamp[next_opseq])
|
|
decode_timestamp = opseq_completion_timestamp[next_opseq];
|
|
|
|
/*Ensure that uops can not be submitted before they have been decoded*/
|
|
if (decode_timestamp > last_uop_timestamp)
|
|
last_uop_timestamp = decode_timestamp;
|
|
|
|
/*Submit uops to execution units, and determine the latest completion time*/
|
|
for (c = 0; c < (decode_buffer.nr_uops); c++)
|
|
{
|
|
int start_timestamp;
|
|
|
|
|
|
start_timestamp = decode_buffer.earliest_start[c];
|
|
|
|
last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp);
|
|
if (last_uop_timestamp > uop_timestamp)
|
|
uop_timestamp = last_uop_timestamp;
|
|
}
|
|
|
|
/*Calculate opseq completion time. Since opseqs complete in order, it
|
|
must be after the last completion.*/
|
|
if (uop_timestamp <= last_complete_timestamp)
|
|
last_complete_timestamp = last_complete_timestamp + 1;
|
|
else
|
|
last_complete_timestamp = uop_timestamp;
|
|
|
|
/*Advance to next opseq in buffer*/
|
|
opseq_completion_timestamp[next_opseq] = last_complete_timestamp;
|
|
next_opseq++;
|
|
if (next_opseq == NR_OPSEQS)
|
|
next_opseq = 0;
|
|
|
|
decode_timestamp++;
|
|
decode_buffer.nr_uops = 0;
|
|
}
|
|
|
|
/*The instruction is only of interest here if it's longer than 7 bytes, as that's the
|
|
limit on P6 simple decoding*/
|
|
static int codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32)
|
|
{
|
|
int len = prefixes + 1; /*Opcode*/
|
|
if (deps & MODRM)
|
|
{
|
|
len++; /*ModR/M*/
|
|
if (deps & HAS_IMM8)
|
|
len++;
|
|
if (deps & HAS_IMM1632)
|
|
len += (op_32 & 0x100) ? 4 : 2;
|
|
|
|
if (op_32 & 0x200)
|
|
{
|
|
if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0)
|
|
{
|
|
/* Has SIB*/
|
|
len++;
|
|
if ((fetchdat & 0xc0) == 0x40)
|
|
len++;
|
|
else if ((fetchdat & 0xc0) == 0x80)
|
|
len += 4;
|
|
else if ((fetchdat & 0x700) == 0x500)
|
|
len += 4;
|
|
}
|
|
else
|
|
{
|
|
if ((fetchdat & 0xc0) == 0x40)
|
|
len++;
|
|
else if ((fetchdat & 0xc0) == 0x80)
|
|
len += 4;
|
|
else if ((fetchdat & 0xc7) == 0x05)
|
|
len += 4;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if ((fetchdat & 0xc0) == 0x40)
|
|
len++;
|
|
else if ((fetchdat & 0xc0) == 0x80)
|
|
len += 2;
|
|
else if ((fetchdat & 0xc7) == 0x06)
|
|
len += 2;
|
|
}
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
static void decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8)
|
|
{
|
|
uint32_t regmask_required;
|
|
uint32_t regmask_modified;
|
|
int c;
|
|
int d = 0; /*Complex decoder uOPs*/
|
|
int earliest_start = 0;
|
|
decode_type_t decode_type = ins->decode_type;
|
|
int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32);
|
|
|
|
/*Generate input register mask, and determine the earliest time this
|
|
instruction can start. This is not accurate, as this is calculated per
|
|
x86 instruction when it should be handled per uop*/
|
|
regmask_required = get_dstdep_mask(deps, fetchdat, bit8);
|
|
regmask_required |= get_addr_regmask(deps, fetchdat, op_32);
|
|
for (c = 0; c < 8; c++)
|
|
{
|
|
if (regmask_required & (1 << c))
|
|
{
|
|
if (reg_available_timestamp[c] > decode_timestamp)
|
|
earliest_start = reg_available_timestamp[c];
|
|
}
|
|
}
|
|
if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp)
|
|
earliest_start = fpu_st_timestamp[0];
|
|
if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp)
|
|
earliest_start = fpu_st_timestamp[1];
|
|
if ((deps & FPU_RW_STREG))
|
|
{
|
|
int reg = fetchdat & 7;
|
|
|
|
if (fpu_st_timestamp[reg] > decode_timestamp)
|
|
earliest_start = fpu_st_timestamp[reg];
|
|
}
|
|
|
|
/*Simple decoders are limited to 7 bytes & 1 uOP*/
|
|
if (decode_type == DECODE_SIMPLE && instr_length > 7)
|
|
decode_type = DECODE_COMPLEX;
|
|
else if (decode_type == DECODE_SIMPLE && ins->nr_uops > 1)
|
|
decode_type = DECODE_COMPLEX;
|
|
|
|
switch (decode_type)
|
|
{
|
|
case DECODE_SIMPLE:
|
|
if (decode_buffer.nr_uops - d == 2)
|
|
{
|
|
decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0];
|
|
decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start;
|
|
decode_buffer.nr_uops = 3;
|
|
decode_flush_p6();
|
|
}
|
|
else if (decode_buffer.nr_uops - d == 1)
|
|
{
|
|
decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0];
|
|
decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start;
|
|
decode_buffer.nr_uops = 2+d;
|
|
if (d)
|
|
decode_flush_p6();
|
|
}
|
|
else if (decode_buffer.nr_uops)
|
|
{
|
|
decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0];
|
|
decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start;
|
|
decode_buffer.nr_uops = 1+d;
|
|
}
|
|
else
|
|
{
|
|
decode_buffer.nr_uops = 1;
|
|
decode_buffer.uops[0] = &ins->uop[0];
|
|
decode_buffer.earliest_start[0] = earliest_start;
|
|
}
|
|
break;
|
|
|
|
case DECODE_COMPLEX:
|
|
if (decode_buffer.nr_uops)
|
|
decode_flush_p6(); /*The 4-1-1 arrangement implies that a complex ins. can't be decoded after a simple one*/
|
|
|
|
d = 0;
|
|
|
|
for (c = 0; c < ins->nr_uops; c++)
|
|
{
|
|
decode_buffer.uops[d] = &ins->uop[c];
|
|
decode_buffer.earliest_start[c] = earliest_start;
|
|
d++;
|
|
|
|
if (d == 3 && ins->nr_uops > 4) /*Ins. with >4 uOPs require the use of special units only present on 3 translate PLAs*/
|
|
{
|
|
d = 0;
|
|
decode_buffer.nr_uops = 3;
|
|
decode_flush_p6(); /*The other two decoders are halted to preserve in-order issue*/
|
|
}
|
|
}
|
|
if (d)
|
|
{
|
|
decode_buffer.nr_uops = d;
|
|
}
|
|
break;
|
|
}
|
|
|
|
/*Update write timestamps for any output registers*/
|
|
regmask_modified = get_dstdep_mask(deps, fetchdat, bit8);
|
|
for (c = 0; c < 8; c++)
|
|
{
|
|
if (regmask_modified & (1 << c))
|
|
reg_available_timestamp[c] = last_complete_timestamp;
|
|
}
|
|
if (deps & FPU_POP)
|
|
{
|
|
for (c = 0; c < 7; c++)
|
|
fpu_st_timestamp[c] = fpu_st_timestamp[c+1];
|
|
fpu_st_timestamp[7] = 0;
|
|
}
|
|
if (deps & FPU_POP2)
|
|
{
|
|
for (c = 0; c < 6; c++)
|
|
fpu_st_timestamp[c] = fpu_st_timestamp[c+2];
|
|
fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0;
|
|
}
|
|
if (deps & FPU_PUSH)
|
|
{
|
|
for (c = 0; c < 7; c++)
|
|
fpu_st_timestamp[c+1] = fpu_st_timestamp[c];
|
|
fpu_st_timestamp[0] = 0;
|
|
}
|
|
if (deps & FPU_WRITE_ST0)
|
|
fpu_st_timestamp[0] = last_complete_timestamp;
|
|
if (deps & FPU_WRITE_ST1)
|
|
fpu_st_timestamp[1] = last_complete_timestamp;
|
|
if (deps & FPU_WRITE_STREG)
|
|
{
|
|
int reg = fetchdat & 7;
|
|
if (deps & FPU_POP)
|
|
reg--;
|
|
if (reg >= 0 &&
|
|
!(reg == 0 && (deps & FPU_WRITE_ST0)) &&
|
|
!(reg == 1 && (deps & FPU_WRITE_ST1)))
|
|
fpu_st_timestamp[reg] = last_complete_timestamp;
|
|
}
|
|
}
|
|
|
|
void codegen_timing_p6_block_start()
|
|
{
|
|
int c;
|
|
|
|
for (c = 0; c < nr_units; c++)
|
|
units[c].first_available_cycle = 0;
|
|
|
|
decode_timestamp = 0;
|
|
last_complete_timestamp = 0;
|
|
|
|
for (c = 0; c < NR_OPSEQS; c++)
|
|
opseq_completion_timestamp[c] = 0;
|
|
next_opseq = 0;
|
|
|
|
for (c = 0; c < NR_REGS; c++)
|
|
reg_available_timestamp[c] = 0;
|
|
for (c = 0; c < 8; c++)
|
|
fpu_st_timestamp[c] = 0;
|
|
}
|
|
|
|
void codegen_timing_p6_start()
|
|
{
|
|
if (machines[machine].cpu[cpu_manufacturer].cpus[cpu].cpu_type == CPU_PENTIUMPRO)
|
|
{
|
|
units = ppro_units;
|
|
nr_units = NR_PPRO_UNITS;
|
|
}
|
|
else
|
|
{
|
|
units = p2_units;
|
|
nr_units = NR_P2_UNITS;
|
|
}
|
|
last_prefix = 0;
|
|
prefixes = 0;
|
|
}
|
|
|
|
void codegen_timing_p6_prefix(uint8_t prefix, uint32_t fetchdat)
|
|
{
|
|
if (prefix != 0x0f)
|
|
decode_timestamp++;
|
|
|
|
last_prefix = prefix;
|
|
prefixes++;
|
|
}
|
|
|
|
void codegen_timing_p6_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc)
|
|
{
|
|
const risc86_instruction_t **ins_table;
|
|
uint64_t *deps;
|
|
int mod3 = ((fetchdat & 0xc0) == 0xc0);
|
|
int old_last_complete_timestamp = last_complete_timestamp;
|
|
int bit8 = !(opcode & 1);
|
|
|
|
switch (last_prefix)
|
|
{
|
|
case 0x0f:
|
|
if (opcode == 0x0f)
|
|
{
|
|
/*3DNow has the actual opcode after ModR/M, SIB and any offset*/
|
|
uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/
|
|
uint8_t modrm = fetchdat & 0xff;
|
|
uint8_t sib = (fetchdat >> 8) & 0xff;
|
|
|
|
if ((modrm & 0xc0) != 0xc0)
|
|
{
|
|
if (op_32 & 0x200)
|
|
{
|
|
if ((modrm & 7) == 4)
|
|
{
|
|
/* Has SIB*/
|
|
opcode_pc++;
|
|
if ((modrm & 0xc0) == 0x40)
|
|
opcode_pc++;
|
|
else if ((modrm & 0xc0) == 0x80)
|
|
opcode_pc += 4;
|
|
else if ((sib & 0x07) == 0x05)
|
|
opcode_pc += 4;
|
|
}
|
|
else
|
|
{
|
|
if ((modrm & 0xc0) == 0x40)
|
|
opcode_pc++;
|
|
else if ((modrm & 0xc0) == 0x80)
|
|
opcode_pc += 4;
|
|
else if ((modrm & 0xc7) == 0x05)
|
|
opcode_pc += 4;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if ((modrm & 0xc0) == 0x40)
|
|
opcode_pc++;
|
|
else if ((modrm & 0xc0) == 0x80)
|
|
opcode_pc += 2;
|
|
else if ((modrm & 0xc7) == 0x06)
|
|
opcode_pc += 2;
|
|
}
|
|
}
|
|
|
|
opcode = fastreadb(cs + opcode_pc);
|
|
|
|
ins_table = mod3 ? opcode_timings_0f0f_mod3 : opcode_timings_0f0f;
|
|
deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f;
|
|
}
|
|
else
|
|
{
|
|
ins_table = mod3 ? opcode_timings_0f_mod3 : opcode_timings_0f;
|
|
deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f;
|
|
}
|
|
break;
|
|
|
|
case 0xd8:
|
|
ins_table = mod3 ? opcode_timings_d8_mod3 : opcode_timings_d8;
|
|
deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8;
|
|
opcode = (opcode >> 3) & 7;
|
|
break;
|
|
case 0xd9:
|
|
ins_table = mod3 ? opcode_timings_d9_mod3 : opcode_timings_d9;
|
|
deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9;
|
|
opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7;
|
|
break;
|
|
case 0xda:
|
|
ins_table = mod3 ? opcode_timings_da_mod3 : opcode_timings_da;
|
|
deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da;
|
|
opcode = (opcode >> 3) & 7;
|
|
break;
|
|
case 0xdb:
|
|
ins_table = mod3 ? opcode_timings_db_mod3 : opcode_timings_db;
|
|
deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db;
|
|
opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7;
|
|
break;
|
|
case 0xdc:
|
|
ins_table = mod3 ? opcode_timings_dc_mod3 : opcode_timings_dc;
|
|
deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc;
|
|
opcode = (opcode >> 3) & 7;
|
|
break;
|
|
case 0xdd:
|
|
ins_table = mod3 ? opcode_timings_dd_mod3 : opcode_timings_dd;
|
|
deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd;
|
|
opcode = (opcode >> 3) & 7;
|
|
break;
|
|
case 0xde:
|
|
ins_table = mod3 ? opcode_timings_de_mod3 : opcode_timings_de;
|
|
deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de;
|
|
opcode = (opcode >> 3) & 7;
|
|
break;
|
|
case 0xdf:
|
|
ins_table = mod3 ? opcode_timings_df_mod3 : opcode_timings_df;
|
|
deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df;
|
|
opcode = (opcode >> 3) & 7;
|
|
break;
|
|
|
|
default:
|
|
switch (opcode)
|
|
{
|
|
case 0x80: case 0x82:
|
|
ins_table = mod3 ? opcode_timings_80_mod3 : opcode_timings_80;
|
|
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
|
|
opcode = (fetchdat >> 3) & 7;
|
|
break;
|
|
case 0x81: case 0x83:
|
|
ins_table = mod3 ? opcode_timings_8x_mod3 : opcode_timings_8x;
|
|
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
|
|
opcode = (fetchdat >> 3) & 7;
|
|
break;
|
|
|
|
case 0xc0: case 0xd0: case 0xd2:
|
|
ins_table = mod3 ? opcode_timings_shift_b_mod3 : opcode_timings_shift_b;
|
|
deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift;
|
|
opcode = (fetchdat >> 3) & 7;
|
|
break;
|
|
|
|
case 0xc1: case 0xd1: case 0xd3:
|
|
ins_table = mod3 ? opcode_timings_shift_mod3 : opcode_timings_shift;
|
|
deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift;
|
|
opcode = (fetchdat >> 3) & 7;
|
|
break;
|
|
|
|
case 0xf6:
|
|
ins_table = mod3 ? opcode_timings_f6_mod3 : opcode_timings_f6;
|
|
deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6;
|
|
opcode = (fetchdat >> 3) & 7;
|
|
break;
|
|
case 0xf7:
|
|
ins_table = mod3 ? opcode_timings_f7_mod3 : opcode_timings_f7;
|
|
deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7;
|
|
opcode = (fetchdat >> 3) & 7;
|
|
break;
|
|
case 0xff:
|
|
ins_table = mod3 ? opcode_timings_ff_mod3 : opcode_timings_ff;
|
|
deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff;
|
|
opcode = (fetchdat >> 3) & 7;
|
|
break;
|
|
|
|
default:
|
|
ins_table = mod3 ? opcode_timings_mod3 : opcode_timings;
|
|
deps = mod3 ? opcode_deps_mod3 : opcode_deps;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (ins_table[opcode])
|
|
decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8);
|
|
else
|
|
decode_instruction(&complex_alu1_op, 0, fetchdat, op_32, bit8);
|
|
codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp);
|
|
}
|
|
|
|
void codegen_timing_p6_block_end()
|
|
{
|
|
if (decode_buffer.nr_uops)
|
|
{
|
|
int old_last_complete_timestamp = last_complete_timestamp;
|
|
decode_flush_p6();
|
|
codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp);
|
|
}
|
|
}
|
|
|
|
int codegen_timing_p6_jump_cycles()
|
|
{
|
|
if (decode_buffer.nr_uops)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
codegen_timing_t codegen_timing_p6 =
|
|
{
|
|
codegen_timing_p6_start,
|
|
codegen_timing_p6_prefix,
|
|
codegen_timing_p6_opcode,
|
|
codegen_timing_p6_block_start,
|
|
codegen_timing_p6_block_end,
|
|
codegen_timing_p6_jump_cycles
|
|
}; |