2233 lines
109 KiB
C
2233 lines
109 KiB
C
|
|
/*Most of the vector instructions here are a total guess.
|
||
|
|
Some of the timings are based on http://http://web.archive.org/web/20181122095446/http://users.atw.hu/instlatx64/AuthenticAMD0000562_k5_InstLatX86.txt*/
|
||
|
|
#include <stdio.h>
|
||
|
|
#include <stdint.h>
|
||
|
|
#include <string.h>
|
||
|
|
#include <wchar.h>
|
||
|
|
#include <86box/86box.h>
|
||
|
|
#include <86box/mem.h>
|
||
|
|
#include "cpu.h"
|
||
|
|
#include <86box/machine.h>
|
||
|
|
|
||
|
|
#include "x86.h"
|
||
|
|
#include "x86_ops.h"
|
||
|
|
#include "x86seg_common.h"
|
||
|
|
#include "x87_sf.h"
|
||
|
|
#include "x87.h"
|
||
|
|
#include "386_common.h"
|
||
|
|
#include "codegen.h"
|
||
|
|
#include "codegen_ops.h"
|
||
|
|
#include "codegen_timing_common.h"
|
||
|
|
|
||
|
|
typedef enum uop_type_t {
|
||
|
|
UOP_ALU = 0, /*Executes in Integer X or Y units*/
|
||
|
|
UOP_ALUX, /*Executes in Integer X unit*/
|
||
|
|
UOP_LOAD, /*Executes in Load unit*/
|
||
|
|
UOP_STORE, /*Executes in Store unit*/
|
||
|
|
UOP_FLOAD, /*Executes in Load unit*/
|
||
|
|
UOP_FSTORE, /*Executes in Store unit*/
|
||
|
|
UOP_MLOAD, /*Executes in Load unit*/
|
||
|
|
UOP_MSTORE, /*Executes in Store unit*/
|
||
|
|
UOP_FLOAT, /*Executes in Floating Point unit*/
|
||
|
|
UOP_MEU, /*Executes in Multimedia unit*/
|
||
|
|
UOP_MEU_SHIFT, /*Executes in Multimedia unit or ALU X/Y. Uses MMX shifter*/
|
||
|
|
UOP_MEU_MUL, /*Executes in Multimedia unit or ALU X/Y. Uses MMX/3DNow multiplier*/
|
||
|
|
UOP_MEU_3DN, /*Executes in Multimedia unit or ALU X/Y. Uses 3DNow ALU*/
|
||
|
|
UOP_BRANCH, /*Executes in Branch unit*/
|
||
|
|
UOP_LIMM /*Does not require an execution unit*/
|
||
|
|
} uop_type_t;
|
||
|
|
|
||
|
|
typedef enum decode_type_t {
|
||
|
|
DECODE_SHORT,
|
||
|
|
DECODE_LONG,
|
||
|
|
DECODE_VECTOR
|
||
|
|
} decode_type_t;
|
||
|
|
|
||
|
|
#define MAX_UOPS 10
|
||
|
|
|
||
|
|
typedef struct risc86_uop_t {
|
||
|
|
uop_type_t type;
|
||
|
|
int throughput;
|
||
|
|
int latency;
|
||
|
|
} risc86_uop_t;
|
||
|
|
|
||
|
|
typedef struct risc86_instruction_t {
|
||
|
|
int nr_uops;
|
||
|
|
decode_type_t decode_type;
|
||
|
|
risc86_uop_t uop[MAX_UOPS];
|
||
|
|
} risc86_instruction_t;
|
||
|
|
|
||
|
|
static const risc86_instruction_t alu_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t alux_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t load_alu_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t load_alux_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t alu_store_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t alux_store_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t branch_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_BRANCH, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t limm_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LIMM, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t load_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t store_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t bswap_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t leave_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t lods_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t loop_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t mov_reg_seg_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t movs_op = {
|
||
|
|
.nr_uops = 4,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t pop_reg_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t pop_mem_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t push_imm_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2},
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t push_mem_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t push_seg_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t stos_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[1] = {.type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t test_reg_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t test_reg_b_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t test_mem_imm_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t test_mem_imm_b_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t xchg_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_LONG,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t m3dn_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_MEU_3DN, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t mmx_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_MEU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t mmx_mul_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t mmx_shift_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t load_3dn_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_MEU_3DN, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t load_mmx_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_MEU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t load_mmx_mul_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t load_mmx_shift_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_MEU_SHIFT, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t mload_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_MLOAD, .throughput = 1, .latency = 2}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t mstore_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_MSTORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t pmul_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_MEU_MUL, .throughput = 1, .latency = 2}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t pmul_mem_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_MEU_MUL, .throughput = 1, .latency = 2}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t float_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t load_float_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t fstore_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_FSTORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t fdiv_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_FLOAT, .throughput = 40, .latency = 40}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t fdiv_mem_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2 },
|
||
|
|
.uop[1] = { .type = UOP_FLOAT, .throughput = 40, .latency = 40}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t fsin_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_FLOAT, .throughput = 62, .latency = 62}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t fsqrt_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_SHORT,
|
||
|
|
.uop[0] = {.type = UOP_FLOAT, .throughput = 41, .latency = 41}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t vector_fldcw_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_FLOAT, .throughput = 8, .latency = 8}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_float_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_float_l_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_FLOAT, .throughput = 50, .latency = 50}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_flde_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_FLOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_FLOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[2] = { .type = UOP_FLOAT, .throughput = 2, .latency = 2}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_fste_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_FLOAT, .throughput = 2, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t vector_alu1_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_alu2_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_alu3_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_alu6_op = {
|
||
|
|
.nr_uops = 6,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[4] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[5] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_alux1_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_alux3_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_alux6_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[3] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[4] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[5] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_alu_store_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_alux_store_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_arpl_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_bound_op = {
|
||
|
|
.nr_uops = 4,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[3] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_bsx_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 10, .latency = 10}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_call_far_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_cli_sti_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 7, .latency = 7}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_cmps_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_cmpsb_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_cmpxchg_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_cmpxchg_b_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_cpuid_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 22, .latency = 22}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_div16_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 10, .latency = 10}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_div16_mem_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 },
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 10, .latency = 10}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_div32_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 18, .latency = 18}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_div32_mem_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 },
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 18, .latency = 18}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_emms_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 25, .latency = 25}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_enter_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 2 },
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 10, .latency = 10}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_femms_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 6, .latency = 6}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_in_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_ins_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 10, .latency = 11},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 },
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 }
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_int_op = {
|
||
|
|
.nr_uops = 5,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 20, .latency = 20},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1 },
|
||
|
|
.uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1 },
|
||
|
|
.uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1 },
|
||
|
|
.uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 }
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_iret_op = {
|
||
|
|
.nr_uops = 5,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 },
|
||
|
|
.uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 },
|
||
|
|
.uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 2 },
|
||
|
|
.uop[3] = { .type = UOP_ALU, .throughput = 20, .latency = 20},
|
||
|
|
.uop[4] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1 }
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_invd_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1000, .latency = 1000}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_jmp_far_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3},
|
||
|
|
.uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_load_alu_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_load_alux_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_loop_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_lss_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 3, .latency = 3}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_mov_mem_seg_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_mov_seg_mem_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_mov_seg_reg_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 3, .latency = 3}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_mul_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_mul_mem_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_mul64_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_mul64_mem_op = {
|
||
|
|
.nr_uops = 4,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[3] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_out_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_STORE, .throughput = 10, .latency = 10}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_outs_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1 },
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 10, .latency = 10},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1 }
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_pusha_op = {
|
||
|
|
.nr_uops = 8,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[3] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[4] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[5] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[6] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[7] = { .type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_popa_op = {
|
||
|
|
.nr_uops = 8,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_LOAD, .throughput = 1, .latency = 1},
|
||
|
|
.uop[3] = { .type = UOP_LOAD, .throughput = 1, .latency = 1},
|
||
|
|
.uop[4] = { .type = UOP_LOAD, .throughput = 1, .latency = 1},
|
||
|
|
.uop[5] = { .type = UOP_LOAD, .throughput = 1, .latency = 1},
|
||
|
|
.uop[6] = { .type = UOP_LOAD, .throughput = 1, .latency = 1},
|
||
|
|
.uop[7] = { .type = UOP_LOAD, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_popf_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2 },
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 17, .latency = 17}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_push_mem_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_pushf_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_ret_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_retf_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 3, .latency = 3},
|
||
|
|
.uop[2] = { .type = UOP_BRANCH, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_scas_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_scasb_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_setcc_mem_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_FSTORE, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_setcc_reg_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_test_mem_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_test_mem_b_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 2},
|
||
|
|
.uop[1] = { .type = UOP_ALUX, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_xchg_mem_op = {
|
||
|
|
.nr_uops = 3,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_LOAD, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_STORE, .throughput = 1, .latency = 1},
|
||
|
|
.uop[2] = { .type = UOP_ALU, .throughput = 1, .latency = 1}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_xlat_op = {
|
||
|
|
.nr_uops = 2,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 1, .latency = 1},
|
||
|
|
.uop[1] = { .type = UOP_LOAD, .throughput = 1, .latency = 2}
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t vector_wbinvd_op = {
|
||
|
|
.nr_uops = 1,
|
||
|
|
.decode_type = DECODE_VECTOR,
|
||
|
|
.uop[0] = {.type = UOP_ALU, .throughput = 10000, .latency = 10000}
|
||
|
|
};
|
||
|
|
|
||
|
|
#define INVALID NULL
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5[256] = {
|
||
|
|
// clang-format off
|
||
|
|
/* ADD ADD ADD ADD*/
|
||
|
|
/*00*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
||
|
|
/* ADD ADD PUSH ES POP ES*/
|
||
|
|
&alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op,
|
||
|
|
/* OR OR OR OR*/
|
||
|
|
&alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
||
|
|
/* OR OR PUSH CS */
|
||
|
|
&alux_op, &alu_op, &push_seg_op, INVALID,
|
||
|
|
|
||
|
|
/* ADC ADC ADC ADC*/
|
||
|
|
/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op,
|
||
|
|
/* ADC ADC PUSH SS POP SS*/
|
||
|
|
&vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op,
|
||
|
|
/* SBB SBB SBB SBB*/
|
||
|
|
/*10*/ &vector_alux_store_op, &vector_alu_store_op, &vector_load_alux_op, &vector_load_alu_op,
|
||
|
|
/* SBB SBB PUSH DS POP DS*/
|
||
|
|
&vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op,
|
||
|
|
|
||
|
|
/* AND AND AND AND*/
|
||
|
|
/*20*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
||
|
|
/* AND AND DAA*/
|
||
|
|
&alux_op, &alu_op, INVALID, &vector_alux1_op,
|
||
|
|
/* SUB SUB SUB SUB*/
|
||
|
|
&alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
||
|
|
/* SUB SUB DAS*/
|
||
|
|
&alux_op, &alu_op, INVALID, &vector_alux1_op,
|
||
|
|
|
||
|
|
/* XOR XOR XOR XOR*/
|
||
|
|
/*30*/ &alux_store_op, &alu_store_op, &load_alux_op, &load_alu_op,
|
||
|
|
/* XOR XOR AAA*/
|
||
|
|
&alux_op, &alu_op, INVALID, &vector_alux6_op,
|
||
|
|
/* CMP CMP CMP CMP*/
|
||
|
|
&load_alux_op, &load_alu_op, &load_alux_op, &load_alu_op,
|
||
|
|
/* CMP CMP AAS*/
|
||
|
|
&alux_op, &alu_op, INVALID, &vector_alux6_op,
|
||
|
|
|
||
|
|
/* INC EAX INC ECX INC EDX INC EBX*/
|
||
|
|
/*40*/ &alu_op, &alu_op, &alu_op, &alu_op,
|
||
|
|
/* INC ESP INC EBP INC ESI INC EDI*/
|
||
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
||
|
|
/* DEC EAX DEC ECX DEC EDX DEC EBX*/
|
||
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
||
|
|
/* DEC ESP DEC EBP DEC ESI DEC EDI*/
|
||
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
||
|
|
|
||
|
|
/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/
|
||
|
|
/*50*/ &store_op, &store_op, &store_op, &store_op,
|
||
|
|
/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/
|
||
|
|
&store_op, &store_op, &store_op, &store_op,
|
||
|
|
/* POP EAX POP ECX POP EDX POP EBX*/
|
||
|
|
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op,
|
||
|
|
/* POP ESP POP EBP POP ESI POP EDI*/
|
||
|
|
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op,
|
||
|
|
|
||
|
|
/* PUSHA POPA BOUND ARPL*/
|
||
|
|
/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* PUSH imm IMUL PUSH imm IMUL*/
|
||
|
|
&push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op,
|
||
|
|
/* INSB INSW OUTSB OUTSW*/
|
||
|
|
&vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op,
|
||
|
|
|
||
|
|
/* Jxx*/
|
||
|
|
/*70*/ &branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
|
||
|
|
/*80*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* TEST TEST XCHG XCHG*/
|
||
|
|
&vector_test_mem_b_op, &vector_test_mem_op, &vector_xchg_mem_op, &vector_xchg_mem_op,
|
||
|
|
/* MOV MOV MOV MOV*/
|
||
|
|
&store_op, &store_op, &load_op, &load_op,
|
||
|
|
/* MOV from seg LEA MOV to seg POP*/
|
||
|
|
&vector_mov_mem_seg_op, &store_op, &vector_mov_seg_mem_op, &pop_mem_op,
|
||
|
|
|
||
|
|
/* NOP XCHG XCHG XCHG*/
|
||
|
|
/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op,
|
||
|
|
/* XCHG XCHG XCHG XCHG*/
|
||
|
|
&xchg_op, &xchg_op, &xchg_op, &xchg_op,
|
||
|
|
/* CBW CWD CALL far WAIT*/
|
||
|
|
&vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op,
|
||
|
|
/* PUSHF POPF SAHF LAHF*/
|
||
|
|
&vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op,
|
||
|
|
|
||
|
|
/* MOV MOV MOV MOV*/
|
||
|
|
/*a0*/ &load_op, &load_op, &store_op, &store_op,
|
||
|
|
/* MOVSB MOVSW CMPSB CMPSW*/
|
||
|
|
&movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op,
|
||
|
|
/* TEST TEST STOSB STOSW*/
|
||
|
|
&test_reg_b_op, &test_reg_op, &stos_op, &stos_op,
|
||
|
|
/* LODSB LODSW SCASB SCASW*/
|
||
|
|
&lods_op, &lods_op, &vector_scasb_op, &vector_scas_op,
|
||
|
|
|
||
|
|
/* MOV*/
|
||
|
|
/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op,
|
||
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
||
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
||
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
||
|
|
|
||
|
|
/* RET imm RET*/
|
||
|
|
/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op,
|
||
|
|
/* LES LDS MOV MOV*/
|
||
|
|
&vector_lss_op, &vector_lss_op, &store_op, &store_op,
|
||
|
|
/* ENTER LEAVE RETF RETF*/
|
||
|
|
&vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op,
|
||
|
|
/* INT3 INT INTO IRET*/
|
||
|
|
&vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op,
|
||
|
|
|
||
|
|
|
||
|
|
/*d0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* AAM AAD SETALC XLAT*/
|
||
|
|
&vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* LOOPNE LOOPE LOOP JCXZ*/
|
||
|
|
/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op,
|
||
|
|
/* IN AL IN AX OUT_AL OUT_AX*/
|
||
|
|
&vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op,
|
||
|
|
/* CALL JMP JMP JMP*/
|
||
|
|
&store_op, &branch_op, &vector_jmp_far_op, &branch_op,
|
||
|
|
/* IN AL IN AX OUT_AL OUT_AX*/
|
||
|
|
&vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op,
|
||
|
|
|
||
|
|
/* REPNE REPE*/
|
||
|
|
/*f0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* HLT CMC*/
|
||
|
|
&vector_alux1_op, &vector_alu2_op, INVALID, INVALID,
|
||
|
|
/* CLC STC CLI STI*/
|
||
|
|
&vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op,
|
||
|
|
/* CLD STD INCDEC*/
|
||
|
|
&vector_alu1_op, &vector_alu1_op, &alux_store_op, INVALID
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_mod3[256] = {
|
||
|
|
// clang-format off
|
||
|
|
/* ADD ADD ADD ADD*/
|
||
|
|
/*00*/ &alux_op, &alu_op, &alux_op, &alu_op,
|
||
|
|
/* ADD ADD PUSH ES POP ES*/
|
||
|
|
&alux_op, &alu_op, &push_seg_op, &vector_mov_seg_mem_op,
|
||
|
|
/* OR OR OR OR*/
|
||
|
|
&alux_op, &alu_op, &alux_op, &alu_op,
|
||
|
|
/* OR OR PUSH CS */
|
||
|
|
&alux_op, &alu_op, &push_seg_op, INVALID,
|
||
|
|
|
||
|
|
/* ADC ADC ADC ADC*/
|
||
|
|
/*10*/ &vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op,
|
||
|
|
/* ADC ADC PUSH SS POP SS*/
|
||
|
|
&vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op,
|
||
|
|
/* SBB SBB SBB SBB*/
|
||
|
|
&vector_alux1_op, &vector_alu1_op, &vector_alux1_op, &vector_alu1_op,
|
||
|
|
/* SBB SBB PUSH DS POP DS*/
|
||
|
|
&vector_alux1_op, &vector_alu1_op, &push_seg_op, &vector_mov_seg_mem_op,
|
||
|
|
|
||
|
|
/* AND AND AND AND*/
|
||
|
|
/*20*/ &alux_op, &alu_op, &alux_op, &alu_op,
|
||
|
|
/* AND AND DAA*/
|
||
|
|
&alux_op, &alu_op, INVALID, &vector_alux1_op,
|
||
|
|
/* SUB SUB SUB SUB*/
|
||
|
|
&alux_op, &alu_op, &alux_op, &alu_op,
|
||
|
|
/* SUB SUB DAS*/
|
||
|
|
&alux_op, &alu_op, INVALID, &vector_alux1_op,
|
||
|
|
|
||
|
|
/* XOR XOR XOR XOR*/
|
||
|
|
/*30*/ &alux_op, &alu_op, &alux_op, &alu_op,
|
||
|
|
/* XOR XOR AAA*/
|
||
|
|
&alux_op, &alu_op, INVALID, &vector_alux6_op,
|
||
|
|
/* CMP CMP CMP CMP*/
|
||
|
|
&alux_op, &alu_op, &alux_op, &alu_op,
|
||
|
|
/* CMP CMP AAS*/
|
||
|
|
&alux_op, &alu_op, INVALID, &vector_alux6_op,
|
||
|
|
|
||
|
|
/* INC EAX INC ECX INC EDX INC EBX*/
|
||
|
|
/*40*/ &alu_op, &alu_op, &alu_op, &alu_op,
|
||
|
|
/* INC ESP INC EBP INC ESI INC EDI*/
|
||
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
||
|
|
/* DEC EAX DEC ECX DEC EDX DEC EBX*/
|
||
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
||
|
|
/* DEC ESP DEC EBP DEC ESI DEC EDI*/
|
||
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
||
|
|
|
||
|
|
/* PUSH EAX PUSH ECX PUSH EDX PUSH EBX*/
|
||
|
|
/*50*/ &store_op, &store_op, &store_op, &store_op,
|
||
|
|
/* PUSH ESP PUSH EBP PUSH ESI PUSH EDI*/
|
||
|
|
&store_op, &store_op, &store_op, &store_op,
|
||
|
|
/* POP EAX POP ECX POP EDX POP EBX*/
|
||
|
|
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op,
|
||
|
|
/* POP ESP POP EBP POP ESI POP EDI*/
|
||
|
|
&pop_reg_op, &pop_reg_op, &pop_reg_op, &pop_reg_op,
|
||
|
|
|
||
|
|
/* PUSHA POPA BOUND ARPL*/
|
||
|
|
/*60*/ &vector_pusha_op, &vector_popa_op, &vector_bound_op, &vector_arpl_op,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* PUSH imm IMUL PUSH imm IMUL*/
|
||
|
|
&push_imm_op, &vector_mul_op, &push_imm_op, &vector_mul_op,
|
||
|
|
/* INSB INSW OUTSB OUTSW*/
|
||
|
|
&vector_ins_op, &vector_ins_op, &vector_outs_op, &vector_outs_op,
|
||
|
|
|
||
|
|
/* Jxx*/
|
||
|
|
/*70*/ &branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
|
||
|
|
/*80*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* TEST TEST XCHG XCHG*/
|
||
|
|
&vector_alu1_op, &vector_alu1_op, &vector_alu3_op, &vector_alu3_op,
|
||
|
|
/* MOV MOV MOV MOV*/
|
||
|
|
&store_op, &store_op, &load_op, &load_op,
|
||
|
|
/* MOV from seg LEA MOV to seg POP*/
|
||
|
|
&mov_reg_seg_op, &store_op, &vector_mov_seg_reg_op, &pop_reg_op,
|
||
|
|
|
||
|
|
/* NOP XCHG XCHG XCHG*/
|
||
|
|
/*90*/ &limm_op, &xchg_op, &xchg_op, &xchg_op,
|
||
|
|
/* XCHG XCHG XCHG XCHG*/
|
||
|
|
&xchg_op, &xchg_op, &xchg_op, &xchg_op,
|
||
|
|
/* CBW CWD CALL far WAIT*/
|
||
|
|
&vector_alu1_op, &vector_alu1_op, &vector_call_far_op, &limm_op,
|
||
|
|
/* PUSHF POPF SAHF LAHF*/
|
||
|
|
&vector_pushf_op, &vector_popf_op, &vector_alux1_op, &vector_alux1_op,
|
||
|
|
|
||
|
|
/* MOV MOV MOV MOV*/
|
||
|
|
/*a0*/ &load_op, &load_op, &store_op, &store_op,
|
||
|
|
/* MOVSB MOVSW CMPSB CMPSW*/
|
||
|
|
&movs_op, &movs_op, &vector_cmpsb_op, &vector_cmps_op,
|
||
|
|
/* TEST TEST STOSB STOSW*/
|
||
|
|
&test_reg_b_op, &test_reg_op, &stos_op, &stos_op,
|
||
|
|
/* LODSB LODSW SCASB SCASW*/
|
||
|
|
&lods_op, &lods_op, &vector_scasb_op, &vector_scas_op,
|
||
|
|
|
||
|
|
/* MOV*/
|
||
|
|
/*b0*/ &limm_op, &limm_op, &limm_op, &limm_op,
|
||
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
||
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
||
|
|
&limm_op, &limm_op, &limm_op, &limm_op,
|
||
|
|
|
||
|
|
/* RET imm RET*/
|
||
|
|
/*c0*/ INVALID, INVALID, &vector_ret_op, &vector_ret_op,
|
||
|
|
/* LES LDS MOV MOV*/
|
||
|
|
&vector_lss_op, &vector_lss_op, &store_op, &store_op,
|
||
|
|
/* ENTER LEAVE RETF RETF*/
|
||
|
|
&vector_enter_op, &leave_op, &vector_retf_op, &vector_retf_op,
|
||
|
|
/* INT3 INT INTO IRET*/
|
||
|
|
&vector_int_op, &vector_int_op, &vector_int_op, &vector_iret_op,
|
||
|
|
|
||
|
|
|
||
|
|
/*d0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* AAM AAD SETALC XLAT*/
|
||
|
|
&vector_alux6_op, &vector_alux3_op, &vector_alux1_op, &vector_xlat_op,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* LOOPNE LOOPE LOOP JCXZ*/
|
||
|
|
/*e0*/ &vector_loop_op, &vector_loop_op, &loop_op, &vector_loop_op,
|
||
|
|
/* IN AL IN AX OUT_AL OUT_AX*/
|
||
|
|
&vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op,
|
||
|
|
/* CALL JMP JMP JMP*/
|
||
|
|
&store_op, &branch_op, &vector_jmp_far_op, &branch_op,
|
||
|
|
/* IN AL IN AX OUT_AL OUT_AX*/
|
||
|
|
&vector_in_op, &vector_in_op, &vector_out_op, &vector_out_op,
|
||
|
|
|
||
|
|
/* REPNE REPE*/
|
||
|
|
/*f0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* HLT CMC*/
|
||
|
|
&vector_alux1_op, &vector_alu2_op, INVALID, INVALID,
|
||
|
|
/* CLC STC CLI STI*/
|
||
|
|
&vector_alu1_op, &vector_alu1_op, &vector_cli_sti_op, &vector_cli_sti_op,
|
||
|
|
/* CLD STD INCDEC*/
|
||
|
|
&vector_alu1_op, &vector_alu1_op, &vector_alux1_op, INVALID
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_0f[256] = {
|
||
|
|
// clang-format off
|
||
|
|
/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op,
|
||
|
|
INVALID, &vector_alu6_op, &vector_alu6_op, INVALID,
|
||
|
|
&vector_invd_op, &vector_wbinvd_op, INVALID, INVALID,
|
||
|
|
INVALID, &load_op, &vector_femms_op, &load_3dn_op,
|
||
|
|
|
||
|
|
/*10*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op,
|
||
|
|
&vector_alu6_op, &vector_alu6_op, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*40*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*50*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*60*/ &load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op,
|
||
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op,
|
||
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, &load_mmx_op,
|
||
|
|
INVALID, INVALID, &mload_op, &mload_op,
|
||
|
|
|
||
|
|
/*70*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op,
|
||
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, &vector_emms_op,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, &mstore_op, &mstore_op,
|
||
|
|
|
||
|
|
/*80*/ &branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
|
||
|
|
/*90*/ &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op,
|
||
|
|
&vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op,
|
||
|
|
&vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op,
|
||
|
|
&vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op, &vector_setcc_reg_op,
|
||
|
|
|
||
|
|
/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_load_alu_op,
|
||
|
|
&vector_alu_store_op, &vector_alu_store_op, INVALID, INVALID,
|
||
|
|
&push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_load_alu_op,
|
||
|
|
&vector_alu_store_op, &vector_alu_store_op, INVALID, &vector_mul_op,
|
||
|
|
|
||
|
|
/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_load_alu_op,
|
||
|
|
&vector_lss_op, &vector_lss_op, &load_alux_op, &load_alu_op,
|
||
|
|
INVALID, INVALID, &vector_load_alu_op, &vector_load_alu_op,
|
||
|
|
&vector_bsx_op, &vector_bsx_op, &load_alux_op, &load_alu_op,
|
||
|
|
|
||
|
|
/*c0*/ &vector_alux_store_op, &vector_alu_store_op, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, &vector_cmpxchg_op,
|
||
|
|
&bswap_op, &bswap_op, &bswap_op, &bswap_op,
|
||
|
|
&bswap_op, &bswap_op, &bswap_op, &bswap_op,
|
||
|
|
|
||
|
|
/*d0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op,
|
||
|
|
INVALID, &load_mmx_mul_op, INVALID, INVALID,
|
||
|
|
&load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op,
|
||
|
|
&load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op,
|
||
|
|
|
||
|
|
/*e0*/ &load_mmx_op, &load_mmx_shift_op, &load_mmx_shift_op, INVALID,
|
||
|
|
INVALID, &pmul_mem_op, INVALID, INVALID,
|
||
|
|
&load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op,
|
||
|
|
&load_mmx_op, &load_mmx_op, INVALID, &load_mmx_op,
|
||
|
|
|
||
|
|
/*f0*/ INVALID, &load_mmx_shift_op, &load_mmx_shift_op, &load_mmx_shift_op,
|
||
|
|
INVALID, &pmul_mem_op, INVALID, INVALID,
|
||
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID,
|
||
|
|
&load_mmx_op, &load_mmx_op, &load_mmx_op, INVALID,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_0f_mod3[256] = {
|
||
|
|
// clang-format off
|
||
|
|
/*00*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op,
|
||
|
|
INVALID, &vector_alu6_op, &vector_alu6_op, INVALID,
|
||
|
|
&vector_invd_op, &vector_wbinvd_op, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, &vector_femms_op, &m3dn_op,
|
||
|
|
|
||
|
|
/*10*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*20*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, &vector_alu6_op,
|
||
|
|
&vector_alu6_op, &vector_alu6_op, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*30*/ &vector_alu6_op, &vector_alu6_op, &vector_alu6_op, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*40*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*50*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*60*/ &mmx_op, &mmx_op, &mmx_op, &mmx_op,
|
||
|
|
&mmx_op, &mmx_op, &mmx_op, &mmx_op,
|
||
|
|
&mmx_op, &mmx_op, &mmx_op, &mmx_op,
|
||
|
|
INVALID, INVALID, &mmx_op, &mmx_op,
|
||
|
|
|
||
|
|
/*70*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op,
|
||
|
|
&mmx_op, &mmx_op, &mmx_op, &vector_emms_op,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, &mmx_op, &mmx_op,
|
||
|
|
|
||
|
|
/*80*/ &branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
&branch_op, &branch_op, &branch_op, &branch_op,
|
||
|
|
|
||
|
|
/*90*/ &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op,
|
||
|
|
&vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op,
|
||
|
|
&vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op,
|
||
|
|
&vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op, &vector_setcc_mem_op,
|
||
|
|
|
||
|
|
/*a0*/ &push_seg_op, &vector_mov_seg_mem_op, &vector_cpuid_op, &vector_alu1_op,
|
||
|
|
&vector_alu1_op, &vector_alu1_op, INVALID, INVALID,
|
||
|
|
&push_seg_op, &vector_mov_seg_mem_op, INVALID, &vector_alu1_op,
|
||
|
|
&vector_alu1_op, &vector_alu1_op, INVALID, &vector_mul_op,
|
||
|
|
|
||
|
|
/*b0*/ &vector_cmpxchg_b_op, &vector_cmpxchg_op, &vector_lss_op, &vector_alu1_op,
|
||
|
|
&vector_lss_op, &vector_lss_op, &alux_op, &alu_op,
|
||
|
|
INVALID, INVALID, &vector_alu1_op, &vector_alu1_op,
|
||
|
|
&vector_bsx_op, &vector_bsx_op, &alux_op, &alu_op,
|
||
|
|
|
||
|
|
/*c0*/ &vector_alux1_op, &vector_alu1_op, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
&bswap_op, &bswap_op, &bswap_op, &bswap_op,
|
||
|
|
&bswap_op, &bswap_op, &bswap_op, &bswap_op,
|
||
|
|
|
||
|
|
/*d0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op,
|
||
|
|
INVALID, &mmx_mul_op, INVALID, INVALID,
|
||
|
|
&mmx_op, &mmx_op, INVALID, &mmx_op,
|
||
|
|
&mmx_op, &mmx_op, INVALID, &mmx_op,
|
||
|
|
|
||
|
|
/*e0*/ &mmx_op, &mmx_shift_op, &mmx_shift_op, INVALID,
|
||
|
|
INVALID, &pmul_op, INVALID, INVALID,
|
||
|
|
&mmx_op, &mmx_op, INVALID, &mmx_op,
|
||
|
|
&mmx_op, &mmx_op, INVALID, &mmx_op,
|
||
|
|
|
||
|
|
/*f0*/ INVALID, &mmx_shift_op, &mmx_shift_op, &mmx_shift_op,
|
||
|
|
INVALID, &pmul_op, INVALID, INVALID,
|
||
|
|
&mmx_op, &mmx_op, &mmx_op, INVALID,
|
||
|
|
&mmx_op, &mmx_op, &mmx_op, INVALID,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_0f0f[256] = {
|
||
|
|
// clang-format off
|
||
|
|
/*00*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, &load_3dn_op, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*10*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, &load_3dn_op, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*20*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*30*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*40*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*50*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*60*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*70*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*80*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*90*/ &load_3dn_op, INVALID, INVALID, INVALID,
|
||
|
|
&load_3dn_op, INVALID, &load_3dn_op, &load_3dn_op,
|
||
|
|
INVALID, INVALID, &load_3dn_op, INVALID,
|
||
|
|
INVALID, INVALID, &load_3dn_op, INVALID,
|
||
|
|
|
||
|
|
/*a0*/ &load_3dn_op, INVALID, INVALID, INVALID,
|
||
|
|
&load_3dn_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op,
|
||
|
|
INVALID, INVALID, &load_3dn_op, INVALID,
|
||
|
|
INVALID, INVALID, &load_3dn_op, INVALID,
|
||
|
|
|
||
|
|
/*b0*/ &load_3dn_op, INVALID, INVALID, INVALID,
|
||
|
|
&load_mmx_mul_op, INVALID, &load_mmx_mul_op, &load_mmx_mul_op,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, &load_mmx_op,
|
||
|
|
|
||
|
|
/*c0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*d0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*e0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*f0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_0f0f_mod3[256] = {
|
||
|
|
// clang-format off
|
||
|
|
/*00*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, &m3dn_op, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*10*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, &m3dn_op, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*20*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*30*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*40*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*50*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*60*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*70*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*80*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*90*/ &m3dn_op, INVALID, INVALID, INVALID,
|
||
|
|
&m3dn_op, INVALID, &m3dn_op, &m3dn_op,
|
||
|
|
INVALID, INVALID, &m3dn_op, INVALID,
|
||
|
|
INVALID, INVALID, &m3dn_op, INVALID,
|
||
|
|
|
||
|
|
/*a0*/ &m3dn_op, INVALID, INVALID, INVALID,
|
||
|
|
&m3dn_op, INVALID, &mmx_mul_op, &mmx_mul_op,
|
||
|
|
INVALID, INVALID, &m3dn_op, INVALID,
|
||
|
|
INVALID, INVALID, &m3dn_op, INVALID,
|
||
|
|
|
||
|
|
/*b0*/ &m3dn_op, INVALID, INVALID, INVALID,
|
||
|
|
&mmx_mul_op, INVALID, &mmx_mul_op, &mmx_mul_op,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, &mmx_op,
|
||
|
|
|
||
|
|
/*c0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*d0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*e0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/*f0*/ INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_shift[8] = {
|
||
|
|
// clang-format off
|
||
|
|
&vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op,
|
||
|
|
&vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op, &vector_alu_store_op
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_shift_b[8] = {
|
||
|
|
// clang-format off
|
||
|
|
&vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op,
|
||
|
|
&vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op, &vector_alux_store_op
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_shift_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
&vector_alu1_op, &vector_alu1_op, &vector_alu1_op, &vector_alu1_op,
|
||
|
|
&alu_op, &alu_op, &alu_op, &alu_op
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_shift_b_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
&vector_alux1_op, &vector_alux1_op, &vector_alux1_op, &vector_alux1_op,
|
||
|
|
&alux_op, &alux_op, &alux_op, &alux_op
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_80[8] = {
|
||
|
|
// clang-format off
|
||
|
|
&alux_store_op, &alux_store_op, &vector_alux_store_op, &vector_alux_store_op,
|
||
|
|
&alux_store_op, &alux_store_op, &alux_store_op, &alux_store_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_80_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
&alux_op, &alux_op, &alux_store_op, &alux_store_op,
|
||
|
|
&alux_op, &alux_op, &alux_op, &alux_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_8x[8] = {
|
||
|
|
// clang-format off
|
||
|
|
&alu_store_op, &alu_store_op, &vector_alu_store_op, &vector_alu_store_op,
|
||
|
|
&alu_store_op, &alu_store_op, &alu_store_op, &alu_store_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_8x_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
&alu_op, &alu_op, &alu_store_op, &alu_store_op,
|
||
|
|
&alu_op, &alu_op, &alu_op, &alu_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_f6[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* TST NOT NEG*/
|
||
|
|
&test_mem_imm_b_op, INVALID, &vector_alux_store_op, &vector_alux_store_op,
|
||
|
|
/* MUL IMUL DIV IDIV*/
|
||
|
|
&vector_mul_mem_op, &vector_mul_mem_op, &vector_div16_mem_op, &vector_div16_mem_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_f6_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* TST NOT NEG*/
|
||
|
|
&test_reg_b_op, INVALID, &alux_op, &alux_op,
|
||
|
|
/* MUL IMUL DIV IDIV*/
|
||
|
|
&vector_mul_op, &vector_mul_op, &vector_div16_op, &vector_div16_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_f7[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* TST NOT NEG*/
|
||
|
|
&test_mem_imm_op, INVALID, &vector_alu_store_op, &vector_alu_store_op,
|
||
|
|
/* MUL IMUL DIV IDIV*/
|
||
|
|
&vector_mul64_mem_op, &vector_mul64_mem_op, &vector_div32_mem_op, &vector_div32_mem_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_f7_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* TST NOT NEG*/
|
||
|
|
&test_reg_op, INVALID, &alu_op, &alu_op,
|
||
|
|
/* MUL IMUL DIV IDIV*/
|
||
|
|
&vector_mul64_op, &vector_mul64_op, &vector_div32_op, &vector_div32_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_ff[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* INC DEC CALL CALL far*/
|
||
|
|
&alu_store_op, &alu_store_op, &store_op, &vector_call_far_op,
|
||
|
|
/* JMP JMP far PUSH*/
|
||
|
|
&branch_op, &vector_jmp_far_op, &push_mem_op, INVALID
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_ff_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* INC DEC CALL CALL far*/
|
||
|
|
&vector_alu1_op, &vector_alu1_op, &store_op, &vector_call_far_op,
|
||
|
|
/* JMP JMP far PUSH*/
|
||
|
|
&branch_op, &vector_jmp_far_op, &vector_push_mem_op, INVALID
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_d8[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FADDs FMULs FCOMs FCOMPs*/
|
||
|
|
&load_float_op, &load_float_op, &load_float_op, &load_float_op,
|
||
|
|
/* FSUBs FSUBRs FDIVs FDIVRs*/
|
||
|
|
&load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_d8_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FADD FMUL FCOM FCOMP*/
|
||
|
|
&float_op, &float_op, &float_op, &float_op,
|
||
|
|
/* FSUB FSUBR FDIV FDIVR*/
|
||
|
|
&float_op, &float_op, &fdiv_op, &fdiv_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_d9[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FLDs FSTs FSTPs*/
|
||
|
|
&load_float_op, INVALID, &fstore_op, &fstore_op,
|
||
|
|
/* FLDENV FLDCW FSTENV FSTCW*/
|
||
|
|
&vector_float_l_op, &vector_fldcw_op, &vector_float_l_op, &vector_float_op
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_d9_mod3[64] = {
|
||
|
|
// clang-format off
|
||
|
|
/*FLD*/
|
||
|
|
&float_op, &float_op, &float_op, &float_op,
|
||
|
|
&float_op, &float_op, &float_op, &float_op,
|
||
|
|
/*FXCH*/
|
||
|
|
&float_op, &float_op, &float_op, &float_op,
|
||
|
|
&float_op, &float_op, &float_op, &float_op,
|
||
|
|
/*FNOP*/
|
||
|
|
&float_op, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/*FSTP*/
|
||
|
|
&float_op, &float_op, &float_op, &float_op,
|
||
|
|
&float_op, &float_op, &float_op, &float_op,
|
||
|
|
/* opFCHS opFABS*/
|
||
|
|
&float_op, &float_op, INVALID, INVALID,
|
||
|
|
/* opFTST opFXAM*/
|
||
|
|
&float_op, &float_op, INVALID, INVALID,
|
||
|
|
/* opFLD1 opFLDL2T opFLDL2E opFLDPI*/
|
||
|
|
&float_op, &float_op, &float_op, &float_op,
|
||
|
|
/* opFLDEG2 opFLDLN2 opFLDZ*/
|
||
|
|
&float_op, &float_op, &float_op, INVALID,
|
||
|
|
/* opF2XM1 opFYL2X opFPTAN opFPATAN*/
|
||
|
|
&fsin_op, &fsin_op, &fsin_op, &fsin_op,
|
||
|
|
/* opFDECSTP opFINCSTP,*/
|
||
|
|
INVALID, INVALID, &float_op, &float_op,
|
||
|
|
/* opFPREM opFSQRT opFSINCOS*/
|
||
|
|
&fdiv_op, INVALID, &fsqrt_op, &fsin_op,
|
||
|
|
/* opFRNDINT opFSCALE opFSIN opFCOS*/
|
||
|
|
&float_op, &fdiv_op, &fsin_op, &fsin_op
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_da[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FIADDl FIMULl FICOMl FICOMPl*/
|
||
|
|
&load_float_op, &load_float_op, &load_float_op, &load_float_op,
|
||
|
|
/* FISUBl FISUBRl FIDIVl FIDIVRl*/
|
||
|
|
&load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_da_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* FCOMPP*/
|
||
|
|
INVALID, &float_op, INVALID, INVALID
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_db[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FLDil FSTil FSTPil*/
|
||
|
|
&load_float_op, INVALID, &fstore_op, &fstore_op,
|
||
|
|
/* FLDe FSTPe*/
|
||
|
|
INVALID, &vector_flde_op, INVALID, &vector_fste_op
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_db_mod3[64] = {
|
||
|
|
// clang-format off
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
/* opFNOP opFCLEX opFINIT*/
|
||
|
|
INVALID, &float_op, &float_op, &float_op,
|
||
|
|
/* opFNOP opFNOP*/
|
||
|
|
&float_op, &float_op, INVALID, INVALID,
|
||
|
|
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_dc[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FADDd FMULd FCOMd FCOMPd*/
|
||
|
|
&load_float_op, &load_float_op, &load_float_op, &load_float_op,
|
||
|
|
/* FSUBd FSUBRd FDIVd FDIVRd*/
|
||
|
|
&load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_dc_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* opFADDr opFMULr*/
|
||
|
|
&float_op, &float_op, INVALID, INVALID,
|
||
|
|
/* opFSUBRr opFSUBr opFDIVRr opFDIVr*/
|
||
|
|
&float_op, &float_op, &fdiv_op, &fdiv_op
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_dd[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FLDd FSTd FSTPd*/
|
||
|
|
&load_float_op, INVALID, &fstore_op, &fstore_op,
|
||
|
|
/* FRSTOR FSAVE FSTSW*/
|
||
|
|
&vector_float_l_op, INVALID, &vector_float_l_op, &vector_float_l_op
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_dd_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FFFREE FST FSTP*/
|
||
|
|
&float_op, INVALID, &float_op, &float_op,
|
||
|
|
/* FUCOM FUCOMP*/
|
||
|
|
&float_op, &float_op, INVALID, INVALID
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_de[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FIADDw FIMULw FICOMw FICOMPw*/
|
||
|
|
&load_float_op, &load_float_op, &load_float_op, &load_float_op,
|
||
|
|
/* FISUBw FISUBRw FIDIVw FIDIVRw*/
|
||
|
|
&load_float_op, &load_float_op, &fdiv_mem_op, &fdiv_mem_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_de_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FADDP FMULP FCOMPP*/
|
||
|
|
&float_op, &float_op, INVALID, &float_op,
|
||
|
|
/* FSUBP FSUBRP FDIVP FDIVRP*/
|
||
|
|
&float_op, &float_op, &fdiv_op, &fdiv_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_df[8] = {
|
||
|
|
// clang-format off
|
||
|
|
/* FILDiw FISTiw FISTPiw*/
|
||
|
|
&load_float_op, INVALID, &fstore_op, &fstore_op,
|
||
|
|
/* FILDiq FBSTP FISTPiq*/
|
||
|
|
INVALID, &load_float_op, &vector_float_l_op, &fstore_op,
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
static const risc86_instruction_t *opcode_timings_k5_df_mod3[8] = {
|
||
|
|
// clang-format off
|
||
|
|
INVALID, INVALID, INVALID, INVALID,
|
||
|
|
/* FSTSW AX*/
|
||
|
|
&float_op, INVALID, INVALID, INVALID
|
||
|
|
// clang-format on
|
||
|
|
};
|
||
|
|
|
||
|
|
static uint8_t last_prefix;
|
||
|
|
static int prefixes;
|
||
|
|
|
||
|
|
static int decode_timestamp;
|
||
|
|
static int last_complete_timestamp;
|
||
|
|
|
||
|
|
typedef struct k5_unit_t {
|
||
|
|
uint32_t uop_mask;
|
||
|
|
int first_available_cycle;
|
||
|
|
} k5_unit_t;
|
||
|
|
|
||
|
|
static int nr_units;
|
||
|
|
static k5_unit_t *units;
|
||
|
|
|
||
|
|
/*k5 has dedicated MMX unit*/
|
||
|
|
static k5_unit_t k5_units[] = {
|
||
|
|
{ .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) }, /*Integer X*/
|
||
|
|
{ .uop_mask = (1 << UOP_ALU) }, /*Integer Y*/
|
||
|
|
{ .uop_mask = (1 << UOP_MEU) | (1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) }, /*Multimedia*/
|
||
|
|
{ .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/
|
||
|
|
{ .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/
|
||
|
|
{ .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/
|
||
|
|
{ .uop_mask = (1 << UOP_BRANCH) } /*Branch*/
|
||
|
|
};
|
||
|
|
#define NR_k5_UNITS (sizeof(k5_units) / sizeof(k5_unit_t))
|
||
|
|
|
||
|
|
/*k5-2 and later integrate MMX into ALU X & Y, sharing multiplier, shifter and
|
||
|
|
3DNow ALU between two execution units*/
|
||
|
|
static k5_unit_t k5_2_units[] = {
|
||
|
|
{ .uop_mask = (1 << UOP_ALU) | (1 << UOP_ALUX) | (1 << UOP_MEU) | /*Integer X*/
|
||
|
|
(1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) },
|
||
|
|
{ .uop_mask = (1 << UOP_ALU) | (1 << UOP_MEU) | /*Integer Y*/
|
||
|
|
(1 << UOP_MEU_SHIFT) | (1 << UOP_MEU_MUL) | (1 << UOP_MEU_3DN) },
|
||
|
|
{ .uop_mask = (1 << UOP_FLOAT) }, /*Floating point*/
|
||
|
|
{ .uop_mask = (1 << UOP_LOAD) | (1 << UOP_FLOAD) | (1 << UOP_MLOAD) }, /*Load*/
|
||
|
|
{ .uop_mask = (1 << UOP_STORE) | (1 << UOP_FSTORE) | (1 << UOP_MSTORE) }, /*Store*/
|
||
|
|
{ .uop_mask = (1 << UOP_BRANCH) } /*Branch*/
|
||
|
|
};
|
||
|
|
#define NR_k5_2_UNITS (sizeof(k5_2_units) / sizeof(k5_unit_t))
|
||
|
|
|
||
|
|
/*First available cycles of shared execution units. Each of these can be submitted
|
||
|
|
to by ALU X and Y*/
|
||
|
|
static int mul_first_available_cycle;
|
||
|
|
static int shift_first_available_cycle;
|
||
|
|
static int m3dnow_first_available_cycle;
|
||
|
|
|
||
|
|
static int
|
||
|
|
uop_run(const risc86_uop_t *uop, int decode_time)
|
||
|
|
{
|
||
|
|
k5_unit_t *best_unit = NULL;
|
||
|
|
int best_start_cycle = 99999;
|
||
|
|
|
||
|
|
/*UOP_LIMM does not require execution*/
|
||
|
|
if (uop->type == UOP_LIMM)
|
||
|
|
return decode_time;
|
||
|
|
|
||
|
|
/*Handle shared units on k5-2 and later*/
|
||
|
|
if (units == k5_2_units) {
|
||
|
|
if (uop->type == UOP_MEU_MUL && decode_time < mul_first_available_cycle)
|
||
|
|
decode_time = mul_first_available_cycle;
|
||
|
|
else if (uop->type == UOP_MEU_SHIFT && decode_time < mul_first_available_cycle)
|
||
|
|
decode_time = shift_first_available_cycle;
|
||
|
|
else if (uop->type == UOP_MEU_3DN && decode_time < mul_first_available_cycle)
|
||
|
|
decode_time = m3dnow_first_available_cycle;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*Find execution unit for this uOP*/
|
||
|
|
for (int c = 0; c < nr_units; c++) {
|
||
|
|
if (units[c].uop_mask & (1 << uop->type)) {
|
||
|
|
if (units[c].first_available_cycle < best_start_cycle) {
|
||
|
|
best_unit = &units[c];
|
||
|
|
best_start_cycle = units[c].first_available_cycle;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if (!best_unit)
|
||
|
|
fatal("uop_run: can not find execution unit\n");
|
||
|
|
|
||
|
|
if (best_start_cycle < decode_time)
|
||
|
|
best_start_cycle = decode_time;
|
||
|
|
best_unit->first_available_cycle = best_start_cycle + uop->throughput;
|
||
|
|
|
||
|
|
if (units == k5_2_units) {
|
||
|
|
if (uop->type == UOP_MEU_MUL)
|
||
|
|
mul_first_available_cycle = best_start_cycle + uop->throughput;
|
||
|
|
else if (uop->type == UOP_MEU_SHIFT)
|
||
|
|
shift_first_available_cycle = best_start_cycle + uop->throughput;
|
||
|
|
else if (uop->type == UOP_MEU_3DN)
|
||
|
|
m3dnow_first_available_cycle = best_start_cycle + uop->throughput;
|
||
|
|
}
|
||
|
|
|
||
|
|
return best_start_cycle + uop->throughput;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*The k5 decoder can decode, per clock :
|
||
|
|
- 1 or 2 'short' instructions, each up to 2 uOPs and 7 bytes long
|
||
|
|
- 1 'long' instruction, up to 4 uOPs
|
||
|
|
- 1 'vector' instruction, up to 4 uOPs per cycle, plus (I think) 1 cycle startup delay)
|
||
|
|
*/
|
||
|
|
static struct {
|
||
|
|
int nr_uops;
|
||
|
|
const risc86_uop_t *uops[4];
|
||
|
|
/*Earliest time a uop can start. If the timestamp is -1, then the uop is
|
||
|
|
part of a dependency chain and the start time is the completion time of
|
||
|
|
the previous uop*/
|
||
|
|
int earliest_start[4];
|
||
|
|
} decode_buffer;
|
||
|
|
|
||
|
|
#define NR_OPQUADS 6
|
||
|
|
/*Timestamps of when the last six opquads completed. The k5 scheduler retires
|
||
|
|
opquads in order, so this is needed to determine when the next can be scheduled*/
|
||
|
|
static int opquad_completion_timestamp[NR_OPQUADS];
|
||
|
|
static int next_opquad = 0;
|
||
|
|
|
||
|
|
#define NR_REGS 8
|
||
|
|
/*Timestamp of when last operation on an integer register completed*/
|
||
|
|
static int reg_available_timestamp[NR_REGS];
|
||
|
|
/*Timestamp of when last operation on an FPU register completed*/
|
||
|
|
static int fpu_st_timestamp[8];
|
||
|
|
/*Completion time of the last uop to be processed. Used to calculate timing of
|
||
|
|
dependent uop chains*/
|
||
|
|
static int last_uop_timestamp = 0;
|
||
|
|
|
||
|
|
void
|
||
|
|
decode_flush_k5(void)
|
||
|
|
{
|
||
|
|
int uop_timestamp = 0;
|
||
|
|
|
||
|
|
/*Decoded opquad can not be submitted if there are no free spaces in the
|
||
|
|
opquad buffer*/
|
||
|
|
if (decode_timestamp < opquad_completion_timestamp[next_opquad])
|
||
|
|
decode_timestamp = opquad_completion_timestamp[next_opquad];
|
||
|
|
|
||
|
|
/*Ensure that uops can not be submitted before they have been decoded*/
|
||
|
|
if (decode_timestamp > last_uop_timestamp)
|
||
|
|
last_uop_timestamp = decode_timestamp;
|
||
|
|
|
||
|
|
/*Submit uops to execution units, and determine the latest completion time*/
|
||
|
|
for (int c = 0; c < decode_buffer.nr_uops; c++) {
|
||
|
|
int start_timestamp;
|
||
|
|
|
||
|
|
if (decode_buffer.earliest_start[c] == -1)
|
||
|
|
start_timestamp = last_uop_timestamp;
|
||
|
|
else
|
||
|
|
start_timestamp = decode_buffer.earliest_start[c];
|
||
|
|
|
||
|
|
last_uop_timestamp = uop_run(decode_buffer.uops[c], start_timestamp);
|
||
|
|
if (last_uop_timestamp > uop_timestamp)
|
||
|
|
uop_timestamp = last_uop_timestamp;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*Calculate opquad completion time. Since opquads complete in order, it
|
||
|
|
must be after the last completion.*/
|
||
|
|
if (uop_timestamp <= last_complete_timestamp)
|
||
|
|
last_complete_timestamp = last_complete_timestamp + 1;
|
||
|
|
else
|
||
|
|
last_complete_timestamp = uop_timestamp;
|
||
|
|
|
||
|
|
/*Advance to next opquad in buffer*/
|
||
|
|
opquad_completion_timestamp[next_opquad] = last_complete_timestamp;
|
||
|
|
next_opquad++;
|
||
|
|
if (next_opquad == NR_OPQUADS)
|
||
|
|
next_opquad = 0;
|
||
|
|
|
||
|
|
decode_timestamp++;
|
||
|
|
decode_buffer.nr_uops = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*The instruction is only of interest here if it's longer than 7 bytes, as that's the
|
||
|
|
limit on k5 short decoding*/
|
||
|
|
static int
|
||
|
|
codegen_timing_instr_length(uint64_t deps, uint32_t fetchdat, int op_32)
|
||
|
|
{
|
||
|
|
int len = prefixes + 1; /*Opcode*/
|
||
|
|
if (deps & MODRM) {
|
||
|
|
len++; /*ModR/M*/
|
||
|
|
if (deps & HAS_IMM8)
|
||
|
|
len++;
|
||
|
|
if (deps & HAS_IMM1632)
|
||
|
|
len += (op_32 & 0x100) ? 4 : 2;
|
||
|
|
|
||
|
|
if (op_32 & 0x200) {
|
||
|
|
if ((fetchdat & 7) == 4 && (fetchdat & 0xc0) != 0xc0) {
|
||
|
|
/* Has SIB*/
|
||
|
|
len++;
|
||
|
|
if ((fetchdat & 0xc0) == 0x40)
|
||
|
|
len++;
|
||
|
|
else if ((fetchdat & 0xc0) == 0x80)
|
||
|
|
len += 4;
|
||
|
|
else if ((fetchdat & 0x700) == 0x500)
|
||
|
|
len += 4;
|
||
|
|
} else {
|
||
|
|
if ((fetchdat & 0xc0) == 0x40)
|
||
|
|
len++;
|
||
|
|
else if ((fetchdat & 0xc0) == 0x80)
|
||
|
|
len += 4;
|
||
|
|
else if ((fetchdat & 0xc7) == 0x05)
|
||
|
|
len += 4;
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
if ((fetchdat & 0xc0) == 0x40)
|
||
|
|
len++;
|
||
|
|
else if ((fetchdat & 0xc0) == 0x80)
|
||
|
|
len += 2;
|
||
|
|
else if ((fetchdat & 0xc7) == 0x06)
|
||
|
|
len += 2;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return len;
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
decode_instruction(const risc86_instruction_t *ins, uint64_t deps, uint32_t fetchdat, int op_32, int bit8)
|
||
|
|
{
|
||
|
|
uint32_t regmask_required;
|
||
|
|
uint32_t regmask_modified;
|
||
|
|
int c;
|
||
|
|
int d;
|
||
|
|
int earliest_start = 0;
|
||
|
|
decode_type_t decode_type = ins->decode_type;
|
||
|
|
int instr_length = codegen_timing_instr_length(deps, fetchdat, op_32);
|
||
|
|
|
||
|
|
/*Generate input register mask, and determine the earliest time this
|
||
|
|
instruction can start. This is not accurate, as this is calculated per
|
||
|
|
x86 instruction when it should be handled per uop*/
|
||
|
|
regmask_required = get_dstdep_mask(deps, fetchdat, bit8);
|
||
|
|
regmask_required |= get_addr_regmask(deps, fetchdat, op_32);
|
||
|
|
for (c = 0; c < 8; c++) {
|
||
|
|
if (regmask_required & (1 << c)) {
|
||
|
|
if (reg_available_timestamp[c] > decode_timestamp)
|
||
|
|
earliest_start = reg_available_timestamp[c];
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if ((deps & FPU_RW_ST0) && fpu_st_timestamp[0] > decode_timestamp)
|
||
|
|
earliest_start = fpu_st_timestamp[0];
|
||
|
|
if ((deps & FPU_RW_ST1) && fpu_st_timestamp[1] > decode_timestamp)
|
||
|
|
earliest_start = fpu_st_timestamp[1];
|
||
|
|
if (deps & FPU_RW_STREG) {
|
||
|
|
int reg = fetchdat & 7;
|
||
|
|
|
||
|
|
if (fpu_st_timestamp[reg] > decode_timestamp)
|
||
|
|
earliest_start = fpu_st_timestamp[reg];
|
||
|
|
}
|
||
|
|
|
||
|
|
/*Short decoders are limited to 7 bytes*/
|
||
|
|
if (decode_type == DECODE_SHORT && instr_length > 7)
|
||
|
|
decode_type = DECODE_LONG;
|
||
|
|
/*Long decoder is limited to 11 bytes*/
|
||
|
|
else if (instr_length > 11)
|
||
|
|
decode_type = DECODE_VECTOR;
|
||
|
|
|
||
|
|
switch (decode_type) {
|
||
|
|
case DECODE_SHORT:
|
||
|
|
if (decode_buffer.nr_uops) {
|
||
|
|
decode_buffer.uops[decode_buffer.nr_uops] = &ins->uop[0];
|
||
|
|
decode_buffer.earliest_start[decode_buffer.nr_uops] = earliest_start;
|
||
|
|
if (ins->nr_uops > 1) {
|
||
|
|
decode_buffer.uops[decode_buffer.nr_uops + 1] = &ins->uop[1];
|
||
|
|
decode_buffer.earliest_start[decode_buffer.nr_uops + 1] = -1;
|
||
|
|
}
|
||
|
|
decode_buffer.nr_uops += ins->nr_uops;
|
||
|
|
|
||
|
|
decode_flush_k5();
|
||
|
|
} else {
|
||
|
|
decode_buffer.nr_uops = ins->nr_uops;
|
||
|
|
decode_buffer.uops[0] = &ins->uop[0];
|
||
|
|
decode_buffer.earliest_start[0] = earliest_start;
|
||
|
|
if (ins->nr_uops > 1) {
|
||
|
|
decode_buffer.uops[1] = &ins->uop[1];
|
||
|
|
decode_buffer.earliest_start[1] = -1;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
|
||
|
|
case DECODE_LONG:
|
||
|
|
if (decode_buffer.nr_uops)
|
||
|
|
decode_flush_k5();
|
||
|
|
|
||
|
|
decode_buffer.nr_uops = ins->nr_uops;
|
||
|
|
for (c = 0; c < ins->nr_uops; c++) {
|
||
|
|
decode_buffer.uops[c] = &ins->uop[c];
|
||
|
|
if (c == 0)
|
||
|
|
decode_buffer.earliest_start[c] = earliest_start;
|
||
|
|
else
|
||
|
|
decode_buffer.earliest_start[c] = -1;
|
||
|
|
}
|
||
|
|
decode_flush_k5();
|
||
|
|
break;
|
||
|
|
|
||
|
|
case DECODE_VECTOR:
|
||
|
|
if (decode_buffer.nr_uops)
|
||
|
|
decode_flush_k5();
|
||
|
|
|
||
|
|
decode_timestamp++;
|
||
|
|
d = 0;
|
||
|
|
|
||
|
|
for (c = 0; c < ins->nr_uops; c++) {
|
||
|
|
decode_buffer.uops[d] = &ins->uop[c];
|
||
|
|
if (c == 0)
|
||
|
|
decode_buffer.earliest_start[d] = earliest_start;
|
||
|
|
else
|
||
|
|
decode_buffer.earliest_start[d] = -1;
|
||
|
|
d++;
|
||
|
|
|
||
|
|
if (d == 4) {
|
||
|
|
d = 0;
|
||
|
|
decode_buffer.nr_uops = 4;
|
||
|
|
decode_flush_k5();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if (d) {
|
||
|
|
decode_buffer.nr_uops = d;
|
||
|
|
decode_flush_k5();
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*Update write timestamps for any output registers*/
|
||
|
|
regmask_modified = get_dstdep_mask(deps, fetchdat, bit8);
|
||
|
|
for (c = 0; c < 8; c++) {
|
||
|
|
if (regmask_modified & (1 << c))
|
||
|
|
reg_available_timestamp[c] = last_complete_timestamp;
|
||
|
|
}
|
||
|
|
if (deps & FPU_POP) {
|
||
|
|
for (c = 0; c < 7; c++)
|
||
|
|
fpu_st_timestamp[c] = fpu_st_timestamp[c + 1];
|
||
|
|
fpu_st_timestamp[7] = 0;
|
||
|
|
}
|
||
|
|
if (deps & FPU_POP2) {
|
||
|
|
for (c = 0; c < 6; c++)
|
||
|
|
fpu_st_timestamp[c] = fpu_st_timestamp[c + 2];
|
||
|
|
fpu_st_timestamp[6] = fpu_st_timestamp[7] = 0;
|
||
|
|
}
|
||
|
|
if (deps & FPU_PUSH) {
|
||
|
|
for (c = 0; c < 7; c++)
|
||
|
|
fpu_st_timestamp[c + 1] = fpu_st_timestamp[c];
|
||
|
|
fpu_st_timestamp[0] = 0;
|
||
|
|
}
|
||
|
|
if (deps & FPU_WRITE_ST0)
|
||
|
|
fpu_st_timestamp[0] = last_complete_timestamp;
|
||
|
|
if (deps & FPU_WRITE_ST1)
|
||
|
|
fpu_st_timestamp[1] = last_complete_timestamp;
|
||
|
|
if (deps & FPU_WRITE_STREG) {
|
||
|
|
int reg = fetchdat & 7;
|
||
|
|
if (deps & FPU_POP)
|
||
|
|
reg--;
|
||
|
|
if (reg >= 0 && !(reg == 0 && (deps & FPU_WRITE_ST0)) && !(reg == 1 && (deps & FPU_WRITE_ST1)))
|
||
|
|
fpu_st_timestamp[reg] = last_complete_timestamp;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void
|
||
|
|
codegen_timing_k5_block_start(void)
|
||
|
|
{
|
||
|
|
int c;
|
||
|
|
|
||
|
|
for (c = 0; c < nr_units; c++)
|
||
|
|
units[c].first_available_cycle = 0;
|
||
|
|
|
||
|
|
mul_first_available_cycle = 0;
|
||
|
|
shift_first_available_cycle = 0;
|
||
|
|
m3dnow_first_available_cycle = 0;
|
||
|
|
|
||
|
|
decode_timestamp = 0;
|
||
|
|
last_complete_timestamp = 0;
|
||
|
|
|
||
|
|
for (c = 0; c < NR_OPQUADS; c++)
|
||
|
|
opquad_completion_timestamp[c] = 0;
|
||
|
|
next_opquad = 0;
|
||
|
|
|
||
|
|
for (c = 0; c < NR_REGS; c++)
|
||
|
|
reg_available_timestamp[c] = 0;
|
||
|
|
for (c = 0; c < 8; c++)
|
||
|
|
fpu_st_timestamp[c] = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
void
|
||
|
|
codegen_timing_k5_start(void)
|
||
|
|
{
|
||
|
|
if (cpu_s->cpu_type == CPU_K5) {
|
||
|
|
units = k5_units;
|
||
|
|
nr_units = NR_k5_UNITS;
|
||
|
|
} else {
|
||
|
|
units = k5_2_units;
|
||
|
|
nr_units = NR_k5_2_UNITS;
|
||
|
|
}
|
||
|
|
last_prefix = 0;
|
||
|
|
prefixes = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
void
|
||
|
|
codegen_timing_k5_prefix(uint8_t prefix, uint32_t fetchdat)
|
||
|
|
{
|
||
|
|
if (prefix != 0x0f)
|
||
|
|
decode_timestamp++;
|
||
|
|
|
||
|
|
last_prefix = prefix;
|
||
|
|
prefixes++;
|
||
|
|
}
|
||
|
|
|
||
|
|
void
|
||
|
|
codegen_timing_k5_opcode(uint8_t opcode, uint32_t fetchdat, int op_32, uint32_t op_pc)
|
||
|
|
{
|
||
|
|
const risc86_instruction_t **ins_table;
|
||
|
|
const uint64_t *deps;
|
||
|
|
int mod3 = ((fetchdat & 0xc0) == 0xc0);
|
||
|
|
int old_last_complete_timestamp = last_complete_timestamp;
|
||
|
|
int bit8 = !(opcode & 1);
|
||
|
|
|
||
|
|
switch (last_prefix) {
|
||
|
|
case 0x0f:
|
||
|
|
if (opcode == 0x0f) {
|
||
|
|
/*3DNow has the actual opcode after ModR/M, SIB and any offset*/
|
||
|
|
uint32_t opcode_pc = op_pc + 1; /*Byte after ModR/M*/
|
||
|
|
uint8_t modrm = fetchdat & 0xff;
|
||
|
|
uint8_t sib = (fetchdat >> 8) & 0xff;
|
||
|
|
|
||
|
|
if ((modrm & 0xc0) != 0xc0) {
|
||
|
|
if (op_32 & 0x200) {
|
||
|
|
if ((modrm & 7) == 4) {
|
||
|
|
/* Has SIB*/
|
||
|
|
opcode_pc++;
|
||
|
|
if ((modrm & 0xc0) == 0x40)
|
||
|
|
opcode_pc++;
|
||
|
|
else if ((modrm & 0xc0) == 0x80)
|
||
|
|
opcode_pc += 4;
|
||
|
|
else if ((sib & 0x07) == 0x05)
|
||
|
|
opcode_pc += 4;
|
||
|
|
} else {
|
||
|
|
if ((modrm & 0xc0) == 0x40)
|
||
|
|
opcode_pc++;
|
||
|
|
else if ((modrm & 0xc0) == 0x80)
|
||
|
|
opcode_pc += 4;
|
||
|
|
else if ((modrm & 0xc7) == 0x05)
|
||
|
|
opcode_pc += 4;
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
if ((modrm & 0xc0) == 0x40)
|
||
|
|
opcode_pc++;
|
||
|
|
else if ((modrm & 0xc0) == 0x80)
|
||
|
|
opcode_pc += 2;
|
||
|
|
else if ((modrm & 0xc7) == 0x06)
|
||
|
|
opcode_pc += 2;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
opcode = fastreadb(cs + opcode_pc);
|
||
|
|
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_0f0f_mod3 : opcode_timings_k5_0f0f;
|
||
|
|
deps = mod3 ? opcode_deps_0f0f_mod3 : opcode_deps_0f0f;
|
||
|
|
} else {
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_0f_mod3 : opcode_timings_k5_0f;
|
||
|
|
deps = mod3 ? opcode_deps_0f_mod3 : opcode_deps_0f;
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
|
||
|
|
case 0xd8:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_d8_mod3 : opcode_timings_k5_d8;
|
||
|
|
deps = mod3 ? opcode_deps_d8_mod3 : opcode_deps_d8;
|
||
|
|
opcode = (opcode >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0xd9:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_d9_mod3 : opcode_timings_k5_d9;
|
||
|
|
deps = mod3 ? opcode_deps_d9_mod3 : opcode_deps_d9;
|
||
|
|
opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0xda:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_da_mod3 : opcode_timings_k5_da;
|
||
|
|
deps = mod3 ? opcode_deps_da_mod3 : opcode_deps_da;
|
||
|
|
opcode = (opcode >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0xdb:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_db_mod3 : opcode_timings_k5_db;
|
||
|
|
deps = mod3 ? opcode_deps_db_mod3 : opcode_deps_db;
|
||
|
|
opcode = mod3 ? opcode & 0x3f : (opcode >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0xdc:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_dc_mod3 : opcode_timings_k5_dc;
|
||
|
|
deps = mod3 ? opcode_deps_dc_mod3 : opcode_deps_dc;
|
||
|
|
opcode = (opcode >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0xdd:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_dd_mod3 : opcode_timings_k5_dd;
|
||
|
|
deps = mod3 ? opcode_deps_dd_mod3 : opcode_deps_dd;
|
||
|
|
opcode = (opcode >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0xde:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_de_mod3 : opcode_timings_k5_de;
|
||
|
|
deps = mod3 ? opcode_deps_de_mod3 : opcode_deps_de;
|
||
|
|
opcode = (opcode >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0xdf:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_df_mod3 : opcode_timings_k5_df;
|
||
|
|
deps = mod3 ? opcode_deps_df_mod3 : opcode_deps_df;
|
||
|
|
opcode = (opcode >> 3) & 7;
|
||
|
|
break;
|
||
|
|
|
||
|
|
default:
|
||
|
|
switch (opcode) {
|
||
|
|
case 0x80:
|
||
|
|
case 0x82:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_80_mod3 : opcode_timings_k5_80;
|
||
|
|
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
|
||
|
|
opcode = (fetchdat >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0x81:
|
||
|
|
case 0x83:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_8x_mod3 : opcode_timings_k5_8x;
|
||
|
|
deps = mod3 ? opcode_deps_8x_mod3 : opcode_deps_8x;
|
||
|
|
opcode = (fetchdat >> 3) & 7;
|
||
|
|
break;
|
||
|
|
|
||
|
|
case 0xc0:
|
||
|
|
case 0xd0:
|
||
|
|
case 0xd2:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_shift_b_mod3 : opcode_timings_k5_shift_b;
|
||
|
|
deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift;
|
||
|
|
opcode = (fetchdat >> 3) & 7;
|
||
|
|
break;
|
||
|
|
|
||
|
|
case 0xc1:
|
||
|
|
case 0xd1:
|
||
|
|
case 0xd3:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_shift_mod3 : opcode_timings_k5_shift;
|
||
|
|
deps = mod3 ? opcode_deps_shift_mod3 : opcode_deps_shift;
|
||
|
|
opcode = (fetchdat >> 3) & 7;
|
||
|
|
break;
|
||
|
|
|
||
|
|
case 0xf6:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_f6_mod3 : opcode_timings_k5_f6;
|
||
|
|
deps = mod3 ? opcode_deps_f6_mod3 : opcode_deps_f6;
|
||
|
|
opcode = (fetchdat >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0xf7:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_f7_mod3 : opcode_timings_k5_f7;
|
||
|
|
deps = mod3 ? opcode_deps_f7_mod3 : opcode_deps_f7;
|
||
|
|
opcode = (fetchdat >> 3) & 7;
|
||
|
|
break;
|
||
|
|
case 0xff:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_ff_mod3 : opcode_timings_k5_ff;
|
||
|
|
deps = mod3 ? opcode_deps_ff_mod3 : opcode_deps_ff;
|
||
|
|
opcode = (fetchdat >> 3) & 7;
|
||
|
|
break;
|
||
|
|
|
||
|
|
default:
|
||
|
|
ins_table = mod3 ? opcode_timings_k5_mod3 : opcode_timings_k5;
|
||
|
|
deps = mod3 ? opcode_deps_mod3 : opcode_deps;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if (ins_table[opcode])
|
||
|
|
decode_instruction(ins_table[opcode], deps[opcode], fetchdat, op_32, bit8);
|
||
|
|
else
|
||
|
|
decode_instruction(&vector_alu1_op, 0, fetchdat, op_32, bit8);
|
||
|
|
codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp);
|
||
|
|
}
|
||
|
|
|
||
|
|
void
|
||
|
|
codegen_timing_k5_block_end(void)
|
||
|
|
{
|
||
|
|
if (decode_buffer.nr_uops) {
|
||
|
|
int old_last_complete_timestamp = last_complete_timestamp;
|
||
|
|
decode_flush_k5();
|
||
|
|
codegen_block_cycles += (last_complete_timestamp - old_last_complete_timestamp);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
int
|
||
|
|
codegen_timing_k5_jump_cycles(void)
|
||
|
|
{
|
||
|
|
if (decode_buffer.nr_uops)
|
||
|
|
return 1;
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
codegen_timing_t codegen_timing_k5 = {
|
||
|
|
codegen_timing_k5_start,
|
||
|
|
codegen_timing_k5_prefix,
|
||
|
|
codegen_timing_k5_opcode,
|
||
|
|
codegen_timing_k5_block_start,
|
||
|
|
codegen_timing_k5_block_end,
|
||
|
|
codegen_timing_k5_jump_cycles
|
||
|
|
};
|