NDR: For barrier micro-ops, lazily mark written registers as required instead of iterating
Also remove some dead code there. A further 4-5% performance improvement was observed.
This commit is contained in:
@@ -38,7 +38,7 @@ codegen_ir_set_unroll(int count, int start, int first_instruction)
|
||||
static void
|
||||
duplicate_uop(ir_data_t *ir, uop_t *uop, int offset)
|
||||
{
|
||||
uop_t *new_uop = uop_alloc(ir, uop->type);
|
||||
uop_t *new_uop = uop_alloc_unroll(ir, uop->type);
|
||||
|
||||
if (!ir_reg_is_invalid(uop->src_reg_a))
|
||||
new_uop->src_reg_a = codegen_reg_read(uop->src_reg_a.reg);
|
||||
|
||||
@@ -377,6 +377,34 @@ uop_alloc(ir_data_t *ir, uint32_t uop_type)
|
||||
uop->jump_dest_uop = -1;
|
||||
uop->jump_list_next = -1;
|
||||
|
||||
if (uop_type & (UOP_TYPE_BARRIER | UOP_TYPE_ORDER_BARRIER))
|
||||
dirty_ir_regs[0] = dirty_ir_regs[1] = ~0ULL;
|
||||
|
||||
return uop;
|
||||
}
|
||||
|
||||
static inline uop_t *
|
||||
uop_alloc_unroll(ir_data_t *ir, uint32_t uop_type)
|
||||
{
|
||||
uop_t *uop;
|
||||
|
||||
if (ir->wr_pos >= UOP_NR_MAX)
|
||||
fatal("Exceeded uOP max\n");
|
||||
|
||||
uop = &ir->uops[ir->wr_pos++];
|
||||
|
||||
uop->is_a16 = 0;
|
||||
|
||||
uop->dest_reg_a = invalid_ir_reg;
|
||||
uop->src_reg_a = invalid_ir_reg;
|
||||
uop->src_reg_b = invalid_ir_reg;
|
||||
uop->src_reg_c = invalid_ir_reg;
|
||||
|
||||
uop->pc = cpu_state.oldpc;
|
||||
|
||||
uop->jump_dest_uop = -1;
|
||||
uop->jump_list_next = -1;
|
||||
|
||||
if (uop_type & (UOP_TYPE_BARRIER | UOP_TYPE_ORDER_BARRIER))
|
||||
codegen_reg_mark_as_required();
|
||||
|
||||
|
||||
@@ -34,6 +34,8 @@ typedef struct host_reg_set_t {
|
||||
static host_reg_set_t host_reg_set;
|
||||
static host_reg_set_t host_fp_reg_set;
|
||||
|
||||
uint64_t dirty_ir_regs[2] = { 0, 0 };
|
||||
|
||||
enum {
|
||||
REG_BYTE,
|
||||
REG_WORD,
|
||||
@@ -184,6 +186,24 @@ struct
|
||||
[IREG_temp1d] = { REG_DOUBLE, (void *) 48, REG_FP, REG_VOLATILE },
|
||||
};
|
||||
|
||||
/*Lookup table answering "is this requested access size the register's native
  size?". The first index is the register's native size (a REG_* value); the
  second index is the requested size code (IREG_SIZE_*) shifted right by
  IREG_SIZE_SHIFT. An entry of 1 means the requested size is native for that
  storage type. Every entry not listed below is implicitly zero-initialised,
  i.e. "not native".
  NOTE(review): the [9][8] dimensions assume every REG_* native size is below 9
  and every shifted IREG_SIZE_* code is below 8 - confirm against the enum
  definitions, since the table is indexed without a bounds check.*/
static const uint8_t native_requested_sizes[9][8] =
|
||||
{
|
||||
[REG_BYTE][IREG_SIZE_B >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_FPU_ST_BYTE][IREG_SIZE_B >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_WORD][IREG_SIZE_W >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_DWORD][IREG_SIZE_L >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_QWORD][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_FPU_ST_QWORD][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_DOUBLE][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_FPU_ST_DOUBLE][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_QWORD][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_FPU_ST_QWORD][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
|
||||
[REG_FPU_ST_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
|
||||
|
||||
/*Pointer-sized registers: native as IREG_SIZE_L when sizeof(void *) is 4, otherwise as IREG_SIZE_Q.*/
[REG_POINTER][(sizeof(void *) == 4) ? (IREG_SIZE_L >> IREG_SIZE_SHIFT) : (IREG_SIZE_Q >> IREG_SIZE_SHIFT)] = 1
|
||||
};
|
||||
|
||||
void
|
||||
codegen_reg_mark_as_required(void)
|
||||
{
|
||||
@@ -195,6 +215,7 @@ codegen_reg_mark_as_required(void)
|
||||
if (last_version > 0)
|
||||
reg_version[reg][last_version].flags |= REG_FLAGS_REQUIRED;
|
||||
}
|
||||
dirty_ir_regs[0] = dirty_ir_regs[1] = 0;
|
||||
}
|
||||
|
||||
int
|
||||
@@ -203,29 +224,7 @@ reg_is_native_size(ir_reg_t ir_reg)
|
||||
int native_size = ireg_data[IREG_GET_REG(ir_reg.reg)].native_size;
|
||||
int requested_size = IREG_GET_SIZE(ir_reg.reg);
|
||||
|
||||
switch (native_size) {
|
||||
case REG_BYTE:
|
||||
case REG_FPU_ST_BYTE:
|
||||
return (requested_size == IREG_SIZE_B);
|
||||
case REG_WORD:
|
||||
return (requested_size == IREG_SIZE_W);
|
||||
case REG_DWORD:
|
||||
return (requested_size == IREG_SIZE_L);
|
||||
case REG_QWORD:
|
||||
case REG_FPU_ST_QWORD:
|
||||
case REG_DOUBLE:
|
||||
case REG_FPU_ST_DOUBLE:
|
||||
return ((requested_size == IREG_SIZE_D) || (requested_size == IREG_SIZE_Q));
|
||||
case REG_POINTER:
|
||||
if (sizeof(void *) == 4)
|
||||
return (requested_size == IREG_SIZE_L);
|
||||
return (requested_size == IREG_SIZE_Q);
|
||||
|
||||
default:
|
||||
fatal("get_reg_is_native_size: unknown native size %i\n", native_size);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return native_requested_sizes[native_size][requested_size >> IREG_SIZE_SHIFT];
|
||||
}
|
||||
|
||||
void
|
||||
@@ -258,6 +257,8 @@ codegen_reg_reset(void)
|
||||
host_fp_reg_set.locked = 0;
|
||||
host_fp_reg_set.nr_regs = CODEGEN_HOST_FP_REGS;
|
||||
|
||||
dirty_ir_regs[0] = dirty_ir_regs[1] = 0;
|
||||
|
||||
for (c = 0; c < IREG_COUNT; c++) {
|
||||
reg_last_version[c] = 0;
|
||||
reg_version[c][0].refcount = 0;
|
||||
|
||||
@@ -278,6 +278,7 @@ ireg_seg_limit_high(x86seg *seg)
|
||||
}
|
||||
|
||||
extern uint8_t reg_last_version[IREG_COUNT];
|
||||
extern uint64_t dirty_ir_regs[2];
|
||||
|
||||
/*This version of the register must be calculated, regardless of whether it is
|
||||
apparently required or not. Do not optimise out.*/
|
||||
@@ -362,10 +363,12 @@ codegen_reg_write(int reg, int uop_nr)
|
||||
int last_version = reg_last_version[IREG_GET_REG(reg)];
|
||||
reg_version_t *version;
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
if (IREG_GET_REG(reg) == IREG_INVALID)
|
||||
fatal("codegen_reg_write - IREG_INVALID\n");
|
||||
#endif
|
||||
if (dirty_ir_regs[(IREG_GET_REG(reg) >> 6) & 3] & (1ull << ((uint64_t)IREG_GET_REG(reg) & 0x3full))) {
|
||||
dirty_ir_regs[(IREG_GET_REG(reg) >> 6) & 3] &= ~(1ull << ((uint64_t)IREG_GET_REG(reg) & 0x3full));
|
||||
if ((IREG_GET_REG(reg) > IREG_EBX && IREG_GET_REG(reg) < IREG_temp0) && last_version > 0) {
|
||||
reg_version[IREG_GET_REG(reg)][last_version].flags |= REG_FLAGS_REQUIRED;
|
||||
}
|
||||
}
|
||||
ireg.reg = reg;
|
||||
ireg.version = last_version + 1;
|
||||
|
||||
@@ -375,11 +378,7 @@ codegen_reg_write(int reg, int uop_nr)
|
||||
}
|
||||
|
||||
reg_last_version[IREG_GET_REG(reg)]++;
|
||||
#ifndef RELEASE_BUILD
|
||||
if (!reg_last_version[IREG_GET_REG(reg)])
|
||||
fatal("codegen_reg_write - version overflow\n");
|
||||
else
|
||||
#endif
|
||||
|
||||
if (reg_last_version[IREG_GET_REG(reg)] > REG_VERSION_MAX)
|
||||
CPU_BLOCK_END();
|
||||
if (reg_last_version[IREG_GET_REG(reg)] > max_version_refcount)
|
||||
|
||||
Reference in New Issue
Block a user