Review notes (this text precedes the first "diff --git" header and is not part of any hunk):

  * duplicate_uop() now allocates through the new uop_alloc_unroll(), which
    still calls codegen_reg_mark_as_required() straight away for barrier uOPs.
  * uop_alloc() instead sets every bit of dirty_ir_regs[] on a barrier uOP.
    codegen_reg_write() clears the bit of the register being written and, when
    that register lies strictly between IREG_EBX and IREG_temp0 and already
    has a version > 0, flags that version REG_FLAGS_REQUIRED.
  * reg_is_native_size() becomes a lookup in native_requested_sizes[][].
  * Fix relative to the submitted patch: dirty_ir_regs has exactly two 64-bit
    words, so the word index is masked with "& 1" rather than "& 3". With
    "& 3" any register number >= 128 selected word 2 or 3, i.e. a read and a
    write past the end of the array, and the IREG_INVALID guard that used to
    precede this code is removed by the same hunk.
  * Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy of the patch; the "index" hashes are carried over unchanged.

diff --git a/src/codegen_new/codegen_ir.c b/src/codegen_new/codegen_ir.c
index ed8ae051f..d14fa0f23 100644
--- a/src/codegen_new/codegen_ir.c
+++ b/src/codegen_new/codegen_ir.c
@@ -38,7 +38,7 @@ codegen_ir_set_unroll(int count, int start, int first_instruction)
 static void
 duplicate_uop(ir_data_t *ir, uop_t *uop, int offset)
 {
-    uop_t *new_uop = uop_alloc(ir, uop->type);
+    uop_t *new_uop = uop_alloc_unroll(ir, uop->type);
 
     if (!ir_reg_is_invalid(uop->src_reg_a))
         new_uop->src_reg_a = codegen_reg_read(uop->src_reg_a.reg);
diff --git a/src/codegen_new/codegen_ir_defs.h b/src/codegen_new/codegen_ir_defs.h
index 8c66b11f3..60f7badea 100644
--- a/src/codegen_new/codegen_ir_defs.h
+++ b/src/codegen_new/codegen_ir_defs.h
@@ -377,6 +377,34 @@ uop_alloc(ir_data_t *ir, uint32_t uop_type)
     uop->jump_dest_uop = -1;
     uop->jump_list_next = -1;
 
+    if (uop_type & (UOP_TYPE_BARRIER | UOP_TYPE_ORDER_BARRIER))
+        dirty_ir_regs[0] = dirty_ir_regs[1] = ~0ULL;
+
+    return uop;
+}
+
+static inline uop_t *
+uop_alloc_unroll(ir_data_t *ir, uint32_t uop_type)
+{
+    uop_t *uop;
+
+    if (ir->wr_pos >= UOP_NR_MAX)
+        fatal("Exceeded uOP max\n");
+
+    uop = &ir->uops[ir->wr_pos++];
+
+    uop->is_a16 = 0;
+
+    uop->dest_reg_a = invalid_ir_reg;
+    uop->src_reg_a = invalid_ir_reg;
+    uop->src_reg_b = invalid_ir_reg;
+    uop->src_reg_c = invalid_ir_reg;
+
+    uop->pc = cpu_state.oldpc;
+
+    uop->jump_dest_uop = -1;
+    uop->jump_list_next = -1;
+
     if (uop_type & (UOP_TYPE_BARRIER | UOP_TYPE_ORDER_BARRIER))
         codegen_reg_mark_as_required();
 
diff --git a/src/codegen_new/codegen_reg.c b/src/codegen_new/codegen_reg.c
index 91d7b69e2..f91377df8 100644
--- a/src/codegen_new/codegen_reg.c
+++ b/src/codegen_new/codegen_reg.c
@@ -34,6 +34,8 @@ typedef struct host_reg_set_t {
 static host_reg_set_t host_reg_set;
 static host_reg_set_t host_fp_reg_set;
 
+uint64_t dirty_ir_regs[2] = { 0, 0 };
+
 enum {
     REG_BYTE,
     REG_WORD,
@@ -184,6 +186,24 @@ struct
     [IREG_temp1d] = { REG_DOUBLE, (void *) 48, REG_FP, REG_VOLATILE },
 };
 
+static const uint8_t native_requested_sizes[9][8] =
+{
+    [REG_BYTE][IREG_SIZE_B >> IREG_SIZE_SHIFT] = 1,
+    [REG_FPU_ST_BYTE][IREG_SIZE_B >> IREG_SIZE_SHIFT] = 1,
+    [REG_WORD][IREG_SIZE_W >> IREG_SIZE_SHIFT] = 1,
+    [REG_DWORD][IREG_SIZE_L >> IREG_SIZE_SHIFT] = 1,
+    [REG_QWORD][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
+    [REG_FPU_ST_QWORD][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
+    [REG_DOUBLE][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
+    [REG_FPU_ST_DOUBLE][IREG_SIZE_D >> IREG_SIZE_SHIFT] = 1,
+    [REG_QWORD][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
+    [REG_FPU_ST_QWORD][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
+    [REG_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
+    [REG_FPU_ST_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1,
+
+    [REG_POINTER][(sizeof(void *) == 4) ? (IREG_SIZE_L >> IREG_SIZE_SHIFT) : (IREG_SIZE_Q >> IREG_SIZE_SHIFT)] = 1
+};
+
 void
 codegen_reg_mark_as_required(void)
 {
@@ -195,6 +215,7 @@ codegen_reg_mark_as_required(void)
         if (last_version > 0)
             reg_version[reg][last_version].flags |= REG_FLAGS_REQUIRED;
     }
+    dirty_ir_regs[0] = dirty_ir_regs[1] = 0;
 }
 
 int
@@ -203,29 +224,7 @@ reg_is_native_size(ir_reg_t ir_reg)
     int native_size = ireg_data[IREG_GET_REG(ir_reg.reg)].native_size;
     int requested_size = IREG_GET_SIZE(ir_reg.reg);
 
-    switch (native_size) {
-        case REG_BYTE:
-        case REG_FPU_ST_BYTE:
-            return (requested_size == IREG_SIZE_B);
-        case REG_WORD:
-            return (requested_size == IREG_SIZE_W);
-        case REG_DWORD:
-            return (requested_size == IREG_SIZE_L);
-        case REG_QWORD:
-        case REG_FPU_ST_QWORD:
-        case REG_DOUBLE:
-        case REG_FPU_ST_DOUBLE:
-            return ((requested_size == IREG_SIZE_D) || (requested_size == IREG_SIZE_Q));
-        case REG_POINTER:
-            if (sizeof(void *) == 4)
-                return (requested_size == IREG_SIZE_L);
-            return (requested_size == IREG_SIZE_Q);
-
-        default:
-            fatal("get_reg_is_native_size: unknown native size %i\n", native_size);
-    }
-
-    return 0;
+    return native_requested_sizes[native_size][requested_size >> IREG_SIZE_SHIFT];
 }
 
 void
@@ -258,6 +257,8 @@ codegen_reg_reset(void)
     host_fp_reg_set.locked = 0;
     host_fp_reg_set.nr_regs = CODEGEN_HOST_FP_REGS;
 
+    dirty_ir_regs[0] = dirty_ir_regs[1] = 0;
+
     for (c = 0; c < IREG_COUNT; c++) {
         reg_last_version[c] = 0;
         reg_version[c][0].refcount = 0;
diff --git a/src/codegen_new/codegen_reg.h b/src/codegen_new/codegen_reg.h
index 6987b6f8c..a86bcd1cf 100644
--- a/src/codegen_new/codegen_reg.h
+++ b/src/codegen_new/codegen_reg.h
@@ -278,6 +278,7 @@ ireg_seg_limit_high(x86seg *seg)
 }
 
 extern uint8_t reg_last_version[IREG_COUNT];
+extern uint64_t dirty_ir_regs[2];
 
 /*This version of the register must be calculated, regardless of whether it is
   apparently required or not. Do not optimise out.*/
@@ -362,10 +363,12 @@ codegen_reg_write(int reg, int uop_nr)
     int last_version = reg_last_version[IREG_GET_REG(reg)];
     reg_version_t *version;
 
-#ifndef RELEASE_BUILD
-    if (IREG_GET_REG(reg) == IREG_INVALID)
-        fatal("codegen_reg_write - IREG_INVALID\n");
-#endif
+    if (dirty_ir_regs[(IREG_GET_REG(reg) >> 6) & 1] & (1ull << ((uint64_t)IREG_GET_REG(reg) & 0x3full))) {
+        dirty_ir_regs[(IREG_GET_REG(reg) >> 6) & 1] &= ~(1ull << ((uint64_t)IREG_GET_REG(reg) & 0x3full));
+        if ((IREG_GET_REG(reg) > IREG_EBX && IREG_GET_REG(reg) < IREG_temp0) && last_version > 0) {
+            reg_version[IREG_GET_REG(reg)][last_version].flags |= REG_FLAGS_REQUIRED;
+        }
+    }
 
     ireg.reg = reg;
     ireg.version = last_version + 1;
@@ -375,12 +378,8 @@ codegen_reg_write(int reg, int uop_nr)
     }
 
     reg_last_version[IREG_GET_REG(reg)]++;
-#ifndef RELEASE_BUILD
-    if (!reg_last_version[IREG_GET_REG(reg)])
-        fatal("codegen_reg_write - version overflow\n");
-    else
-#endif
-        if (reg_last_version[IREG_GET_REG(reg)] > REG_VERSION_MAX)
+
+    if (reg_last_version[IREG_GET_REG(reg)] > REG_VERSION_MAX)
         CPU_BLOCK_END();
     if (reg_last_version[IREG_GET_REG(reg)] > max_version_refcount)
         max_version_refcount = reg_last_version[IREG_GET_REG(reg)];