Rewrote the recompiler interrupt checking in assembly (and removed it for the new dynamic compiler because the required uops are not present), which brings performance back up; also did a number of CPU-related clean-ups (mostly removal of dead variables and associated code).

This commit is contained in:
OBattler
2020-07-13 19:46:19 +02:00
parent 0cd0d83cee
commit a862bda04c
32 changed files with 196 additions and 291 deletions

View File

@@ -308,13 +308,6 @@ void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr);
extern int cpu_block_end;
extern uint32_t codegen_endpc;
extern int cpu_recomp_blocks, cpu_recomp_full_ins, cpu_new_blocks;
extern int cpu_recomp_blocks_latched, cpu_recomp_ins_latched, cpu_recomp_full_ins_latched, cpu_new_blocks_latched;
extern int cpu_recomp_flushes, cpu_recomp_flushes_latched;
extern int cpu_recomp_evicted, cpu_recomp_evicted_latched;
extern int cpu_recomp_reuse, cpu_recomp_reuse_latched;
extern int cpu_recomp_removed, cpu_recomp_removed_latched;
extern int codegen_block_cycles;
extern void (*codegen_timing_start)();

View File

@@ -1,7 +1,6 @@
enum
{
ACCREG_ins = 0,
ACCREG_cycles = 1,
ACCREG_cycles = 0,
ACCREG_COUNT
};

View File

@@ -13,7 +13,6 @@ static struct
uintptr_t dest_reg;
} acc_regs[] =
{
[ACCREG_ins] = {0, (uintptr_t) &(ins)},
[ACCREG_cycles] = {0, (uintptr_t) &(cycles)},
};
@@ -22,37 +21,42 @@ void codegen_accumulate(int acc_reg, int delta)
acc_regs[acc_reg].count += delta;
if ((acc_reg == ACCREG_cycles) && (delta != 0)) {
addbyte(0x81); /*ADD $acc_regs[c].count,acc_regs[c].dest*/
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t) (uintptr_t) &(acycs));
addlong(-delta);
if (delta == -1) {
/* -delta = 1 */
addbyte(0xff); /*inc dword ptr[&acycs]*/
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t) (uintptr_t) &(acycs));
} else if (delta == 1) {
/* -delta = -1 */
addbyte(0xff); /*dec dword ptr[&acycs]*/
addbyte(0x0c);
addbyte(0x25);
addlong((uint32_t) (uintptr_t) &(acycs));
} else {
addbyte(0x81); /*ADD $acc_regs[c].count,acc_regs[c].dest*/
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t) (uintptr_t) &(acycs));
addlong(-delta);
}
}
}
void codegen_accumulate_flush(void)
{
int c;
for (c = 0; c < ACCREG_COUNT; c++)
{
if (acc_regs[c].count)
{
addbyte(0x81); /*ADD $acc_regs[c].count,acc_regs[c].dest*/
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t) acc_regs[c].dest_reg);
addlong(acc_regs[c].count);
}
if (acc_regs[0].count) {
addbyte(0x81); /*ADD $acc_regs[0].count,acc_regs[0].dest*/
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t) acc_regs[0].dest_reg);
addlong(acc_regs[0].count);
}
acc_regs[c].count = 0;
}
acc_regs[0].count = 0;
}
void codegen_accumulate_reset()
{
int c;
for (c = 0; c < ACCREG_COUNT; c++)
acc_regs[c].count = 0;
acc_regs[0].count = 0;
}

View File

@@ -13,7 +13,6 @@ static struct
uintptr_t dest_reg;
} acc_regs[] =
{
[ACCREG_ins] = {0, (uintptr_t) &(ins)},
[ACCREG_cycles] = {0, (uintptr_t) &(cycles)}
};
@@ -22,35 +21,38 @@ void codegen_accumulate(int acc_reg, int delta)
acc_regs[acc_reg].count += delta;
if ((acc_reg == ACCREG_cycles) && (delta != 0)) {
addbyte(0x81); /*ADD $acc_regs[c].count,acc_regs[c].dest*/
addbyte(0x05);
addlong((uint32_t) (uintptr_t) &(acycs));
addlong((uintptr_t) -delta);
if (delta == -1) {
/* -delta = 1 */
addbyte(0xff); /*inc dword ptr[&acycs]*/
addbyte(0x05);
addlong((uint32_t) (uintptr_t) &(acycs));
} else if (delta == 1) {
/* -delta = -1 */
addbyte(0xff); /*dec dword ptr[&acycs]*/
addbyte(0x0d);
addlong((uint32_t) (uintptr_t) &(acycs));
} else {
addbyte(0x81); /*ADD $acc_regs[c].count,acc_regs[c].dest*/
addbyte(0x05);
addlong((uint32_t) (uintptr_t) &(acycs));
addlong((uintptr_t) -delta);
}
}
}
void codegen_accumulate_flush(void)
{
int c;
for (c = 0; c < ACCREG_COUNT; c++)
{
if (acc_regs[c].count)
{
addbyte(0x81); /*ADD $acc_regs[c].count,acc_regs[c].dest*/
addbyte(0x05);
addlong((uint32_t) acc_regs[c].dest_reg);
addlong(acc_regs[c].count);
}
if (acc_regs[0].count) {
addbyte(0x81); /*ADD $acc_regs[0].count,acc_regs[0].dest*/
addbyte(0x05);
addlong((uint32_t) acc_regs[0].dest_reg);
addlong(acc_regs[0].count);
}
acc_regs[c].count = 0;
}
acc_regs[0].count = 0;
}
void codegen_accumulate_reset()
{
int c;
for (c = 0; c < ACCREG_COUNT; c++)
acc_regs[c].count = 0;
acc_regs[0].count = 0;
}

View File

@@ -19,6 +19,7 @@ static uint32_t ropCLI(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32
if (!IOPLp && (cr4 & (CR4_VME | CR4_PVI)))
return 0;
CLEAR_BITS((uintptr_t)&cpu_state.flags, I_FLAG);
CLEAR_BITS((uintptr_t)&pic_pending, 0xffffffff);
return op_pc;
}
static uint32_t ropSTI(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block)

View File

@@ -49,11 +49,6 @@ int block_current = 0;
static int block_num;
int block_pos;
int cpu_recomp_flushes, cpu_recomp_flushes_latched;
int cpu_recomp_evicted, cpu_recomp_evicted_latched;
int cpu_recomp_reuse, cpu_recomp_reuse_latched;
int cpu_recomp_removed, cpu_recomp_removed_latched;
uint32_t codegen_endpc;
int codegen_block_cycles;
@@ -224,7 +219,6 @@ void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr)
if (mask & block->page_mask)
{
delete_block(block);
cpu_recomp_evicted++;
}
if (block == block->next)
fatal("Broken 1\n");
@@ -238,7 +232,6 @@ void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr)
if (mask & block->page_mask2)
{
delete_block(block);
cpu_recomp_evicted++;
}
if (block == block->next_2)
fatal("Broken 2\n");
@@ -260,7 +253,6 @@ void codegen_block_init(uint32_t phys_addr)
if (block->valid != 0)
{
delete_block(block);
cpu_recomp_reuse++;
}
block_num = HASH(phys_addr);
codeblock_hash[block_num] = &codeblock[block_current];
@@ -393,7 +385,6 @@ void codegen_block_remove()
codeblock_t *block = &codeblock[block_current];
delete_block(block);
cpu_recomp_removed++;
recomp_page = -1;
}
@@ -1049,7 +1040,6 @@ void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t
generate_call:
codegen_timing_opcode(opcode, fetchdat, op_32, op_pc);
codegen_accumulate(ACCREG_ins, 1);
codegen_accumulate(ACCREG_cycles, -codegen_block_cycles);
codegen_block_cycles = 0;
@@ -1096,10 +1086,11 @@ generate_call:
codegen_endpc = (cs + cpu_state.pc) + 8;
/* Check for interrupts. */
call(block, (uintptr_t)int_check);
addbyte(0x85); /*OR %eax, %eax*/
addbyte(0xc0);
addbyte(0xf6); /* test byte ptr[&pic_pending],1 */
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t) (uintptr_t) &pic_pending);
addbyte(0x01);
addbyte(0x0F); addbyte(0x85); /*JNZ 0*/
addlong((uint32_t)(uintptr_t)&block->data[BLOCK_EXIT_OFFSET] - (uint32_t)(uintptr_t)(&block->data[block_pos + 4]));
@@ -1175,13 +1166,11 @@ generate_call:
block->ins++;
addbyte(0x85); /*OR %eax, %eax*/
addbyte(0xc0);
addbyte(0x0F); addbyte(0x85); /*JNZ 0*/
addlong((uint32_t)(uintptr_t)&block->data[BLOCK_EXIT_OFFSET] - (uint32_t)(uintptr_t)(&block->data[block_pos + 4]));
/* Check for interrupts. */
call(block, (uintptr_t)int_check);
addbyte(0x0a); /* or al,byte ptr[&pic_pending] */
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t) (uintptr_t) &pic_pending);
addbyte(0x85); /*OR %eax, %eax*/
addbyte(0xc0);

View File

@@ -50,6 +50,9 @@
#include "x86_flags.h"
#include "x86_ops.h"
#include "x87.h"
/*ex*/
#include <86box/nmi.h>
#include <86box/pic.h>
#include "386_common.h"
@@ -88,11 +91,6 @@ int block_current = 0;
static int block_num;
int block_pos;
int cpu_recomp_flushes, cpu_recomp_flushes_latched;
int cpu_recomp_evicted, cpu_recomp_evicted_latched;
int cpu_recomp_reuse, cpu_recomp_reuse_latched;
int cpu_recomp_removed, cpu_recomp_removed_latched;
uint32_t codegen_endpc;
int codegen_block_cycles;
@@ -1367,7 +1365,6 @@ void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr)
if (mask & block->page_mask)
{
delete_block(block);
cpu_recomp_evicted++;
}
if (block == block->next)
fatal("Broken 1\n");
@@ -1381,7 +1378,6 @@ void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr)
if (mask & block->page_mask2)
{
delete_block(block);
cpu_recomp_evicted++;
}
if (block == block->next_2)
fatal("Broken 2\n");
@@ -1403,7 +1399,6 @@ void codegen_block_init(uint32_t phys_addr)
if (block->valid != 0)
{
delete_block(block);
cpu_recomp_reuse++;
}
block_num = HASH(phys_addr);
codeblock_hash[block_num] = &codeblock[block_current];
@@ -1512,7 +1507,6 @@ void codegen_block_remove()
codeblock_t *block = &codeblock[block_current];
delete_block(block);
cpu_recomp_removed++;
recomp_page = -1;
}
@@ -2016,7 +2010,6 @@ void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t
generate_call:
codegen_timing_opcode(opcode, fetchdat, op_32, op_pc);
codegen_accumulate(ACCREG_ins, 1);
codegen_accumulate(ACCREG_cycles, -codegen_block_cycles);
codegen_block_cycles = 0;
@@ -2063,11 +2056,10 @@ generate_call:
codegen_endpc = (cs + cpu_state.pc) + 8;
/* Check for interrupts. */
addbyte(0xE8); /*CALL*/
addlong(((uint8_t *)int_check - (uint8_t *)(&block->data[block_pos + 4])));
addbyte(0x09); /*OR %eax, %eax*/
addbyte(0xc0);
addbyte(0xf6); /* test byte ptr[&pic_pending],1 */
addbyte(0x05);
addlong((uint32_t) (uintptr_t) &pic_pending);
addbyte(0x01);
addbyte(0x0F); addbyte(0x85); /*JNZ 0*/
addlong((uint32_t)&block->data[BLOCK_EXIT_OFFSET] - (uint32_t)(&block->data[block_pos + 4]));
@@ -2154,14 +2146,10 @@ generate_call:
block->ins++;
addbyte(0x09); /*OR %eax, %eax*/
addbyte(0xc0);
addbyte(0x0F); addbyte(0x85); /*JNZ 0*/
addlong((uint32_t)&block->data[BLOCK_EXIT_OFFSET] - (uint32_t)(&block->data[block_pos + 4]));
/* Check for interrupts. */
addbyte(0xE8); /*CALL*/
addlong(((uint8_t *)int_check - (uint8_t *)(&block->data[block_pos + 4])));
addbyte(0x0a); /* or al,byte ptr[&pic_pending] */
addbyte(0x05);
addlong((uint32_t) (uintptr_t) &pic_pending);
addbyte(0x09); /*OR %eax, %eax*/
addbyte(0xc0);

View File

@@ -918,11 +918,9 @@ void loadcscall(uint16_t seg)
int type;
uint16_t tempw;
int csout = output;
if (msw&1 && !(cpu_state.eflags&VM_FLAG))
{
if (csout) x86seg_log("Protected mode CS load! %04X\n",seg);
x86seg_log("Protected mode CS load! %04X\n", seg);
if (!(seg&~3))
{
x86gpf("loadcscall(): Protected mode selector is zero",0);
@@ -956,7 +954,7 @@ void loadcscall(uint16_t seg)
newpc=segdat[0];
if (type&0x800) newpc|=segdat[3]<<16;
if (csout) x86seg_log("Code seg call - %04X - %04X %04X %04X\n",seg,segdat[0],segdat[1],segdat[2]);
x86seg_log("Code seg call - %04X - %04X %04X %04X\n",seg,segdat[0],segdat[1],segdat[2]);
if (segdat[2]&0x1000)
{
if (!(segdat[2]&0x400)) /*Not conforming*/
@@ -1001,14 +999,16 @@ void loadcscall(uint16_t seg)
CS=seg;
do_seg_load(&cpu_state.seg_cs, segdat);
if (CPL==3 && oldcpl!=3) flushmmucache_cr3();
if (csout) x86seg_log("Complete\n");
#ifdef ENABLE_X86SEG_LOG
x86seg_log("Complete\n");
#endif
cycles -= timing_call_pm;
}
else
{
type=segdat[2]&0xF00;
if (csout) x86seg_log("Type %03X\n",type);
x86seg_log("Type %03X\n",type);
switch (type)
{
case 0x400: /*Call gate*/