diff --git a/src/CPU/386_dynarec.c b/src/CPU/386_dynarec.c index a362f50da..c59110cd6 100644 --- a/src/CPU/386_dynarec.c +++ b/src/CPU/386_dynarec.c @@ -1426,7 +1426,7 @@ void exec386_dynarec(int cycs) { uint64_t mask = (uint64_t)1 << ((phys_addr >> PAGE_MASK_SHIFT) & PAGE_MASK_MASK); - if (page->code_present_mask & mask) + if (page->code_present_mask[(phys_addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK] & mask) { /*Walk page tree to see if we find the correct block*/ codeblock_t *new_block = codeblock_tree_find(phys_addr, cs); @@ -1439,10 +1439,11 @@ void exec386_dynarec(int cycs) } } } - if (valid_block && (block->page_mask & page->dirty_mask)) + + if (valid_block && (block->page_mask & *block->dirty_mask)) { - codegen_check_flush(page, page->dirty_mask, phys_addr); - page->dirty_mask = 0; + codegen_check_flush(page, page->dirty_mask[(phys_addr >> 10) & 3], phys_addr); + page->dirty_mask[(phys_addr >> 10) & 3] = 0; if (!block->pc) valid_block = 0; } @@ -1455,15 +1456,15 @@ void exec386_dynarec(int cycs) allow the first page to be interpreted and for the page fault to occur when the page boundary is actually crossed.*/ - uint32_t phys_addr_2 = get_phys_noabrt(block->endpc) & ~0xfff; + uint32_t phys_addr_2 = get_phys_noabrt(block->endpc); page_t *page_2 = &pages[phys_addr_2 >> 12]; if ((block->phys_2 ^ phys_addr_2) & ~0xfff) valid_block = 0; - else if (block->page_mask2 & page_2->dirty_mask) + else if (block->page_mask2 & *block->dirty_mask2) { - codegen_check_flush(page_2, page_2->dirty_mask, phys_addr_2); - page_2->dirty_mask = 0; + codegen_check_flush(page_2, page_2->dirty_mask[(phys_addr_2 >> 10) & 3], phys_addr_2); + page_2->dirty_mask[(phys_addr_2 >> 10) & 3] = 0; if (!block->pc) valid_block = 0; } @@ -1537,7 +1538,7 @@ inrecomp=0; will prevent any block from spanning more than 2 pages. In practice this limit will never be hit, as host block size is only 2kB*/ - if ((cpu_state.pc - start_pc) > 4000) + if ((cpu_state.pc - start_pc) > 1000) CPU_BLOCK_END(); if (trap) @@ -1605,7 +1606,7 @@ inrecomp=0; will prevent any block from spanning more than 2 pages. In practice this limit will never be hit, as host block size is only 2kB*/ - if ((cpu_state.pc - start_pc) > 4000) + if ((cpu_state.pc - start_pc) > 1000) CPU_BLOCK_END(); if (trap) diff --git a/src/CPU/codegen.h b/src/CPU/codegen.h index d4c25ae7a..e86919c3a 100644 --- a/src/CPU/codegen.h +++ b/src/CPU/codegen.h @@ -36,6 +36,7 @@ typedef struct codeblock_t { uint64_t page_mask, page_mask2; + uint64_t *dirty_mask, *dirty_mask2; uint64_t cmp; /*Previous and next pointers, for the codeblock list associated with @@ -236,8 +237,10 @@ static __inline void codeblock_tree_delete(codeblock_t *block) } } +#define PAGE_MASK_INDEX_MASK 3 +#define PAGE_MASK_INDEX_SHIFT 10 #define PAGE_MASK_MASK 63 -#define PAGE_MASK_SHIFT 6 +#define PAGE_MASK_SHIFT 4 extern codeblock_t *codeblock; diff --git a/src/CPU/codegen_ops_x86-64.h b/src/CPU/codegen_ops_x86-64.h index e85f5315f..4805be498 100644 --- a/src/CPU/codegen_ops_x86-64.h +++ b/src/CPU/codegen_ops_x86-64.h @@ -5504,7 +5504,7 @@ static void MEM_CHECK_WRITE_W(x86seg *seg) { addbyte(0x83); /*CMP writelookup2[RSI*8],-1*/ addbyte(0x3c); - addbyte(0xfd); + addbyte(0xf5); addlong((uint32_t)writelookup2); addbyte(-1); } @@ -5653,7 +5653,7 @@ static void MEM_CHECK_WRITE_L(x86seg *seg) { addbyte(0x83); /*CMP writelookup2[RSI*8],-1*/ addbyte(0x3c); - addbyte(0xfd); + addbyte(0xf5); addlong((uint32_t)writelookup2); addbyte(-1); } diff --git a/src/CPU/codegen_x86-64.c b/src/CPU/codegen_x86-64.c index c5214bd4b..3ce22d95e 100644 --- a/src/CPU/codegen_x86-64.c +++ b/src/CPU/codegen_x86-64.c @@ -87,7 +87,6 @@ void codegen_init() exit(-1); } #endif -// pclog("Codegen is %p\n", (void *)pages[0xfab12 >> 12].block); } void codegen_reset() @@ -99,25 +98,11 @@ void codegen_reset() void dump_block() { - codeblock_t *block = pages[0x119000 >> 12].block; - - pclog("dump_block:\n"); - while (block) - { - uint32_t start_pc = (block->pc & 0xffc) | (block->phys & ~0xfff); - uint32_t end_pc = (block->endpc & 0xffc) | (block->phys & ~0xfff); - pclog(" %p : %08x-%08x %08x-%08x %p %p\n", (void *)block, start_pc, end_pc, block->pc, block->endpc, (void *)block->prev, (void *)block->next); - if (!block->pc) - fatal("Dead PC=0\n"); - - block = block->next; - } - pclog("dump_block done\n"); } static void add_to_block_list(codeblock_t *block) { - codeblock_t *block_prev = pages[block->phys >> 12].block; + codeblock_t *block_prev = pages[block->phys >> 12].block[(block->phys >> 10) & 3]; if (!block->page_mask) fatal("add_to_block_list - mask = 0\n"); @@ -126,12 +111,12 @@ static void add_to_block_list(codeblock_t *block) { block->next = block_prev; block_prev->prev = block; - pages[block->phys >> 12].block = block; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block; } else { block->next = NULL; - pages[block->phys >> 12].block = block; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block; } if (block->next) @@ -142,18 +127,18 @@ static void add_to_block_list(codeblock_t *block) if (block->page_mask2) { - block_prev = pages[block->phys_2 >> 12].block_2; + block_prev = pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3]; if (block_prev) { block->next_2 = block_prev; block_prev->prev_2 = block; - pages[block->phys_2 >> 12].block_2 = block; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block; } else { block->next_2 = NULL; - pages[block->phys_2 >> 12].block_2 = block; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block; } } } @@ -171,7 +156,7 @@ static void remove_from_block_list(codeblock_t *block, uint32_t pc) } else { - pages[block->phys >> 12].block = block->next; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block->next; if (block->next) block->next->prev = NULL; else @@ -192,8 +177,7 @@ static void remove_from_block_list(codeblock_t *block, uint32_t pc) } else { -// pclog(" pages.block_2=%p 3 %p %p\n", (void *)block->next_2, (void *)block, (void *)pages[block->phys_2 >> 12].block_2); - pages[block->phys_2 >> 12].block_2 = block->next_2; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block->next_2; if (block->next_2) block->next_2->prev_2 = NULL; else @@ -218,7 +202,7 @@ static void delete_block(codeblock_t *block) void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr) { - struct codeblock_t *block = page->block; + struct codeblock_t *block = page->block[(phys_addr >> 10) & 3]; while (block) { @@ -232,7 +216,7 @@ void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr) block = block->next; } - block = page->block_2; + block = page->block_2[(phys_addr >> 10) & 3]; while (block) { @@ -253,17 +237,14 @@ void codegen_block_init(uint32_t phys_addr) int has_evicted = 0; page_t *page = &pages[phys_addr >> 12]; - if (!page->block) + if (!page->block[(phys_addr >> 10) & 3]) mem_flush_write_page(phys_addr, cs+cpu_state.pc); block_current = (block_current + 1) & BLOCK_MASK; block = &codeblock[block_current]; -// if (block->pc == 0xb00b4ff5) -// pclog("Init target block\n"); if (block->pc != 0) { -// pclog("Reuse block : was %08x now %08x\n", block->pc, cs+pc); delete_block(block); cpu_recomp_reuse++; } @@ -275,6 +256,8 @@ void codegen_block_init(uint32_t phys_addr) block->_cs = cs; block->pnt = block_current; block->phys = phys_addr; + block->dirty_mask = &page->dirty_mask[(phys_addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK]; + block->dirty_mask2 = NULL; block->next = block->prev = NULL; block->next_2 = block->prev_2 = NULL; block->page_mask = 0; @@ -294,7 +277,7 @@ void codegen_block_start_recompile(codeblock_t *block) int has_evicted = 0; page_t *page = &pages[block->phys >> 12]; - if (!page->block) + if (!page->block[(block->phys >> 10) & 3]) mem_flush_write_page(block->phys, cs+cpu_state.pc); block_num = HASH(block->phys); @@ -360,8 +343,6 @@ void codegen_block_start_recompile(codeblock_t *block) addbyte(0xBD); addquad(((uintptr_t)&cpu_state) + 128); -// pclog("New block %i for %08X %03x\n", block_current, cs+pc, block_num); - last_op32 = -1; last_ea_seg = NULL; last_ssegs = -1; @@ -405,59 +386,60 @@ void codegen_block_remove() void codegen_block_generate_end_mask() { codeblock_t *block = &codeblock[block_current]; - uint32_t start_pc = (block->pc & 0xffc) | (block->phys & ~0xfff); - uint32_t end_pc = ((codegen_endpc + 3) & 0xffc) | (block->phys & ~0xfff); + uint32_t start_pc; + uint32_t end_pc; block->endpc = codegen_endpc; block->page_mask = 0; - start_pc = block->pc & 0xffc; - start_pc &= ~PAGE_MASK_MASK; - end_pc = ((block->endpc & 0xffc) + PAGE_MASK_MASK) & ~PAGE_MASK_MASK; - if (end_pc > 0xfff || end_pc < start_pc) - end_pc = 0xfff; + start_pc = (block->pc & 0x3ff) & ~15; + if ((block->pc ^ block->endpc) & ~0x3ff) + end_pc = 0x3ff & ~15; + else + end_pc = (block->endpc & 0x3ff) & ~15; + if (end_pc < start_pc) + end_pc = 0x3ff; start_pc >>= PAGE_MASK_SHIFT; end_pc >>= PAGE_MASK_SHIFT; -// pclog("block_end: %08x %08x\n", start_pc, end_pc); for (; start_pc <= end_pc; start_pc++) { block->page_mask |= ((uint64_t)1 << start_pc); -// pclog(" %08x %llx\n", start_pc, block->page_mask); } - pages[block->phys >> 12].code_present_mask |= block->page_mask; + pages[block->phys >> 12].code_present_mask[(block->phys >> 10) & 3] |= block->page_mask; block->phys_2 = -1; block->page_mask2 = 0; block->next_2 = block->prev_2 = NULL; - if ((block->pc ^ block->endpc) & ~0xfff) + if ((block->pc ^ block->endpc) & ~0x3ff) { block->phys_2 = get_phys_noabrt(block->endpc); if (block->phys_2 != -1) { -// pclog("start block - %08x %08x %p %p %p %08x\n", block->pc, block->endpc, (void *)block, (void *)block->next_2, (void *)pages[block->phys_2 >> 12].block_2, block->phys_2); + page_t *page_2 = &pages[block->phys_2 >> 12]; start_pc = 0; - end_pc = (block->endpc & 0xfff) >> PAGE_MASK_SHIFT; + end_pc = (block->endpc & 0x3ff) >> PAGE_MASK_SHIFT; for (; start_pc <= end_pc; start_pc++) block->page_mask2 |= ((uint64_t)1 << start_pc); - - if (!pages[block->phys_2 >> 12].block_2) + page_2->code_present_mask[(block->phys_2 >> 10) & 3] |= block->page_mask2; + + if (!pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3]) mem_flush_write_page(block->phys_2, block->endpc); -// pclog("New block - %08x %08x %p %p phys %08x %08x %016llx\n", block->pc, block->endpc, (void *)block, (void *)block->next_2, block->phys, block->phys_2, block->page_mask2); + if (!block->page_mask2) fatal("!page_mask2\n"); if (block->next_2) { -// pclog(" next_2->pc=%08x\n", block->next_2->pc); if (!block->next_2->pc) fatal("block->next_2->pc=0 %p\n", (void *)block->next_2); } + + block->dirty_mask2 = &page_2->dirty_mask[(block->phys_2 >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK]; } } -// pclog("block_end: %08x %08x %016llx\n", block->pc, block->endpc, block->page_mask); recomp_page = -1; } @@ -487,16 +469,6 @@ void codegen_block_end_recompile(codeblock_t *block) addbyte(cpu_state_offset(cpu_recomp_ins)); addlong(codegen_block_ins); } -#if 0 - if (codegen_block_full_ins) - { - addbyte(0x81); /*ADD $codegen_block_ins,ins*/ - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)&cpu_recomp_full_ins); - addlong(codegen_block_full_ins); - } -#endif addbyte(0x48); /*ADDL $40,%rsp*/ addbyte(0x83); addbyte(0xC4); @@ -523,7 +495,6 @@ void codegen_block_end_recompile(codeblock_t *block) block->next_2 = block->prev_2 = NULL; codegen_block_generate_end_mask(); add_to_block_list(block); -// pclog("End block %i\n", block_num); } void codegen_flush() @@ -730,7 +701,7 @@ static x86seg *codegen_generate_ea_16_long(x86seg *op_ea_seg, uint32_t fetchdat, } return op_ea_seg; } -//#if 0 + static x86seg *codegen_generate_ea_32_long(x86seg *op_ea_seg, uint32_t fetchdat, int op_ssegs, uint32_t *op_pc, int stack_offset) { uint32_t new_eaaddr; @@ -932,7 +903,7 @@ static x86seg *codegen_generate_ea_32_long(x86seg *op_ea_seg, uint32_t fetchdat, } return op_ea_seg; } -//#endif + void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t new_pc, uint32_t old_pc) { codeblock_t *block = &codeblock[block_current]; @@ -1117,17 +1088,6 @@ generate_call: addlong(codegen_block_ins); codegen_block_ins = 0; } -#if 0 - if (codegen_block_full_ins) - { - addbyte(0x81); /*ADD $codegen_block_ins,ins*/ - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)&cpu_recomp_full_ins); - addlong(codegen_block_full_ins); - codegen_block_full_ins = 0; - } -#endif } if (recomp_op_table && recomp_op_table[(opcode | op_32) & 0x1ff]) { @@ -1147,8 +1107,6 @@ generate_call: } op = op_table[((opcode >> opcode_shift) | op_32) & opcode_mask]; -// if (output) -// pclog("Generate call at %08X %02X %08X %02X %08X %08X %08X %08X %08X %02X %02X %02X %02X\n", &codeblock[block_current][block_pos], opcode, new_pc, ram[old_pc], EAX, EBX, ECX, EDX, ESI, ram[0x7bd2+6],ram[0x7bd2+7],ram[0x7bd2+8],ram[0x7bd2+9]); if (op_ssegs != last_ssegs) { last_ssegs = op_ssegs; @@ -1157,7 +1115,7 @@ generate_call: addbyte(cpu_state_offset(ssegs)); addbyte(op_ssegs); } -//#if 0 + if ((!test_modrm || (op_table == x86_dynarec_opcodes && opcode_modrm[opcode]) || (op_table == x86_dynarec_opcodes_0f && opcode_0f_modrm[opcode]))/* && !(op_32 & 0x200)*/) @@ -1183,10 +1141,9 @@ generate_call: op_ea_seg = codegen_generate_ea_32_long(op_ea_seg, fetchdat, op_ssegs, &op_pc, stack_offset); op_pc -= pc_off; } -//#endif + if (op_ea_seg != last_ea_seg) { -// last_ea_seg = op_ea_seg; addbyte(0xC7); /*MOVL $&_ds,(ea_seg)*/ addbyte(0x45); addbyte(cpu_state_offset(ea_seg)); @@ -1223,8 +1180,6 @@ generate_call: addbyte(0x0F); addbyte(0x85); /*JNZ 0*/ addlong((uint32_t)&block->data[BLOCK_EXIT_OFFSET] - (uint32_t)(&block->data[block_pos + 4])); -// call(block, codegen_debug); - codegen_endpc = (cs + cpu_state.pc) + 8; } diff --git a/src/CPU/codegen_x86.c b/src/CPU/codegen_x86.c index a4f0ffe59..50fa7bc35 100644 --- a/src/CPU/codegen_x86.c +++ b/src/CPU/codegen_x86.c @@ -1218,25 +1218,11 @@ void codegen_reset() void dump_block() { - codeblock_t *block = pages[0x119000 >> 12].block; - - pclog("dump_block:\n"); - while (block) - { - uint32_t start_pc = (block->pc & 0xffc) | (block->phys & ~0xfff); - uint32_t end_pc = (block->endpc & 0xffc) | (block->phys & ~0xfff); - pclog(" %p : %08x-%08x %08x-%08x %p %p\n", (void *)block, start_pc, end_pc, block->pc, block->endpc, (void *)block->prev, (void *)block->next); - if (!block->pc) - fatal("Dead PC=0\n"); - - block = block->next; - } - pclog("dump_block done\n"); } static void add_to_block_list(codeblock_t *block) { - codeblock_t *block_prev = pages[block->phys >> 12].block; + codeblock_t *block_prev = pages[block->phys >> 12].block[(block->phys >> 10) & 3]; if (!block->page_mask) fatal("add_to_block_list - mask = 0\n"); @@ -1245,12 +1231,12 @@ static void add_to_block_list(codeblock_t *block) { block->next = block_prev; block_prev->prev = block; - pages[block->phys >> 12].block = block; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block; } else { block->next = NULL; - pages[block->phys >> 12].block = block; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block; } if (block->next) @@ -1261,18 +1247,18 @@ static void add_to_block_list(codeblock_t *block) if (block->page_mask2) { - block_prev = pages[block->phys_2 >> 12].block_2; + block_prev = pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3]; if (block_prev) { block->next_2 = block_prev; block_prev->prev_2 = block; - pages[block->phys_2 >> 12].block_2 = block; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block; } else { block->next_2 = NULL; - pages[block->phys_2 >> 12].block_2 = block; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block; } } } @@ -1290,7 +1276,7 @@ static void remove_from_block_list(codeblock_t *block, uint32_t pc) } else { - pages[block->phys >> 12].block = block->next; + pages[block->phys >> 12].block[(block->phys >> 10) & 3] = block->next; if (block->next) block->next->prev = NULL; else @@ -1312,7 +1298,7 @@ static void remove_from_block_list(codeblock_t *block, uint32_t pc) else { /* pclog(" pages.block_2=%p 3 %p %p\n", (void *)block->next_2, (void *)block, (void *)pages[block->phys_2 >> 12].block_2); */ - pages[block->phys_2 >> 12].block_2 = block->next_2; + pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3] = block->next_2; if (block->next_2) block->next_2->prev_2 = NULL; else @@ -1337,7 +1323,7 @@ static void delete_block(codeblock_t *block) void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr) { - struct codeblock_t *block = page->block; + struct codeblock_t *block = page->block[(phys_addr >> 10) & 3]; while (block) { @@ -1351,7 +1337,7 @@ void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr) block = block->next; } - block = page->block_2; + block = page->block_2[(phys_addr >> 10) & 3]; while (block) { @@ -1372,14 +1358,12 @@ void codegen_block_init(uint32_t phys_addr) int has_evicted = 0; page_t *page = &pages[phys_addr >> 12]; - if (!page->block) + if (!page->block[(phys_addr >> 10) & 3]) mem_flush_write_page(phys_addr, cs+cpu_state.pc); block_current = (block_current + 1) & BLOCK_MASK; block = &codeblock[block_current]; - /* if (block->pc == 0xb00b4ff5) - pclog("Init target block\n"); */ if (block->pc != 0) { /* pclog("Reuse block : was %08x now %08x\n", block->pc, cs+pc); */ @@ -1394,6 +1378,8 @@ void codegen_block_init(uint32_t phys_addr) block->_cs = cs; block->pnt = block_current; block->phys = phys_addr; + block->dirty_mask = &page->dirty_mask[(phys_addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK]; + block->dirty_mask2 = NULL; block->next = block->prev = NULL; block->next_2 = block->prev_2 = NULL; block->page_mask = 0; @@ -1412,7 +1398,7 @@ void codegen_block_start_recompile(codeblock_t *block) int has_evicted = 0; page_t *page = &pages[block->phys >> 12]; - if (!page->block) + if (!page->block[(block->phys >> 10) & 3]) mem_flush_write_page(block->phys, cs+cpu_state.pc); block_num = HASH(block->phys); @@ -1497,17 +1483,19 @@ void codegen_block_remove() void codegen_block_generate_end_mask() { codeblock_t *block = &codeblock[block_current]; - uint32_t start_pc = (block->pc & 0xffc) | (block->phys & ~0xfff); - uint32_t end_pc = ((codegen_endpc + 3) & 0xffc) | (block->phys & ~0xfff); + uint32_t start_pc; + uint32_t end_pc; block->endpc = codegen_endpc; block->page_mask = 0; - start_pc = block->pc & 0xffc; - start_pc &= ~PAGE_MASK_MASK; - end_pc = ((block->endpc & 0xffc) + PAGE_MASK_MASK) & ~PAGE_MASK_MASK; - if (end_pc > 0xfff || end_pc < start_pc) - end_pc = 0xfff; + start_pc = (block->pc & 0x3ff) & ~15; + if ((block->pc ^ block->endpc) & ~0x3ff) + end_pc = 0x3ff & ~15; + else + end_pc = (block->endpc & 0x3ff) & ~15; + if (end_pc < start_pc) + end_pc = 0x3ff; start_pc >>= PAGE_MASK_SHIFT; end_pc >>= PAGE_MASK_SHIFT; @@ -1518,26 +1506,27 @@ void codegen_block_generate_end_mask() /* pclog(" %08x %llx\n", start_pc, block->page_mask); */ } - pages[block->phys >> 12].code_present_mask |= block->page_mask; + pages[block->phys >> 12].code_present_mask[(block->phys >> 10) & 3] |= block->page_mask; block->phys_2 = -1; block->page_mask2 = 0; block->next_2 = block->prev_2 = NULL; - if ((block->pc ^ block->endpc) & ~0xfff) + if ((block->pc ^ block->endpc) & ~0x3ff) { block->phys_2 = get_phys_noabrt(block->endpc); if (block->phys_2 != -1) { - /* pclog("start block - %08x %08x %p %p %p %08x\n", block->pc, block->endpc, (void *)block, (void *)block->next_2, (void *)pages[block->phys_2 >> 12].block_2, block->phys_2); */ + page_t *page_2 = &pages[block->phys_2 >> 12]; start_pc = 0; - end_pc = (block->endpc & 0xfff) >> PAGE_MASK_SHIFT; + end_pc = (block->endpc & 0x3ff) >> PAGE_MASK_SHIFT; for (; start_pc <= end_pc; start_pc++) block->page_mask2 |= ((uint64_t)1 << start_pc); + page_2->code_present_mask[(block->phys_2 >> 10) & 3] |= block->page_mask2; - if (!pages[block->phys_2 >> 12].block_2) + if (!pages[block->phys_2 >> 12].block_2[(block->phys_2 >> 10) & 3]) mem_flush_write_page(block->phys_2, block->endpc); - /* pclog("New block - %08x %08x %p %p phys %08x %08x %016llx\n", block->pc, block->endpc, (void *)block, (void *)block->next_2, block->phys, block->phys_2, block->page_mask2); */ + if (!block->page_mask2) fatal("!page_mask2\n"); if (block->next_2) @@ -1546,6 +1535,8 @@ void codegen_block_generate_end_mask() if (!block->next_2->pc) fatal("block->next_2->pc=0 %p\n", (void *)block->next_2); } + + block->dirty_mask2 = &page_2->dirty_mask[(block->phys_2 >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK]; } } diff --git a/src/SOUND/openal.c b/src/SOUND/openal.c index e6dd1add2..b044c0e5a 100644 --- a/src/SOUND/openal.c +++ b/src/SOUND/openal.c @@ -21,7 +21,7 @@ static ALuint source[2]; /* audio source */ #define BUFLEN SOUNDBUFLEN -void closeal(ALvoid); +void closeal(void); ALvoid alutInit(ALint *argc,ALbyte **argv) { ALCcontext *Context; @@ -62,7 +62,7 @@ void initalmain(int argc, char *argv[]) #endif } -void closeal(ALvoid) +void closeal(void) { #ifdef USE_OPENAL alutExit(); diff --git a/src/SOUND/sound.h b/src/SOUND/sound.h index d6c82fb6a..05540b403 100644 --- a/src/SOUND/sound.h +++ b/src/SOUND/sound.h @@ -43,7 +43,7 @@ void sound_reset(); void sound_cd_thread_reset(); -void closeal(ALvoid); +void closeal(void); void initalmain(int argc, char *argv[]); void inital(); void givealbuffer(float *buf); diff --git a/src/VIDEO/vid_voodoo.c b/src/VIDEO/vid_voodoo.c index 9b823e1a2..f033acdbb 100644 --- a/src/VIDEO/vid_voodoo.c +++ b/src/VIDEO/vid_voodoo.c @@ -14,6 +14,15 @@ #include "vid_voodoo.h" #include "vid_voodoo_dither.h" +#ifdef MIN +#undef MIN +#endif +#ifdef ABS +#undef ABS +#endif +#ifdef CLAMP +#undef CLAMP +#endif #define MIN(a, b) ((a) < (b) ? (a) : (b)) @@ -53,7 +62,6 @@ static int tris = 0; static uint64_t status_time = 0; static uint64_t voodoo_time = 0; static int voodoo_render_time[2] = {0, 0}; -static int voodoo_render_time_old[2] = {0, 0}; typedef union int_float { @@ -384,9 +392,9 @@ typedef struct voodoo_t int wake_timer; - uint8_t thefilter[256][256]; /* pixel filter, feeding from one or two */ - uint8_t thefilterg[256][256]; /* for green */ - uint8_t thefilterb[256][256]; /* for blue */ + uint8_t thefilter[256][256]; // pixel filter, feeding from one or two + uint8_t thefilterg[256][256]; // for green + uint8_t thefilterb[256][256]; // for blue /* the voodoo adds purple lines for some reason */ uint16_t purpleline[256][3]; @@ -402,7 +410,7 @@ typedef struct voodoo_t void *codegen_data; } voodoo_t; -static __inline void wait_for_render_thread_idle(voodoo_t *voodoo); +static inline void wait_for_render_thread_idle(voodoo_t *voodoo); enum { @@ -552,7 +560,6 @@ enum SST_cmdFifoHoles = 0x1f8, SST_fbiInit4 = 0x200, - SST_vRetrace = 0x204, SST_backPorch = 0x208, SST_videoDimensions = 0x20c, @@ -715,7 +722,7 @@ enum SST_remap_fdAdY = 0x00d8 | 0x400, SST_remap_fdSdY = 0x00e4 | 0x400, SST_remap_fdTdY = 0x00f0 | 0x400, - SST_remap_fdWdY = 0x00fc | 0x400 + SST_remap_fdWdY = 0x00fc | 0x400, }; enum @@ -994,10 +1001,8 @@ static void voodoo_update_ncc(voodoo_t *voodoo, int tmu) for (col = 0; col < 256; col++) { int y = (col >> 4), i = (col >> 2) & 3, q = col & 3; - int _y = (col >> 4), _i = (col >> 2) & 3, _q = col & 3; int i_r, i_g, i_b; int q_r, q_g, q_b; - int r, g, b; y = (voodoo->nccTable[tmu][tbl].y[y >> 2] >> ((y & 3) * 8)) & 0xff; @@ -1108,8 +1113,6 @@ static void voodoo_recalc_tex(voodoo_t *voodoo, int tmu) int width = 256, height = 256; int shift = 8; int lod; - int lod_min = (voodoo->params.tLOD[tmu] >> 2) & 15; - int lod_max = (voodoo->params.tLOD[tmu] >> 8) & 15; uint32_t base = voodoo->params.texBaseAddr[tmu]; int tex_lod = 0; @@ -1243,6 +1246,7 @@ static void use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu) lod_min = (params->tLOD[tmu] >> 2) & 15; lod_max = (params->tLOD[tmu] >> 8) & 15; +// pclog(" add new texture to %i tformat=%i %08x LOD=%i-%i\n", c, voodoo->params.tformat[tmu], params->texBaseAddr[tmu], lod_min, lod_max); for (lod = lod_min; lod <= lod_max; lod++) { @@ -1251,7 +1255,10 @@ static void use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu) int x, y; int shift = 8 - params->tex_lod[tmu][lod]; rgba_u *pal; + + //pclog(" LOD %i : %08x - %08x %i %i,%i\n", lod, params->tex_base[tmu][lod] & voodoo->texture_mask, addr, voodoo->params.tformat[tmu], voodoo->params.tex_w_mask[tmu][lod],voodoo->params.tex_h_mask[tmu][lod]); + switch (params->tformat[tmu]) { case TEX_RGB332: @@ -1489,17 +1496,17 @@ static void flush_texture_cache(voodoo_t *voodoo, uint32_t dirty_addr, int tmu) int c; memset(voodoo->texture_present[tmu], 0, sizeof(voodoo->texture_present[0])); +// pclog("Evict %08x %i\n", dirty_addr, sizeof(voodoo->texture_present)); for (c = 0; c < TEX_CACHE_MAX; c++) { if (voodoo->texture_cache[tmu][c].base != -1) { - int lod_min = (voodoo->texture_cache[tmu][c].tLOD >> 2) & 15; - int lod_max = (voodoo->texture_cache[tmu][c].tLOD >> 8) & 15; int addr_start = voodoo->texture_cache[tmu][c].addr_start; int addr_end = voodoo->texture_cache[tmu][c].addr_end; if (dirty_addr >= (addr_start & voodoo->texture_mask & ~0x3ff) && dirty_addr < (((addr_end & voodoo->texture_mask) + 0x3ff) & ~0x3ff)) { +// pclog(" Evict texture %i %08x\n", c, voodoo->texture_cache[tmu][c].base); if (voodoo->texture_cache[tmu][c].refcount != voodoo->texture_cache[tmu][c].refcount_r[0] || (voodoo->render_threads == 2 && voodoo->texture_cache[tmu][c].refcount != voodoo->texture_cache[tmu][c].refcount_r[1])) @@ -1552,7 +1559,7 @@ typedef struct voodoo_state_t int32_t ib, ig, ir, ia; int32_t z; - int64_t new_depth; + int32_t new_depth; int64_t tmu0_s, tmu0_t; int64_t tmu0_w; @@ -1563,7 +1570,7 @@ typedef struct voodoo_state_t int pixel_count, texel_count; int x, x2; - uint64_t w_depth; + uint32_t w_depth; float log_temp; uint32_t ebp_store; @@ -1594,7 +1601,7 @@ static uint8_t logtable[256] = 0xf4,0xf5,0xf5,0xf6,0xf7,0xf7,0xf8,0xf9,0xfa,0xfa,0xfb,0xfc,0xfd,0xfd,0xfe,0xff }; -static __inline int fastlog(uint64_t val) +static inline int fastlog(uint64_t val) { uint64_t oldval = val; int exp = 63; @@ -1603,32 +1610,32 @@ static __inline int fastlog(uint64_t val) if (!val || val & (1ULL << 63)) return 0x80000000; - if (!(val & 0xffffffff00000000ll)) + if (!(val & 0xffffffff00000000)) { exp -= 32; val <<= 32; } - if (!(val & 0xffff000000000000ll)) + if (!(val & 0xffff000000000000)) { exp -= 16; val <<= 16; } - if (!(val & 0xff00000000000000ll)) + if (!(val & 0xff00000000000000)) { exp -= 8; val <<= 8; } - if (!(val & 0xf000000000000000ll)) + if (!(val & 0xf000000000000000)) { exp -= 4; val <<= 4; } - if (!(val & 0xc000000000000000ll)) + if (!(val & 0xc000000000000000)) { exp -= 2; val <<= 2; } - if (!(val & 0x8000000000000000ll)) + if (!(val & 0x8000000000000000)) { exp -= 1; val <<= 1; @@ -1642,10 +1649,11 @@ static __inline int fastlog(uint64_t val) return (exp << 8) | logtable[frac]; } -static __inline int fls(uint16_t val) +static inline int voodoo_fls(uint16_t val) { int num = 0; +//pclog("fls(%04x) = ", val); if (!(val & 0xff00)) { num += 8; @@ -1666,6 +1674,7 @@ static __inline int fls(uint16_t val) num += 1; val <<= 1; } +//pclog("%i %04x\n", num, val); return num; } @@ -1676,7 +1685,7 @@ typedef struct voodoo_texture_state_t int tex_shift; } voodoo_texture_state_t; -static __inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int tmu) +static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int tmu) { uint32_t dat; @@ -1716,7 +1725,7 @@ static __inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *tex #define LOW4(x) ((x & 0x0f) | ((x & 0x0f) << 4)) #define HIGH4(x) ((x & 0xf0) | ((x & 0xf0) >> 4)) -static __inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int s, int t, int *d, int tmu, int x) +static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int s, int t, int *d, int tmu, int x) { rgba_u dat[4]; @@ -1769,9 +1778,8 @@ static __inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *t state->tex_a[tmu] = (dat[0].rgba.a * d[0] + dat[1].rgba.a * d[1] + dat[2].rgba.a * d[2] + dat[3].rgba.a * d[3]) >> 8; } -static __inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) +static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) { - rgba_u tex_samples[4]; voodoo_texture_state_t texture_state; int d[4]; int s, t; @@ -1807,26 +1815,54 @@ static __inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *param s >>= 4; t >>= 4; - +//if (x == 80) +//if (voodoo_output) +// pclog("s=%08x t=%08x _ds=%02x _dt=%02x\n", s, t, _ds, dt); d[0] = (16 - _ds) * (16 - dt); d[1] = _ds * (16 - dt); d[2] = (16 - _ds) * dt; d[3] = _ds * dt; +// texture_state.s = s; +// texture_state.t = t; tex_read_4(state, &texture_state, s, t, d, tmu, x); + + +/* state->tex_r = (tex_samples[0].rgba.r * d[0] + tex_samples[1].rgba.r * d[1] + tex_samples[2].rgba.r * d[2] + tex_samples[3].rgba.r * d[3]) >> 8; + state->tex_g = (tex_samples[0].rgba.g * d[0] + tex_samples[1].rgba.g * d[1] + tex_samples[2].rgba.g * d[2] + tex_samples[3].rgba.g * d[3]) >> 8; + state->tex_b = (tex_samples[0].rgba.b * d[0] + tex_samples[1].rgba.b * d[1] + tex_samples[2].rgba.b * d[2] + tex_samples[3].rgba.b * d[3]) >> 8; + state->tex_a = (tex_samples[0].rgba.a * d[0] + tex_samples[1].rgba.a * d[1] + tex_samples[2].rgba.a * d[2] + tex_samples[3].rgba.a * d[3]) >> 8;*/ +/* state->tex_r = tex_samples[0].r; + state->tex_g = tex_samples[0].g; + state->tex_b = tex_samples[0].b; + state->tex_a = tex_samples[0].a;*/ } else { + // rgba_t tex_samples; + // voodoo_texture_state_t texture_state; +// int s = state->tex_s >> (18+state->lod); +// int t = state->tex_t >> (18+state->lod); + // int s, t; + +// state->tex_s -= 1 << (17+state->lod); +// state->tex_t -= 1 << (17+state->lod); + s = state->tex_s >> (4+tex_lod); t = state->tex_t >> (4+tex_lod); texture_state.s = s; texture_state.t = t; tex_read(state, &texture_state, tmu); + +/* state->tex_r = tex_samples[0].rgba.r; + state->tex_g = tex_samples[0].rgba.g; + state->tex_b = tex_samples[0].rgba.b; + state->tex_a = tex_samples[0].rgba.a;*/ } } -static __inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) +static inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) { if (params->textureMode[tmu] & 1) { @@ -1940,7 +1976,7 @@ static __inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, } \ else \ { \ - int fog_r, fog_g, fog_b, fog_a; \ + int fog_r, fog_g, fog_b, fog_a = 0; \ int fog_idx; \ \ if (!(params->fogMode & FOG_ADD)) \ @@ -2061,7 +2097,7 @@ static __inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, do \ { \ int _a; \ - int newdest_r, newdest_g, newdest_b; \ + int newdest_r = 0, newdest_g = 0, newdest_b = 0; \ \ switch (dest_afunc) \ { \ @@ -2216,12 +2252,12 @@ static __inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, #define dither2x2 (params->fbzMode & FBZ_DITHER_2x2) /*Perform texture fetch and blending for both TMUs*/ -static __inline void voodoo_tmu_fetch_and_blend(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int x) +static inline void voodoo_tmu_fetch_and_blend(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int x) { int r,g,b,a; int c_reverse, a_reverse; - int c_reverse1, a_reverse1; - int factor_r, factor_g, factor_b, factor_a; +// int c_reverse1, a_reverse1; + int factor_r = 0, factor_g = 0, factor_b = 0, factor_a = 0; voodoo_tmu_fetch(voodoo, params, state, 1, x); @@ -2235,8 +2271,8 @@ static __inline void voodoo_tmu_fetch_and_blend(voodoo_t *voodoo, voodoo_params_ c_reverse = !tc_reverse_blend; a_reverse = !tca_reverse_blend; } - c_reverse1 = c_reverse; - a_reverse1 = a_reverse; +/* c_reverse1 = c_reverse; + a_reverse1 = a_reverse;*/ if (tc_sub_clocal_1) { switch (tc_mselect_1) @@ -2507,6 +2543,8 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood state->clamp_t[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPT; state->clamp_s[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPS; state->clamp_t[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPT; +// int last_x; +// pclog("voodoo_triangle : bottom-half %X %X %X %X %X %i %i %i %i\n", xstart, xend, dx1, dx2, dx2 * 36, xdir, y, yend, ydir); for (c = 0; c <= LOD_MAX; c++) { @@ -2551,14 +2589,18 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood yend = params->clipHighY; state->y = ystart; +// yend--; #ifndef NO_CODEGEN if (voodoo->use_recompiler) voodoo_draw = voodoo_get_block(voodoo, params, state, odd_even); + else + voodoo_draw = NULL; #endif if (voodoo_output) pclog("dxAB=%08x dxBC=%08x dxAC=%08x\n", state->dxAB, state->dxBC, state->dxAC); +// pclog("Start %i %i\n", ystart, voodoo->fbzMode & (1 << 17)); for (; state->y < yend; state->y++) { int x, x2; @@ -2623,7 +2665,7 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood state->w += (params->dWdX * dx); if (voodoo_output) - pclog("%08llx %lli %lli\n", state->tmu0_t, state->tmu0_t >> (18+state->lod), (state->tmu0_t + (1 << 17+state->lod)) >> (18+state->lod)); + pclog("%08llx %lli %lli\n", state->tmu0_t, state->tmu0_t >> (18+state->lod), (state->tmu0_t + (1 << (17+state->lod))) >> (18+state->lod)); if (params->fbzMode & 1) { @@ -2682,8 +2724,8 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood if (x2 > x && state->xdir < 0) goto next_line; - state->fb_mem = fb_mem = &voodoo->fb_mem[params->draw_offset + (real_y * voodoo->row_width)]; - state->aux_mem = aux_mem = &voodoo->fb_mem[(params->aux_offset + (real_y * voodoo->row_width)) & voodoo->fb_mask]; + state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y * voodoo->row_width)]; + state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y * voodoo->row_width)) & voodoo->fb_mask]; if (voodoo_output) pclog("%03i: x=%08x x2=%08x xstart=%08x xend=%08x dx=%08x start_x2=%08x\n", state->y, x, x2, state->xstart, state->xend, dx, start_x2); @@ -2710,17 +2752,17 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood if (voodoo_output) pclog(" X=%03i T=%08x\n", x, state->tmu0_t); +// if (voodoo->fbzMode & FBZ_RGB_WMASK) { int update = 1; uint8_t cother_r, cother_g, cother_b, aother; uint8_t clocal_r, clocal_g, clocal_b, alocal; - int src_r, src_g, src_b, src_a; + int src_r = 0, src_g = 0, src_b = 0, src_a = 0; int msel_r, msel_g, msel_b, msel_a; uint8_t dest_r, dest_g, dest_b, dest_a; uint16_t dat; - uint16_t aux_dat; int sel; - int64_t new_depth, w_depth; + int32_t new_depth, w_depth; if (state->w & 0xffff00000000) w_depth = 0; @@ -2728,13 +2770,15 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood w_depth = 0xf001; else { - int exp = fls((uint16_t)((uint32_t)state->w >> 16)); + int exp = voodoo_fls((uint16_t)((uint32_t)state->w >> 16)); int mant = ((~(uint32_t)state->w >> (19 - exp))) & 0xfff; w_depth = (exp << 12) + mant + 1; if (w_depth > 0xffff) w_depth = 0xffff; } +// w_depth = CLAMP16(w_depth); + if (params->fbzMode & FBZ_W_BUFFER) new_depth = w_depth; else @@ -3138,7 +3182,6 @@ static void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_e { voodoo_state_t state; int vertexAy_adjusted; - int vertexBy_adjusted; int vertexCy_adjusted; int dx, dy; @@ -3214,7 +3257,6 @@ static void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_e state.vertexCx |= 0xffff0000; vertexAy_adjusted = (state.vertexAy+7) >> 4; - vertexBy_adjusted = (state.vertexBy+7) >> 4; vertexCy_adjusted = (state.vertexCy+7) >> 4; if (state.vertexBy - state.vertexAy) @@ -3283,14 +3325,14 @@ static void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_e voodoo_half_triangle(voodoo, params, &state, vertexAy_adjusted, vertexCy_adjusted, odd_even); } -static __inline void wake_render_thread(voodoo_t *voodoo) +static inline void wake_render_thread(voodoo_t *voodoo) { thread_set_event(voodoo->wake_render_thread[0]); /*Wake up render thread if moving from idle*/ if (voodoo->render_threads == 2) thread_set_event(voodoo->wake_render_thread[1]); /*Wake up render thread if moving from idle*/ } -static __inline void wait_for_render_thread_idle(voodoo_t *voodoo) +static inline void wait_for_render_thread_idle(voodoo_t *voodoo) { while (!PARAM_EMPTY_1 || (voodoo->render_threads == 2 && !PARAM_EMPTY_2) || voodoo->render_voodoo_busy[0] || (voodoo->render_threads == 2 && voodoo->render_voodoo_busy[1])) { @@ -3343,7 +3385,7 @@ static void render_thread_2(void *param) render_thread(param, 1); } -static __inline void queue_triangle(voodoo_t *voodoo, voodoo_params_t *params) +static inline void queue_triangle(voodoo_t *voodoo, voodoo_params_t *params) { voodoo_params_t *params_new = &voodoo->params_buffer[voodoo->params_write_idx & PARAM_MASK]; @@ -3709,7 +3751,7 @@ static void blit_start(voodoo_t *voodoo) int size_x = ABS(voodoo->bltSizeX), size_y = ABS(voodoo->bltSizeY); int x_dir = (voodoo->bltSizeX > 0) ? 1 : -1; int y_dir = (voodoo->bltSizeY > 0) ? 1 : -1; - int src_x, dst_x; + int dst_x; int src_y = voodoo->bltSrcY & 0x7ff, dst_y = voodoo->bltDstY & 0x7ff; int src_stride = (voodoo->bltCommand & BLTCMD_SRC_TILED) ? ((voodoo->bltSrcXYStride & 0x3f) * 32*2) : (voodoo->bltSrcXYStride & 0xff8); int dst_stride = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstXYStride & 0x3f) * 32*2) : (voodoo->bltDstXYStride & 0xff8); @@ -3818,7 +3860,7 @@ skip_pixel_fill: case BLIT_COMMAND_SGRAM_FILL: /*32x32 tiles - 2kb*/ dst_y = voodoo->bltDstY & 0x3ff; - size_x = voodoo->bltSizeX & 0x1ff; + size_x = voodoo->bltSizeX & 0x1ff; //512*8 = 4kb size_y = voodoo->bltSizeY & 0x3ff; dat64 = voodoo->bltColorFg | ((uint64_t)voodoo->bltColorFg << 16) | @@ -3861,7 +3903,6 @@ skip_pixel_fill: static void blit_data(voodoo_t *voodoo, uint32_t data) { - uint32_t data_orig = data; int src_bits = 32; uint32_t base_addr = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstBaseAddr & 0x3ff) << 12) : (voodoo->bltDstBaseAddr & 0x3ffff8); uint16_t *dst = (uint16_t *)&voodoo->fb_mem[base_addr + voodoo->blt.dst_y*voodoo->blt.dst_stride]; @@ -3871,8 +3912,8 @@ static void blit_data(voodoo_t *voodoo, uint32_t data) while (src_bits && voodoo->blt.cur_x <= voodoo->blt.size_x) { - int r, g, b; - uint16_t src_dat, dst_dat; + int r = 0, g = 0, b = 0; + uint16_t src_dat = 0, dst_dat; int x = (voodoo->blt.x_dir > 0) ? (voodoo->blt.dst_x + voodoo->blt.cur_x) : (voodoo->blt.dst_x - voodoo->blt.cur_x); int rop = 0; @@ -4033,6 +4074,7 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) chip = 0xf; tempif.i = val; +//pclog("voodoo_reg_write_l: addr=%08x val=%08x(%f) chip=%x\n", addr, val, tempif.f, chip); addr &= 0x3fc; if ((voodoo->fbiInit3 & FBIINIT3_REMAP) && addr < 0x100 && ad21) @@ -4040,6 +4082,7 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) switch (addr) { case SST_swapbufferCMD: +// pclog(" start swap buffer command\n"); if (TRIPLE_BUFFER) { @@ -4056,6 +4099,7 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) voodoo->params.swapbufferCMD = val; pclog("Swap buffer %08x %d %p\n", val, voodoo->swap_count, &voodoo->swap_count); +// voodoo->front_offset = params->front_offset; wait_for_render_thread_idle(voodoo); if (!(val & 1)) { @@ -4513,10 +4557,12 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) case SST_sVx: tempif.i = val; voodoo->verts[3].sVx = tempif.f; +// pclog("sVx[%i]=%f\n", voodoo->vertex_num, tempif.f); break; case SST_sVy: tempif.i = val; voodoo->verts[3].sVy = tempif.f; +// pclog("sVy[%i]=%f\n", voodoo->vertex_num, tempif.f); break; case SST_sARGB: voodoo->verts[3].sBlue = (float)(val & 0xff); @@ -4574,11 +4620,13 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) break; case SST_sBeginTriCMD: +// pclog("sBeginTriCMD %i %f\n", voodoo->vertex_num, voodoo->verts[4].sVx); voodoo->verts[0] = voodoo->verts[3]; voodoo->vertex_num = 1; voodoo->num_verticies = 1; break; case SST_sDrawTriCMD: +// pclog("sDrawTriCMD %i %i %i\n", voodoo->num_verticies, voodoo->vertex_num, voodoo->sSetupMode & SETUPMODE_STRIP_MODE); if (voodoo->vertex_num == 3) voodoo->vertex_num = (voodoo->sSetupMode & SETUPMODE_STRIP_MODE) ? 1 : 0; voodoo->verts[voodoo->vertex_num] = voodoo->verts[3]; @@ -4587,6 +4635,7 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) voodoo->vertex_num++; if (voodoo->num_verticies == 3) { +// pclog("triangle_setup\n"); triangle_setup(voodoo); voodoo->num_verticies = 2; @@ -4599,11 +4648,13 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) voodoo->bltSrcBaseAddr = val & 0x3fffff; break; case SST_bltDstBaseAddr: +// pclog("Write bltDstBaseAddr %08x\n", val); voodoo->bltDstBaseAddr = val & 0x3fffff; break; case SST_bltXYStrides: voodoo->bltSrcXYStride = val & 0xfff; voodoo->bltDstXYStride = (val >> 16) & 0xfff; +// pclog("Write bltXYStrides %08x\n", val); break; case SST_bltSrcChromaRange: voodoo->bltSrcChromaRange = val; @@ -4637,12 +4688,14 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) voodoo->bltSrcY = (val >> 16) & 0x7ff; break; case SST_bltDstXY: +// pclog("Write bltDstXY %08x\n", val); voodoo->bltDstX = val & 0x7ff; voodoo->bltDstY = (val >> 16) & 0x7ff; if (val & (1 << 31)) blit_start(voodoo); break; case SST_bltSize: +// pclog("Write bltSize %08x\n", val); voodoo->bltSizeX = val & 0xfff; if (voodoo->bltSizeX & 0x800) voodoo->bltSizeX |= 0xfffff000; @@ -4659,12 +4712,14 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) voodoo->bltRop[3] = (val >> 12) & 0xf; break; case SST_bltColor: +// pclog("Write bltColor %08x\n", val); voodoo->bltColorFg = val & 0xffff; voodoo->bltColorBg = (val >> 16) & 0xffff; break; case SST_bltCommand: voodoo->bltCommand = val; +// pclog("Write bltCommand %08x\n", val); if (val & (1 << 31)) blit_start(voodoo); break; @@ -5135,6 +5190,7 @@ static uint16_t voodoo_fb_readw(uint32_t addr, void *p) temp = *(uint16_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]); +// pclog("voodoo_fb_readw : %08X %08X %i %i %08X %08X %08x:%08x %i\n", addr, temp, x, y, read_addr, *(uint32_t *)(&voodoo->fb_mem[4]), cs, pc, fb_reads++); return temp; } static uint32_t voodoo_fb_readl(uint32_t addr, void *p) @@ -5153,10 +5209,11 @@ static uint32_t voodoo_fb_readl(uint32_t addr, void *p) temp = *(uint32_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]); +// pclog("voodoo_fb_readl : %08X %08x %08X x=%i y=%i %08X %08X %08x:%08x %i ro=%08x rw=%i\n", addr, read_addr, temp, x, y, read_addr, *(uint32_t *)(&voodoo->fb_mem[4]), cs, pc, fb_reads++, voodoo->fb_read_offset, voodoo->row_width); return temp; } -static __inline uint16_t do_dither(voodoo_params_t *params, rgba8_t col, int x, int y) +static inline uint16_t do_dither(voodoo_params_t *params, rgba8_t col, int x, int y) { int r, g, b; @@ -5194,11 +5251,19 @@ static void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p) rgba8_t colour_data; uint16_t depth_data; uint8_t alpha_data; - int write_mask; + int write_mask = 0; + + colour_data.r = colour_data.g = colour_data.b = colour_data.a = 0; depth_data = voodoo->params.zaColor & 0xffff; alpha_data = voodoo->params.zaColor >> 24; - + +// while (!RB_EMPTY) +// thread_reset_event(voodoo->not_full_event); + +// pclog("voodoo_fb_writew : %08X %04X\n", addr, val); + + switch (voodoo->lfbMode & LFB_FORMAT_MASK) { case LFB_FORMAT_RGB565: @@ -5233,6 +5298,8 @@ static void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p) write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width); write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width); + +// pclog("fb_writew %08x %i %i %i %08x\n", addr, x, y, voodoo->row_width, write_addr); if (voodoo->lfbMode & 0x100) { @@ -5256,7 +5323,7 @@ static void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p) if (params->fogMode & FOG_ENABLE) { int32_t z = new_depth << 12; - int64_t w_depth = new_depth; + int64_t w_depth = (int64_t)(int32_t)new_depth; int32_t ia = alpha_data << 12; APPLY_FOG(write_data.r, write_data.g, write_data.b, z, ia, w_depth); @@ -5309,11 +5376,15 @@ static void voodoo_fb_writel(uint32_t addr, uint32_t val, void *p) rgba8_t colour_data[2]; uint16_t depth_data[2]; uint8_t alpha_data[2]; - int write_mask, count = 1; + int write_mask = 0, count = 1; depth_data[0] = depth_data[1] = voodoo->params.zaColor & 0xffff; alpha_data[0] = alpha_data[1] = voodoo->params.zaColor >> 24; - +// while (!RB_EMPTY) +// thread_reset_event(voodoo->not_full_event); + +// pclog("voodoo_fb_writel : %08X %08X\n", addr, val); + switch (voodoo->lfbMode & LFB_FORMAT_MASK) { case LFB_FORMAT_RGB565: @@ -5365,6 +5436,8 @@ static void voodoo_fb_writel(uint32_t addr, uint32_t val, void *p) write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width); write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width); + +// pclog("fb_writel %08x x=%i y=%i rw=%i %08x wo=%08x\n", addr, x, y, voodoo->row_width, write_addr, voodoo->fb_write_offset); if (voodoo->lfbMode & 0x100) { @@ -5457,6 +5530,8 @@ static void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p) if (tmu && !voodoo->dual_tmus) return; +// pclog("voodoo_tex_writel : %08X %08X %i\n", addr, val, voodoo->params.tformat); + lod = (addr >> 17) & 0xf; t = (addr >> 9) & 0xff; if (voodoo->params.tformat[tmu] & 8) @@ -5471,20 +5546,24 @@ static void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p) if (lod > LOD_MAX) return; - + +// if (addr >= 0x200000) +// return; + if (voodoo->params.tformat[tmu] & 8) addr = voodoo->params.tex_base[tmu][lod] + s*2 + (t << voodoo->params.tex_shift[tmu][lod])*2; else addr = voodoo->params.tex_base[tmu][lod] + s + (t << voodoo->params.tex_shift[tmu][lod]); if (voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) { +// pclog("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); flush_texture_cache(voodoo, addr & voodoo->texture_mask, tmu); } *(uint32_t *)(&voodoo->tex_mem[tmu][addr & voodoo->texture_mask]) = val; } #define WAKE_DELAY (TIMER_USEC * 100) -static __inline void wake_fifo_thread(voodoo_t *voodoo) +static inline void wake_fifo_thread(voodoo_t *voodoo) { if (!voodoo->wake_timer) { @@ -5498,7 +5577,7 @@ static __inline void wake_fifo_thread(voodoo_t *voodoo) } } -static __inline void wake_fifo_thread_now(voodoo_t *voodoo) +static inline void wake_fifo_thread_now(voodoo_t *voodoo) { thread_set_event(voodoo->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ } @@ -5512,10 +5591,9 @@ static void voodoo_wake_timer(void *p) thread_set_event(voodoo->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ } -static __inline void queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val) +static inline void queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val) { fifo_entry_t *fifo = &voodoo->fifo[voodoo->fifo_write_idx & FIFO_MASK]; - int c; if (FIFO_FULL) { @@ -5772,7 +5850,9 @@ static void voodoo_pixelclock_update(voodoo_t *voodoo) t /= 2.0f; line_length = (voodoo->hSync & 0xff) + ((voodoo->hSync >> 16) & 0x3ff); - + +// pclog("Pixel clock %f MHz hsync %08x line_length %d\n", t, voodoo->hSync, line_length); + voodoo->pixel_clock = t; clock_const = cpuclock / t; @@ -5782,13 +5862,7 @@ static void voodoo_pixelclock_update(voodoo_t *voodoo) static void voodoo_writel(uint32_t addr, uint32_t val, void *p) { voodoo_t *voodoo = (voodoo_t *)p; - union - { - uint32_t i; - float f; - } tempif; - tempif.i = val; voodoo->wr_count++; addr &= 0xffffff; @@ -5810,6 +5884,7 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) } else if ((addr & 0x200000) && (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE)) { +// pclog("Write CMDFIFO %08x(%08x) %08x %08x\n", addr, voodoo->cmdfifo_base + (addr & 0x3fffc), val, (voodoo->cmdfifo_base + (addr & 0x3fffc)) & voodoo->fb_mask); *(uint32_t *)&voodoo->fb_mem[(voodoo->cmdfifo_base + (addr & 0x3fffc)) & voodoo->fb_mask] = val; voodoo->cmdfifo_depth_wr++; if ((voodoo->cmdfifo_depth_wr - voodoo->cmdfifo_depth_rd) < 20) @@ -5872,6 +5947,7 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) { voodoo->fbiInit4 = val; voodoo->read_time = pci_nonburst_time + pci_burst_time * ((voodoo->fbiInit4 & 1) ? 2 : 1); +// pclog("fbiInit4 write %08x - read_time=%i\n", val, voodoo->read_time); } break; case SST_backPorch: @@ -5904,6 +5980,7 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) voodoo->fbiInit1 = val; voodoo->write_time = pci_nonburst_time + pci_burst_time * ((voodoo->fbiInit1 & 2) ? 1 : 0); voodoo->burst_time = pci_burst_time * ((voodoo->fbiInit1 & 2) ? 2 : 1); +// pclog("fbiInit1 write %08x - write_time=%i burst_time=%i\n", val, voodoo->write_time, voodoo->burst_time); } break; case SST_fbiInit2: @@ -5946,6 +6023,7 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) voodoo->dac_readdata = 0xff; if (val & 0x800) { +// pclog(" dacData read %i %02X\n", voodoo->dac_reg, voodoo->dac_data[7]); if (voodoo->dac_reg == 5) { switch (voodoo->dac_data[7]) @@ -5956,7 +6034,7 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) } } else - voodoo->dac_readdata = voodoo->dac_data[voodoo->dac_readdata]; + voodoo->dac_readdata = voodoo->dac_data[voodoo->dac_readdata & 7]; } else { @@ -5966,6 +6044,7 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf] = (voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf] & 0xff00) | val; else voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf] = (voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf] & 0xff) | (val << 8); +// pclog("Write PLL reg %x %04x\n", voodoo->dac_data[4] & 0xf, voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf]); voodoo->dac_reg_ff = !voodoo->dac_reg_ff; if (!voodoo->dac_reg_ff) voodoo->dac_data[4]++; @@ -6009,6 +6088,7 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) case SST_cmdFifoBaseAddr: voodoo->cmdfifo_base = (val & 0x3ff) << 12; voodoo->cmdfifo_end = ((val >> 16) & 0x3ff) << 12; +// pclog("CMDFIFO base=%08x end=%08x\n", voodoo->cmdfifo_base, voodoo->cmdfifo_end); break; case SST_cmdFifoRdPtr: @@ -6053,10 +6133,11 @@ static uint32_t cmdfifo_get(voodoo_t *voodoo) voodoo->cmdfifo_depth_rd++; voodoo->cmdfifo_rp += 4; +// pclog(" CMDFIFO get %08x\n", val); return val; } -static __inline float cmdfifo_get_f(voodoo_t *voodoo) +static inline float cmdfifo_get_f(voodoo_t *voodoo) { union { @@ -6137,10 +6218,13 @@ static void fifo_thread(void *param) int num; int num_verticies; int v_num; - + +// pclog(" CMDFIFO header %08x at %08x\n", header, voodoo->cmdfifo_rp); + switch (header & 7) { case 0: +// pclog("CMDFIFO0\n"); switch ((header >> 3) & 7) { case 0: /*NOP*/ @@ -6148,6 +6232,7 @@ static void fifo_thread(void *param) case 3: /*JMP local frame buffer*/ voodoo->cmdfifo_rp = (header >> 4) & 0xfffffc; +// pclog("JMP to %08x %04x\n", voodoo->cmdfifo_rp, header); break; default: @@ -6158,6 +6243,7 @@ static void fifo_thread(void *param) case 1: num = header >> 16; addr = (header & 0x7ff8) >> 1; +// pclog("CMDFIFO1 addr=%08x\n",addr); while (num--) { uint32_t val = cmdfifo_get(voodoo); @@ -6174,13 +6260,15 @@ static void fifo_thread(void *param) case 3: num = (header >> 29) & 7; - mask = header; + mask = header;//(header >> 10) & 0xff; smode = (header >> 22) & 0xf; voodoo_reg_writel(SST_sSetupMode, ((header >> 10) & 0xff) | (smode << 16), voodoo); num_verticies = (header >> 6) & 0xf; v_num = 0; if (((header >> 3) & 7) == 2) v_num = 1; +// pclog("CMDFIFO3: num=%i verts=%i mask=%02x\n", num, num_verticies, (header >> 10) & 0xff); +// pclog("CMDFIFO3 %02x %i\n", (header >> 10), (header >> 3) & 7); while (num_verticies--) { @@ -6237,6 +6325,7 @@ static void fifo_thread(void *param) num = (header >> 29) & 7; mask = (header >> 15) & 0x3fff; addr = (header & 0x7ff8) >> 1; +// pclog("CMDFIFO4 addr=%08x\n",addr); while (mask) { if (mask & 1) @@ -6261,6 +6350,7 @@ static void fifo_thread(void *param) fatal("CMDFIFO packet 5 has byte disables set %08x\n", header); num = (header >> 3) & 0x7ffff; addr = cmdfifo_get(voodoo) & 0xffffff; +// pclog("CMDFIFO5 addr=%08x num=%i\n", addr, num); switch (header >> 30) { case 2: /*Framebuffer*/ @@ -6318,6 +6408,8 @@ uint8_t voodoo_pci_read(int func, int addr, void *p) if (func) return 0; +// pclog("Voodoo PCI read %08X PC=%08x\n", addr, cpu_state.pc); + switch (addr) { case 0x00: return 0x1a; /*3dfx*/ @@ -6363,6 +6455,8 @@ void voodoo_pci_write(int func, int addr, uint8_t val, void *p) if (func) return; +// pclog("Voodoo PCI write %04X %02X PC=%08x\n", addr, val, cpu_state.pc); + switch (addr) { case 0x04: @@ -6409,7 +6503,10 @@ static void voodoo_calc_clutData(voodoo_t *voodoo) int r = (c >> 8) & 0xf8; int g = (c >> 3) & 0xfc; int b = (c << 3) & 0xf8; - +// r |= (r >> 5); +// g |= (g >> 6); +// b |= (b >> 5); + voodoo->video_16to32[c] = (voodoo->clutData256[r].r << 16) | (voodoo->clutData256[g].g << 8) | voodoo->clutData256[b].b; } } @@ -6431,9 +6528,9 @@ static void voodoo_generate_filter_v1(voodoo_t *voodoo) fcg = FILTCAPG * 6; fcb = FILTCAPB * 5; - for (g=0;g -fcr)) thiscol = g + (difference / 2); if ((diffg < fcg) || (-diffg > -fcg)) @@ -6504,7 +6606,7 @@ static void voodoo_generate_filter_v2(voodoo_t *voodoo) float clr, clg, clb = 0; float fcr, fcg, fcb = 0; - /* pre-clamping */ + // pre-clamping fcr = FILTCAP; fcg = FILTCAPG; @@ -6514,9 +6616,9 @@ static void voodoo_generate_filter_v2(voodoo_t *voodoo) if (fcg > 32) fcg = 32; if (fcb > 32) fcb = 32; - for (g=0;g<256;g++) /* pixel 1 - our target pixel we want to bleed into */ + for (g=0;g<256;g++) // pixel 1 - our target pixel we want to bleed into { - for (h=0;h<256;h++) /* pixel 2 - our main pixel */ + for (h=0;h<256;h++) // pixel 2 - our main pixel { float avg; float avgdiff; @@ -6529,7 +6631,7 @@ static void voodoo_generate_filter_v2(voodoo_t *voodoo) thiscol = thiscolg = thiscolb = g; - /* try lighten */ + // try lighten if (h > g) { clr = clg = clb = avgdiff; @@ -6567,7 +6669,7 @@ static void voodoo_generate_filter_v2(voodoo_t *voodoo) if (difference > FILTCAPB) thiscolb = g; - /* clamp */ + // clamp if (thiscol < 0) thiscol = 0; if (thiscolg < 0) thiscolg = 0; if (thiscolb < 0) thiscolb = 0; @@ -6576,10 +6678,14 @@ static void voodoo_generate_filter_v2(voodoo_t *voodoo) if (thiscolg > 255) thiscolg = 255; if (thiscolb > 255) thiscolb = 255; - /* add to the table */ + // add to the table voodoo->thefilter[g][h] = (thiscol); voodoo->thefilterg[g][h] = (thiscolg); voodoo->thefilterb[g][h] = (thiscolb); + + // debug the ones that don't give us much of a difference + //if (difference < FILTCAP) + //pclog("Voodoofilter: %ix%i - %f difference, %f average difference, R=%f, G=%f, B=%f\n", g, h, difference, avgdiff, thiscol, thiscolg, thiscolb); } lined = g + 3; @@ -6624,7 +6730,7 @@ static void voodoo_filterline_v1(voodoo_t *voodoo, uint8_t *fil, int column, uin { int x; - /* Scratchpad for avoiding feedback streaks */ + // Scratchpad for avoiding feedback streaks uint8_t fil3[(voodoo->h_disp) * 3]; /* 16 to 32-bit */ @@ -6634,7 +6740,7 @@ static void voodoo_filterline_v1(voodoo_t *voodoo, uint8_t *fil, int column, uin fil[x*3+1] = (((src[x] >> 5) & 63) << 2); fil[x*3+2] = (((src[x] >> 11) & 31) << 3); - /* Copy to our scratchpads */ + // Copy to our scratchpads fil3[x*3+0] = fil[x*3+0]; fil3[x*3+1] = fil[x*3+1]; fil3[x*3+2] = fil[x*3+2]; @@ -6690,13 +6796,13 @@ static void voodoo_filterline_v2(voodoo_t *voodoo, uint8_t *fil, int column, uin { int x; - /* Scratchpad for blending filter */ + // Scratchpad for blending filter uint8_t fil3[(voodoo->h_disp) * 3]; /* 16 to 32-bit */ for (x=0; x> 5) & 63) << 2); fil3[x*3+2] = fil[x*3+2] = (((src[x] >> 11) & 31) << 3); @@ -6723,7 +6829,7 @@ static void voodoo_filterline_v2(voodoo_t *voodoo, uint8_t *fil, int column, uin fil[(x-1)*3+2] = voodoo->thefilter [fil3[(x-1)*3+2]][(((src[x] >> 11) & 31) << 3)]; } - /* unroll for edge cases */ + // unroll for edge cases fil3[(column-3)*3] = voodoo->thefilterb [((src[column-3] & 31) << 3)] [((src[column] & 31) << 3)]; fil3[(column-3)*3+1] = voodoo->thefilterg [(((src[column-3] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; @@ -6779,7 +6885,6 @@ void voodoo_callback(void *p) if (voodoo->scrfilter && voodoo->scrfilterEnabled) { - int j, offset; uint8_t fil[(voodoo->h_disp) * 3]; /* interleaved 24-bit RGB */ if (voodoo->type == VOODOO_2) @@ -6804,6 +6909,7 @@ void voodoo_callback(void *p) } if (voodoo->line == voodoo->v_disp) { +// pclog("retrace %i %i %08x %i\n", voodoo->retrace_count, voodoo->swap_interval, voodoo->swap_offset, voodoo->swap_pending); voodoo->retrace_count++; if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval)) { @@ -6898,6 +7004,7 @@ static void voodoo_speed_changed(void *p) voodoo->read_time = pci_nonburst_time + pci_burst_time * ((voodoo->fbiInit4 & 1) ? 2 : 1); voodoo->write_time = pci_nonburst_time + pci_burst_time * ((voodoo->fbiInit1 & 2) ? 1 : 0); voodoo->burst_time = pci_burst_time * ((voodoo->fbiInit1 & 2) ? 2 : 1); +// pclog("Voodoo read_time=%i write_time=%i burst_time=%i %08x %08x\n", voodoo->read_time, voodoo->write_time, voodoo->burst_time, voodoo->fbiInit1, voodoo->fbiInit4); } void *voodoo_init() @@ -7039,7 +7146,9 @@ void *voodoo_init() void voodoo_close(void *p) { +#ifndef RELEASE_BUILD FILE *f; +#endif voodoo_t *voodoo = (voodoo_t *)p; int c; @@ -7086,77 +7195,111 @@ void voodoo_close(void *p) static device_config_t voodoo_config[] = { { - "type", "Voodoo type", CONFIG_SELECTION, "", 0, + .name = "type", + .description = "Voodoo type", + .type = CONFIG_SELECTION, + .selection = { { - "Voodoo Graphics", VOODOO_1 + .description = "Voodoo Graphics", + .value = VOODOO_1 }, { - "Obsidian SB50 + Amethyst (2 TMUs)", VOODOO_SB50 + .description = "Obsidian SB50 + Amethyst (2 TMUs)", + .value = VOODOO_SB50 }, { - "Voodoo 2", VOODOO_2 + .description = "Voodoo 2", + .value = VOODOO_2 }, { - "" + .description = "" } - } + }, + .default_int = 0 }, { - "framebuffer_memory", "Framebuffer memory size", CONFIG_SELECTION, "", 2, + .name = "framebuffer_memory", + .description = "Framebuffer memory size", + .type = CONFIG_SELECTION, + .selection = { { - "2 MB", 2 + .description = "2 MB", + .value = 2 }, { - "4 MB", 4 + .description = "4 MB", + .value = 4 }, { - "" + .description = "" } - } + }, + .default_int = 2 }, { - "texture_memory", "Texture memory size", CONFIG_SELECTION, "", 2, + .name = "texture_memory", + .description = "Texture memory size", + .type = CONFIG_SELECTION, + .selection = { { - "2 MB", 2 + .description = "2 MB", + .value = 2 }, { - "4 MB",4 + .description = "4 MB", + .value = 4 }, { - "" + .description = "" } - } + }, + .default_int = 2 }, { - "render_threads", "Render threads", CONFIG_SELECTION, "", 2, + .name = "bilinear", + .description = "Bilinear filtering", + .type = CONFIG_BINARY, + .default_int = 1 + }, + { + .name = "dacfilter", + .description = "Screen Filter", + .type = CONFIG_BINARY, + .default_int = 0 + }, + { + .name = "render_threads", + .description = "Render threads", + .type = CONFIG_SELECTION, + .selection = { { - "1", 1 + .description = "1", + .value = 1 }, { - "2", 2 + .description = "2", + .value = 2 }, { - "" + .description = "" } - } - }, - { - "bilinear", "Bilinear filtering", CONFIG_BINARY, "", 1 - }, - { - "dacfilter", "Screen Filter", CONFIG_BINARY, "", 0 + }, + .default_int = 2 }, #ifndef NO_CODEGEN { - "recompiler", "Recompiler", CONFIG_BINARY, "", 1 + .name = "recompiler", + .description = "Recompiler", + .type = CONFIG_BINARY, + .default_int = 1 }, #endif { - "", "", -1 + .type = -1 } }; diff --git a/src/VIDEO/vid_voodoo_codegen_x86-64.h b/src/VIDEO/vid_voodoo_codegen_x86-64.h index 1caa67311..160708b93 100644 --- a/src/VIDEO/vid_voodoo_codegen_x86-64.h +++ b/src/VIDEO/vid_voodoo_codegen_x86-64.h @@ -42,26 +42,26 @@ static int last_block[2] = {0, 0}; static int next_block_to_write[2] = {0, 0}; #define addbyte(val) \ - code_block[block_pos++] = (uint8_t)val; \ + code_block[block_pos++] = val; \ if (block_pos >= BLOCK_SIZE) \ fatal("Over!\n") -#define addword(val) \ - *(uint16_t *)&code_block[block_pos] = (uint16_t)val; \ - block_pos += 2; \ - if (block_pos >= BLOCK_SIZE) \ +#define addword(val) \ + *(uint16_t *)&code_block[block_pos] = val; \ + block_pos += 2; \ + if (block_pos >= BLOCK_SIZE) \ fatal("Over!\n") -#define addlong(val) \ - *(uint32_t *)&code_block[block_pos] = (uint32_t)val; \ - block_pos += 4; \ - if (block_pos >= BLOCK_SIZE) \ +#define addlong(val) \ + *(uint32_t *)&code_block[block_pos] = val; \ + block_pos += 4; \ + if (block_pos >= BLOCK_SIZE) \ fatal("Over!\n") -#define addquad(val) \ - *(uint64_t *)&code_block[block_pos] = (uint64_t)val; \ - block_pos += 8; \ - if (block_pos >= BLOCK_SIZE) \ +#define addquad(val) \ + *(uint64_t *)&code_block[block_pos] = val; \ + block_pos += 8; \ + if (block_pos >= BLOCK_SIZE) \ fatal("Over!\n") @@ -70,7 +70,6 @@ static __m128i xmm_ff_w;// = 0x00ff00ff00ff00ffull; static __m128i xmm_ff_b;// = 0x00000000ffffffffull; static uint32_t zero = 0; -static double const_1_48 = (double)(1ull << 4); static __m128i alookup[257], aminuslookup[256]; static __m128i minus_254;// = 0xff02ff02ff02ff02ull; @@ -161,7 +160,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(0x0f); /*MOVZX EAX, logtable[RAX]*/ addbyte(0xb6); addbyte(0x80); - addlong((uint32_t)logtable); + addlong((uint32_t)(uintptr_t)logtable); addbyte(0x09); /*OR EAX, EDX*/ addbyte(0xd0); addbyte(0x03); /*ADD EAX, state->lod*/ @@ -339,7 +338,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(0x48); addbyte(0x14); addbyte(0x25); - addlong(&zero); + addlong((uint32_t)(uintptr_t)&zero); addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/ addbyte(0x96); addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); @@ -353,7 +352,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(0x48); addbyte(0x1c); addbyte(0x25); - addlong(&zero); + addlong((uint32_t)(uintptr_t)&zero); addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/ addbyte(0x9e); addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); @@ -400,7 +399,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(0x48); addbyte(0x04); addbyte(0x25); - addlong(&zero); + addlong((uint32_t)(uintptr_t)&zero); addbyte(0x78); /*JS + - clamp on 0*/ addbyte(2+3+2+ 5+5+2); addbyte(0x3b); /*CMP EAX, EBP*/ @@ -501,7 +500,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(0x49); /*MOV R8, bilinear_lookup*/ addbyte(0xb8); - addquad(bilinear_lookup); + addquad((uintptr_t)bilinear_lookup); addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ addbyte(0x0f); @@ -615,7 +614,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(0x48); addbyte(0x04); addbyte(0x25); - addlong(&zero); + addlong((uint32_t)(uintptr_t)&zero); addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/ addbyte(0x84); addbyte(0x8e); @@ -642,7 +641,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(0x48); addbyte(0x1c); addbyte(0x25); - addlong(&zero); + addlong((uint32_t)(uintptr_t)&zero); addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/ addbyte(0x9c); addbyte(0x8e); @@ -1080,7 +1079,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x0f); addbyte(0xef); addbyte(0x83); - addlong((uint32_t)&xmm_00_ff_w[0]); + addlong((uint32_t)(uintptr_t)&xmm_00_ff_w[0]); } else if (!tc_reverse_blend_1) { @@ -1089,14 +1088,14 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xef); addbyte(0x04); addbyte(0x25); - addlong((uint32_t)&xmm_ff_w); + addlong((uint32_t)(uintptr_t)&xmm_ff_w); } addbyte(0x66); /*PADDW XMM0, xmm_01_w*/ addbyte(0x0f); addbyte(0xfd); addbyte(0x04); addbyte(0x25); - addlong((uint32_t)&xmm_01_w); + addlong((uint32_t)(uintptr_t)&xmm_01_w); addbyte(0xf3); /*MOVQ XMM1, XMM2*/ addbyte(0x0f); addbyte(0x7e); @@ -1217,7 +1216,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x33); /*XOR EAX, i_00_ff_w[ECX*4]*/ addbyte(0x04); addbyte(0x8d); - addlong((uint32_t)i_00_ff_w); + addlong((uint32_t)(uintptr_t)i_00_ff_w); } else if (!tc_reverse_blend_1) { @@ -1404,7 +1403,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x0f); addbyte(0xef); addbyte(0xa3); - addlong((uint32_t)&xmm_00_ff_w[0]); + addlong((uint32_t)(uintptr_t)&xmm_00_ff_w[0]); } else if (!tc_reverse_blend) { @@ -1413,14 +1412,14 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xef); addbyte(0x24); addbyte(0x25); - addlong(&xmm_ff_w); + addlong((uint32_t)(uintptr_t)&xmm_ff_w); } addbyte(0x66); /*PADDW XMM4, 1*/ addbyte(0x0f); addbyte(0xfd); addbyte(0x24); addbyte(0x25); - addlong(&xmm_01_w); + addlong((uint32_t)(uintptr_t)&xmm_01_w); addbyte(0xf3); /*MOVQ XMM5, XMM1*/ addbyte(0x0f); addbyte(0x7e); @@ -1488,7 +1487,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x0f); addbyte(0xef); addbyte(0x0d); - addlong(&xmm_ff_w); + addlong((uint32_t)(uintptr_t)&xmm_ff_w); } addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ @@ -1585,7 +1584,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x33); /*XOR EBX, i_00_ff_w[ECX*4]*/ addbyte(0x1c); addbyte(0x8d); - addlong((uint32_t)i_00_ff_w); + addlong((uint32_t)(uintptr_t)i_00_ff_w); } else if (!tca_reverse_blend) { @@ -2143,14 +2142,14 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xef); addbyte(0x1c); addbyte(0x25); - addlong(&xmm_ff_w); + addlong((uint32_t)(uintptr_t)&xmm_ff_w); } addbyte(0x66); /*PADDW XMM3, 1*/ addbyte(0x0f); addbyte(0xfd); addbyte(0x1c); addbyte(0x25); - addlong(&xmm_01_w); + addlong((uint32_t)(uintptr_t)&xmm_01_w); addbyte(0x66); /*PMULLW XMM0, XMM3*/ addbyte(0x0f); addbyte(0xd5); @@ -2194,7 +2193,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xef); addbyte(0x04); addbyte(0x25); - addlong(&xmm_ff_b); + addlong((uint32_t)(uintptr_t)&xmm_ff_b); } if (params->fogMode & FOG_ENABLE) @@ -2441,7 +2440,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo { addbyte(0x49); /*MOV R8, rgb565*/ addbyte(0xb8); - addquad(rgb565); + addquad((uintptr_t)rgb565); addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ addbyte(0x87); addlong(offsetof(voodoo_state_t, x)); @@ -2489,7 +2488,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xd5); addbyte(0x24); addbyte(0xd5); - addlong(alookup); + addlong((uint32_t)(uintptr_t)alookup); addbyte(0xf3); /*MOVQ XMM5, XMM4*/ addbyte(0x0f); addbyte(0x7e); @@ -2499,7 +2498,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xfd); addbyte(0x24); addbyte(0x25); - addlong((uint32_t)alookup + 16); + addlong((uint32_t)(uintptr_t)alookup + 16); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2529,7 +2528,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xfd); addbyte(0x24); addbyte(0x25); - addlong((uint32_t)alookup + 16); + addlong((uint32_t)(uintptr_t)alookup + 16); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2555,7 +2554,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xd5); addbyte(0x24); addbyte(0xd5); - addlong(aminuslookup); + addlong((uint32_t)(uintptr_t)aminuslookup); addbyte(0xf3); /*MOVQ XMM5, XMM4*/ addbyte(0x0f); addbyte(0x7e); @@ -2565,7 +2564,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xfd); addbyte(0x24); addbyte(0x25); - addlong((uint32_t)alookup + 16); + addlong((uint32_t)(uintptr_t)alookup + 16); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2587,7 +2586,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x7e); addbyte(0x2c); addbyte(0x25); - addlong(&xmm_ff_w); + addlong((uint32_t)(uintptr_t)&xmm_ff_w); addbyte(0x66); /*PSUBW XMM5, XMM0*/ addbyte(0x0f); addbyte(0xf9); @@ -2605,7 +2604,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xfd); addbyte(0x24); addbyte(0x25); - addlong((uint32_t)alookup + 16); + addlong((uint32_t)(uintptr_t)alookup + 16); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2633,7 +2632,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xd5); addbyte(0x24); addbyte(0xd5); - addlong(&minus_254); + addlong((uint32_t)(uintptr_t)&minus_254); addbyte(0xf3); /*MOVQ XMM5, XMM4*/ addbyte(0x0f); addbyte(0x7e); @@ -2643,7 +2642,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xfd); addbyte(0x24); addbyte(0x25); - addlong((uint32_t)alookup + 16); + addlong((uint32_t)(uintptr_t)alookup + 16); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2674,7 +2673,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xd5); addbyte(0x04); addbyte(0xd5); - addlong(alookup); + addlong((uint32_t)(uintptr_t)alookup); addbyte(0xf3); /*MOVQ XMM5, XMM0*/ addbyte(0x0f); addbyte(0x7e); @@ -2684,7 +2683,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xfd); addbyte(0x04); addbyte(0x25); - addlong((uint32_t)alookup + 16); + addlong((uint32_t)(uintptr_t)alookup + 16); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2714,7 +2713,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xfd); addbyte(0x04); addbyte(0x25); - addlong((uint32_t)alookup + 16); + addlong((uint32_t)(uintptr_t)alookup + 16); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2740,7 +2739,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xd5); addbyte(0x04); addbyte(0xd5); - addlong(aminuslookup); + addlong((uint32_t)(uintptr_t)aminuslookup); addbyte(0xf3); /*MOVQ XMM5, XMM0*/ addbyte(0x0f); addbyte(0x7e); @@ -2750,7 +2749,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xfd); addbyte(0x04); addbyte(0x25); - addlong((uint32_t)alookup + 16); + addlong((uint32_t)(uintptr_t)alookup + 16); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2772,7 +2771,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x7e); addbyte(0x2c); addbyte(0x25); - addlong(&xmm_ff_w); + addlong((uint32_t)(uintptr_t)&xmm_ff_w); addbyte(0x66); /*PSUBW XMM5, XMM6*/ addbyte(0x0f); addbyte(0xf9); @@ -2790,7 +2789,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xfd); addbyte(0x04); addbyte(0x25); - addlong((uint32_t)alookup + 16); + addlong((uint32_t)(uintptr_t)alookup + 16); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2842,7 +2841,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo { addbyte(0x49); /*MOV R8, dither_rb*/ addbyte(0xb8); - addquad(dither2x2 ? dither_rb2x2 : dither_rb); + addquad(dither2x2 ? (uintptr_t)dither_rb2x2 : (uintptr_t)dither_rb); addbyte(0x4c); /*MOV ESI, real_y (R14)*/ addbyte(0x89); addbyte(0xf6); diff --git a/src/VIDEO/vid_voodoo_codegen_x86.h b/src/VIDEO/vid_voodoo_codegen_x86.h index 54c09a1bc..5a9f65e46 100644 --- a/src/VIDEO/vid_voodoo_codegen_x86.h +++ b/src/VIDEO/vid_voodoo_codegen_x86.h @@ -40,43 +40,43 @@ static int last_block[2] = {0, 0}; static int next_block_to_write[2] = {0, 0}; #define addbyte(val) \ - code_block[block_pos++] = (uint8_t)val; \ + code_block[block_pos++] = val; \ if (block_pos >= BLOCK_SIZE) \ fatal("Over!\n") -#define addword(val) \ - *(uint16_t *)&code_block[block_pos] = (uint16_t)val; \ - block_pos += 2; \ - if (block_pos >= BLOCK_SIZE) \ +#define addword(val) \ + *(uint16_t *)&code_block[block_pos] = val; \ + block_pos += 2; \ + if (block_pos >= BLOCK_SIZE) \ fatal("Over!\n") -#define addlong(val) \ - *(uint32_t *)&code_block[block_pos] = (uint32_t)val; \ - block_pos += 4; \ - if (block_pos >= BLOCK_SIZE) \ +#define addlong(val) \ + *(uint32_t *)&code_block[block_pos] = val; \ + block_pos += 4; \ + if (block_pos >= BLOCK_SIZE) \ fatal("Over!\n") -#define addquad(val) \ - *(uint64_t *)&code_block[block_pos] = (uint64_t)val; \ - block_pos += 8; \ - if (block_pos >= BLOCK_SIZE) \ +#define addquad(val) \ + *(uint64_t *)&code_block[block_pos] = val; \ + block_pos += 8; \ + if (block_pos >= BLOCK_SIZE) \ fatal("Over!\n") -static __m128i xmm_01_w; -static __m128i xmm_ff_w; -static __m128i xmm_ff_b; +static __m128i xmm_01_w;// = 0x0001000100010001ull; +static __m128i xmm_ff_w;// = 0x00ff00ff00ff00ffull; +static __m128i xmm_ff_b;// = 0x00000000ffffffffull; static uint32_t zero = 0; static double const_1_48 = (double)(1ull << 4); static __m128i alookup[257], aminuslookup[256]; -static __m128i minus_254; +static __m128i minus_254;// = 0xff02ff02ff02ff02ull; static __m128i bilinear_lookup[256*2]; static __m128i xmm_00_ff_w[2]; static uint32_t i_00_ff_w[2] = {0, 0xff}; -static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu) +static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu) { if (params->textureMode[tmu] & 1) { @@ -85,7 +85,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w)); addbyte(0xdd); /*FLDq const_1_48*/ addbyte(0x05); - addlong(&const_1_48); + addlong((uint32_t)&const_1_48); addbyte(0xde); /*FDIV ST(1)*/ addbyte(0xf1); addbyte(0xdf); /*FILDq state->tmu0_s*/ @@ -129,7 +129,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, addbyte(0x0f); /*MOVZX EBX, logtable[EBX]*/ addbyte(0xb6); addbyte(0x9b); - addlong(logtable); + addlong((uint32_t)logtable); addbyte(0x09); /*OR EAX, EBX*/ addbyte(0xd8); addbyte(0x03); /*ADD EAX, state->lod*/ @@ -322,7 +322,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, addbyte(0x0f); /*CMOVS EDX, zero*/ addbyte(0x48); addbyte(0x15); - addlong(&zero); + addlong((uint32_t)&zero); addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/ addbyte(0x96); addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); @@ -335,7 +335,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, addbyte(0x0f); /*CMOVS EBX, zero*/ addbyte(0x48); addbyte(0x1d); - addlong(&zero); + addlong((uint32_t)&zero); addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/ addbyte(0x9e); addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); @@ -379,7 +379,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, addbyte(0x0f); /*CMOVS EAX, zero*/ addbyte(0x48); addbyte(0x05); - addlong(&zero); + addlong((uint32_t)&zero); addbyte(0x78); /*JS + - clamp on 0*/ addbyte(2+3+2+ 5+5+2); addbyte(0x3b); /*CMP EAX, EBP*/ @@ -489,7 +489,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, addbyte(0x81); /*ADD ESI, bilinear_lookup*/ addbyte(0xc6); - addlong(bilinear_lookup); + addlong((uint32_t)bilinear_lookup); addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ addbyte(0x0f); @@ -592,7 +592,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, addbyte(0x0f); /*CMOVS EAX, zero*/ addbyte(0x48); addbyte(0x05); - addlong(&zero); + addlong((uint32_t)&zero); addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/ addbyte(0x84); addbyte(0x8e); @@ -618,7 +618,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, addbyte(0x0f); /*CMOVS EBX, zero*/ addbyte(0x48); addbyte(0x1d); - addlong(&zero); + addlong((uint32_t)&zero); addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/ addbyte(0x9c); addbyte(0x8e); @@ -653,7 +653,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, return block_pos; } -static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int depthop) +static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int depthop) { int block_pos = 0; int z_skip_pos = 0; @@ -662,10 +662,24 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood int depth_jump_pos = 0; int depth_jump_pos2 = 0; int loop_jump_pos = 0; +// xmm_01_w = (__m128i)0x0001000100010001ull; +// xmm_ff_w = (__m128i)0x00ff00ff00ff00ffull; +// xmm_ff_b = (__m128i)0x00000000ffffffffull; xmm_01_w = _mm_set_epi32(0, 0, 0x00010001, 0x00010001); xmm_ff_w = _mm_set_epi32(0, 0, 0x00ff00ff, 0x00ff00ff); xmm_ff_b = _mm_set_epi32(0, 0, 0, 0x00ffffff); minus_254 = _mm_set_epi32(0, 0, 0xff02ff02, 0xff02ff02); +// *(uint64_t *)&const_1_48 = 0x45b0000000000000ull; +// block_pos = 0; +// voodoo_get_depth = &code_block[block_pos]; + /*W at (%esp+4) + Z at (%esp+12) + new_depth at (%esp+16)*/ +// if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depth_op == DEPTHOP_NEVER)) +// { +// addbyte(0xC3); /*RET*/ +// return; +// } addbyte(0x55); /*PUSH EBP*/ addbyte(0x57); /*PUSH EDI*/ addbyte(0x56); /*PUSH ESI*/ @@ -697,6 +711,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x75); /*JNZ got_depth*/ depth_jump_pos = block_pos; addbyte(0); +// addbyte(4+5+2+3+2+5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3); addbyte(0x8b); /*MOV EDX, w*/ addbyte(0x97); addlong(offsetof(voodoo_state_t, w)); @@ -710,6 +725,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x74); /*JZ got_depth*/ depth_jump_pos2 = block_pos; addbyte(0); +// addbyte(5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3); addbyte(0xb9); /*MOV ECX, 19*/ addlong(19); addbyte(0x0f); /*BSR EAX, EDX*/ @@ -875,8 +891,17 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood else if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop == DEPTHOP_NEVER)) { addbyte(0xC3); /*RET*/ +// addbyte(0x30); /*XOR EAX, EAX*/ +// addbyte(0xc0); } +// else +// { +// addbyte(0xb0); /*MOV AL, 1*/ +// addbyte(1); +// } + +// voodoo_combine = &code_block[block_pos]; /*XMM0 = colour*/ /*XMM2 = 0 (for unpacking*/ @@ -1367,13 +1392,13 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x0f); addbyte(0xef); addbyte(0x25); - addlong(&xmm_ff_w); + addlong((uint32_t)&xmm_ff_w); } addbyte(0x66); /*PADDW XMM4, 1*/ addbyte(0x0f); addbyte(0xfd); addbyte(0x25); - addlong(&xmm_01_w); + addlong((uint32_t)&xmm_01_w); addbyte(0xf3); /*MOVQ XMM5, XMM1*/ addbyte(0x0f); addbyte(0x7e); @@ -1441,7 +1466,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x0f); addbyte(0xef); addbyte(0x0d); - addlong(&xmm_ff_w); + addlong((uint32_t)&xmm_ff_w); } addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ @@ -2096,13 +2121,13 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x0f); addbyte(0xef); addbyte(0x1d); - addlong(&xmm_ff_w); + addlong((uint32_t)&xmm_ff_w); } addbyte(0x66); /*PADDW XMM3, 1*/ addbyte(0x0f); addbyte(0xfd); addbyte(0x1d); - addlong(&xmm_01_w); + addlong((uint32_t)&xmm_01_w); addbyte(0x66); /*PMULLW XMM0, XMM3*/ addbyte(0x0f); addbyte(0xd5); @@ -2145,8 +2170,14 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x0f); addbyte(0xef); addbyte(0x05); - addlong(&xmm_ff_b); + addlong((uint32_t)&xmm_ff_b); } +//#if 0 +// addbyte(0x66); /*MOVD state->out[EDI], XMM0*/ +// addbyte(0x0f); +// addbyte(0x7e); +// addbyte(0x87); +// addlong(offsetof(voodoo_state_t, out)); if (params->fogMode & FOG_ENABLE) { if (params->fogMode & FOG_CONSTANT) @@ -2241,6 +2272,11 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(10); addbyte(0x01); /*ADD EAX, EBX*/ addbyte(0xd8); + +/* int fog_idx = (w_depth >> 10) & 0x3f; + + fog_a = params->fogTable[fog_idx].fog; + fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10;*/ break; case FOG_Z: @@ -2252,6 +2288,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(12); addbyte(0x25); /*AND EAX, 0xff*/ addlong(0xff); +// fog_a = (z >> 20) & 0xff; break; case FOG_ALPHA: @@ -2273,6 +2310,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x0f); /*CMOVAE EAX, EBX*/ addbyte(0x43); addbyte(0xc3); +// fog_a = CLAMP(ia >> 12); break; case FOG_W: @@ -2293,10 +2331,12 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x0f); /*CMOVAE EAX, EBX*/ addbyte(0x43); addbyte(0xc3); +// fog_a = CLAMP(w >> 32); break; } addbyte(0x01); /*ADD EAX, EAX*/ addbyte(0xc0); +// fog_a++; addbyte(0x66); /*PMULLW XMM3, alookup+4[EAX*8]*/ addbyte(0x0f); @@ -2419,7 +2459,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x6e); addbyte(0x24); addbyte(0x85); - addlong(rgb565); + addlong((uint32_t)rgb565); addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/ addbyte(0x0f); addbyte(0x60); @@ -2443,7 +2483,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0xd5); addbyte(0x24); addbyte(0xd5); - addlong(alookup); + addlong((uint32_t)alookup); addbyte(0xf3); /*MOVQ XMM5, XMM4*/ addbyte(0x0f); addbyte(0x7e); @@ -2507,7 +2547,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0xd5); addbyte(0x24); addbyte(0xd5); - addlong(aminuslookup); + addlong((uint32_t)aminuslookup); addbyte(0xf3); /*MOVQ XMM5, XMM4*/ addbyte(0x0f); addbyte(0x7e); @@ -2537,7 +2577,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x0f); addbyte(0x7e); addbyte(0x2d); - addlong(&xmm_ff_w); + addlong((uint32_t)&xmm_ff_w); addbyte(0x66); /*PSUBW XMM5, XMM0*/ addbyte(0x0f); addbyte(0xf9); @@ -2581,7 +2621,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x0f); addbyte(0xd5); addbyte(0x25); - addlong(&minus_254); + addlong((uint32_t)&minus_254); addbyte(0xf3); /*MOVQ XMM5, XMM4*/ addbyte(0x0f); addbyte(0x7e); @@ -2621,7 +2661,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0xd5); addbyte(0x04); addbyte(0xd5); - addlong(alookup); + addlong((uint32_t)alookup); addbyte(0xf3); /*MOVQ XMM5, XMM0*/ addbyte(0x0f); addbyte(0x7e); @@ -2685,7 +2725,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0xd5); addbyte(0x04); addbyte(0xd5); - addlong(aminuslookup); + addlong((uint32_t)aminuslookup); addbyte(0xf3); /*MOVQ XMM5, XMM0*/ addbyte(0x0f); addbyte(0x7e); @@ -2715,7 +2755,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x0f); addbyte(0x7e); addbyte(0x2d); - addlong(&xmm_ff_w); + addlong((uint32_t)&xmm_ff_w); addbyte(0x66); /*PSUBW XMM5, XMM6*/ addbyte(0x0f); addbyte(0xf9); @@ -2768,6 +2808,13 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0x67); addbyte(0xc0); } +//#endif + +// addbyte(0x8b); /*MOV EDX, x (ESP+12)*/ +// addbyte(0x54); +// addbyte(0x24); +// addbyte(12); + addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ addbyte(0x97); @@ -2780,6 +2827,10 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood if (params->fbzMode & FBZ_RGB_WMASK) { +// addbyte(0x89); /*MOV state->rgb_out[EDI], EAX*/ +// addbyte(0x87); +// addlong(offsetof(voodoo_state_t, rgb_out)); + if (dither) { addbyte(0x8b); /*MOV ESI, real_y (ESP+16)*/ @@ -2857,17 +2908,17 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood addbyte(0xb6); addbyte(0x9c); addbyte(0x33); - addlong(dither2x2 ? dither_g2x2 : dither_g); + addlong(dither2x2 ? (uint32_t)dither_g2x2 : (uint32_t)dither_g); addbyte(0x0f); /*MOVZX ECX, dither_rb[ECX+ESI]*/ addbyte(0xb6); addbyte(0x8c); addbyte(0x31); - addlong(dither2x2 ? dither_rb2x2 : dither_rb); + addlong(dither2x2 ? (uint32_t)dither_rb2x2 : (uint32_t)dither_rb); addbyte(0x0f); /*MOVZX EAX, dither_rb[EAX+ESI]*/ addbyte(0xb6); addbyte(0x84); addbyte(0x30); - addlong(dither2x2 ? dither_rb2x2 : dither_rb); + addlong(dither2x2 ? (uint32_t)dither_rb2x2 : (uint32_t)dither_rb); addbyte(0xc1); /*SHL EBX, 5*/ addbyte(0xe3); addbyte(5); @@ -3181,7 +3232,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood } static int voodoo_recomp = 0; -static __inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even) +static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even) { int c; int b = last_block[odd_even]; @@ -3211,6 +3262,7 @@ static __inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params } voodoo_recomp++; data = &codegen_data[odd_even + next_block_to_write[odd_even]*2]; +// code_block = data->code_block; voodoo_generate(data->code_block, voodoo, params, state, depth_op); diff --git a/src/hdd_esdi.c b/src/hdd_esdi.c index 4772b2c48..4c3cc152d 100644 --- a/src/hdd_esdi.c +++ b/src/hdd_esdi.c @@ -54,7 +54,7 @@ typedef struct esdi_t int data_pos; uint16_t data[256]; - uint16_t sector_buffer[16][256]; + uint16_t sector_buffer[256][256]; int sector_pos; int sector_count; @@ -632,7 +632,7 @@ static void esdi_callback(void *p) case 0: esdi->sector_pos = 0; esdi->sector_count = esdi->cmd_data[1]; - if (esdi->sector_count > 16) + if (esdi->sector_count > 256) fatal("Read sector buffer count %04x\n", esdi->cmd_data[1]); esdi->status = STATUS_IRQ | STATUS_CMD_IN_PROGRESS | STATUS_TRANSFER_REQ; @@ -692,7 +692,7 @@ static void esdi_callback(void *p) case 0: esdi->sector_pos = 0; esdi->sector_count = esdi->cmd_data[1]; - if (esdi->sector_count > 16) + if (esdi->sector_count > 256) fatal("Write sector buffer count %04x\n", esdi->cmd_data[1]); esdi->status = STATUS_IRQ | STATUS_CMD_IN_PROGRESS | STATUS_TRANSFER_REQ; diff --git a/src/mem.c b/src/mem.c index 5fd0cf5a2..fd393e446 100644 --- a/src/mem.c +++ b/src/mem.c @@ -1131,7 +1131,7 @@ void addwritelookup(uint32_t virt, uint32_t phys) writelookup2[writelookup[writelnext]] = -1; } - if (pages[phys >> 12].block || (phys & ~0xfff) == recomp_page) + if (pages[phys >> 12].block[0] || pages[phys >> 12].block[1] || pages[phys >> 12].block[2] || pages[phys >> 12].block[3] || (phys & ~0xfff) == recomp_page) page_lookup[virt >> 12] = &pages[phys >> 12]; else writelookup2[virt>>12] = (uintptr_t)&ram[(uintptr_t)(phys & ~0xFFF) - (uintptr_t)(virt & ~0xfff)]; @@ -1713,7 +1713,7 @@ void mem_write_ramb_page(uint32_t addr, uint8_t val, page_t *p) if (val != p->mem[addr & 0xfff] || codegen_in_recompile) { uint64_t mask = (uint64_t)1 << ((addr >> PAGE_MASK_SHIFT) & PAGE_MASK_MASK); - p->dirty_mask |= mask; + p->dirty_mask[(addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK] |= mask; p->mem[addr & 0xfff] = val; } } @@ -1722,9 +1722,9 @@ void mem_write_ramw_page(uint32_t addr, uint16_t val, page_t *p) if (val != *(uint16_t *)&p->mem[addr & 0xfff] || codegen_in_recompile) { uint64_t mask = (uint64_t)1 << ((addr >> PAGE_MASK_SHIFT) & PAGE_MASK_MASK); - if ((addr & 0x3f) == 0x3f) + if ((addr & 0xf) == 0xf) mask |= (mask << 1); - p->dirty_mask |= mask; + p->dirty_mask[(addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK] |= mask; *(uint16_t *)&p->mem[addr & 0xfff] = val; } } @@ -1733,9 +1733,9 @@ void mem_write_raml_page(uint32_t addr, uint32_t val, page_t *p) if (val != *(uint32_t *)&p->mem[addr & 0xfff] || codegen_in_recompile) { uint64_t mask = (uint64_t)1 << ((addr >> PAGE_MASK_SHIFT) & PAGE_MASK_MASK); - if ((addr & 0x3f) >= 0x3d) + if ((addr & 0xf) >= 0xd) mask |= (mask << 1); - p->dirty_mask |= mask; + p->dirty_mask[(addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK] |= mask; *(uint32_t *)&p->mem[addr & 0xfff] = val; } } @@ -1758,10 +1758,10 @@ void mem_write_raml(uint32_t addr, uint32_t val, void *priv) uint8_t mem_read_bios(uint32_t addr, void *priv) { - if (AMIBIOS && (addr&0xFFFFF)==0xF8281) /*This is read constantly during AMIBIOS POST, but is never written to. It's clearly a status register of some kind, but for what?*/ - { - return 0x40; - } + if (AMIBIOS && (addr&0xFFFFF)==0xF8281) /*This is read constantly during AMIBIOS POST, but is never written to. It's clearly a status register of some kind, but for what?*/ + { + return 0x40; + } return rom[addr & biosmask]; } uint16_t mem_read_biosw(uint32_t addr, void *priv) @@ -1804,8 +1804,8 @@ void mem_invalidate_range(uint32_t start_addr, uint32_t end_addr) for (; start_addr <= end_addr; start_addr += (1 << PAGE_MASK_SHIFT)) { uint64_t mask = (uint64_t)1 << ((start_addr >> PAGE_MASK_SHIFT) & PAGE_MASK_MASK); - - pages[start_addr >> 12].dirty_mask |= mask; + + pages[start_addr >> 12].dirty_mask[(start_addr >> PAGE_MASK_INDEX_SHIFT) & PAGE_MASK_INDEX_MASK] |= mask; } } @@ -2220,8 +2220,8 @@ void mem_reset_page_blocks() pages[c].write_b = mem_write_ramb_page; pages[c].write_w = mem_write_ramw_page; pages[c].write_l = mem_write_raml_page; - pages[c].block = NULL; - pages[c].block_2 = NULL; + pages[c].block[0] = pages[c].block[1] = pages[c].block[2] = pages[c].block[3] = NULL; + pages[c].block_2[0] = pages[c].block_2[1] = pages[c].block_2[2] = pages[c].block_2[3] = NULL; } } diff --git a/src/mem.h b/src/mem.h index 6bf127ecf..916e89995 100644 --- a/src/mem.h +++ b/src/mem.h @@ -122,12 +122,12 @@ typedef struct page_t uint8_t *mem; - struct codeblock_t *block, *block_2; + struct codeblock_t *block[4], *block_2[4]; /*Head of codeblock tree associated with this page*/ struct codeblock_t *head; - uint64_t code_present_mask, dirty_mask; + uint64_t code_present_mask[4], dirty_mask[4]; } page_t; extern page_t *pages;