diff --git a/src/acpi.c b/src/acpi.c index 467fad473..cbf7345c9 100644 --- a/src/acpi.c +++ b/src/acpi.c @@ -81,35 +81,24 @@ acpi_update_irq(void *priv) } -static void -acpi_raise_smi_common(void *priv) -{ - acpi_t *dev = (acpi_t *) priv; - - if ((dev->vendor == VEN_VIA) || (dev->vendor == VEN_VIA_596B)) { - if ((!dev->regs.smi_lock || !dev->regs.smi_active)) { - smi_line = 1; - dev->regs.smi_active = 1; - } - } else if (dev->vendor == VEN_INTEL) { - smi_line = 1; - /* Clear bit 16 of GLBCTL. */ - dev->regs.glbctl &= ~0x00010000; - } else if (dev->vendor == VEN_SMC) - smi_line = 1; -} - - static void acpi_raise_smi(void *priv) { acpi_t *dev = (acpi_t *) priv; - if ((dev->vendor == VEN_INTEL) && !(dev->regs.glbctl & 0x00010000)) - return; - - if (dev->regs.glbctl & 0x01) - acpi_raise_smi_common(dev); + if (dev->regs.glbctl & 0x01) { + if ((dev->vendor == VEN_VIA) || (dev->vendor == VEN_VIA_596B)) { + if ((!dev->regs.smi_lock || !dev->regs.smi_active)) { + smi_line = 1; + dev->regs.smi_active = 1; + } + } else if (dev->vendor == VEN_INTEL) { + smi_line = 1; + /* Clear bit 16 of GLBCTL. */ + dev->regs.glbctl &= ~0x00010000; + } else if (dev->vendor == VEN_SMC) + smi_line = 1; + } } @@ -1119,7 +1108,7 @@ acpi_apm_out(uint16_t port, uint8_t val, void *p) if (dev->apm->do_smi) { if (dev->vendor == VEN_INTEL) dev->regs.glbsts |= 0x20; - acpi_raise_smi_common(dev); + acpi_raise_smi(dev); } } else dev->apm->stat = val; diff --git a/src/chipset/intel_4x0.c b/src/chipset/intel_4x0.c index 7373e48e9..981464ba7 100644 --- a/src/chipset/intel_4x0.c +++ b/src/chipset/intel_4x0.c @@ -143,7 +143,7 @@ i4x0_smram_handler_phase1(i4x0_t *dev) size[0] = 0x00020000; } - if (((regs[0x72] & 0x78) == 0x48) || ((regs[0x72] & 0x28) == 0x08)) + if (regs[0x72] & 0x08) smram_enable(dev->smram_low, base[0], base[0] & 0x000f0000, size[0], ((regs[0x72] & 0x78) == 0x48), (regs[0x72] & 0x08)); diff --git a/src/chipset/via_apollo.c b/src/chipset/via_apollo.c index c8055fbb4..f1c468460 100644 --- a/src/chipset/via_apollo.c +++ b/src/chipset/via_apollo.c @@ -363,20 +363,16 @@ via_apollo_host_bridge_write(int func, int addr, uint8_t val, void *priv) apollo_smram_map(dev, 1, 0x000a0000, 0x00020000, 1); /* SMM: Code DRAM, Data DRAM */ apollo_smram_map(dev, 0, 0x000a0000, 0x00020000, 3); /* Non-SMM: Code Invalid, Data Invalid */ break; - } else if(dev->id >= VIA_597) switch (val & 0x03) { + } else if (dev->id >= VIA_597) switch (val & 0x03) { case 0x00: default: /* Disable SMI Address Redirection (default) */ apollo_smram_map(dev, 1, 0x000a0000, 0x00020000, 0); - if (dev->id == VIA_597) - apollo_smram_map(dev, 1, 0x00030000, 0x00020000, 1); apollo_smram_map(dev, 0, 0x000a0000, 0x00020000, 0); break; case 0x01: /* Allow access to DRAM Axxxx-Bxxxx for both normal and SMI cycles */ apollo_smram_map(dev, 1, 0x000a0000, 0x00020000, 1); - if (dev->id == VIA_597) - apollo_smram_map(dev, 1, 0x00030000, 0x00020000, 1); apollo_smram_map(dev, 0, 0x000a0000, 0x00020000, 1); break; case 0x02: @@ -391,8 +387,6 @@ via_apollo_host_bridge_write(int func, int addr, uint8_t val, void *priv) case 0x03: /* Allow SMI Axxxx-Bxxxx DRAM access */ apollo_smram_map(dev, 1, 0x000a0000, 0x00020000, 1); - if (dev->id == VIA_597) - apollo_smram_map(dev, 1, 0x00030000, 0x00020000, 1); apollo_smram_map(dev, 0, 0x000a0000, 0x00020000, 0); break; } else switch(val & 0x03) { @@ -711,7 +705,7 @@ const device_t via_vpx_device = { "VIA Apollo VPX", DEVICE_PCI, - VIA_597, /*VT82C585*/ + VIA_585, /*VT82C585*/ via_apollo_init, via_apollo_close, via_apollo_reset, @@ -725,7 +719,7 @@ const device_t amd640_device = { "AMD 640 System Controller", DEVICE_PCI, - VIA_597, /*VT82C595*/ + VIA_595, /*VT82C595*/ via_apollo_init, via_apollo_close, via_apollo_reset, diff --git a/src/cpu/386.c b/src/cpu/386.c index ade157a32..fe05bb02e 100644 --- a/src/cpu/386.c +++ b/src/cpu/386.c @@ -257,6 +257,30 @@ exec386(int cycs) if (!use32) cpu_state.pc &= 0xffff; #endif + if (cpu_state.abrt) { + flags_rebuild(); + tempi = cpu_state.abrt & ABRT_MASK; + cpu_state.abrt = 0; + x86_doabrt(tempi); + if (cpu_state.abrt) { + cpu_state.abrt = 0; +#ifndef USE_NEW_DYNAREC + CS = oldcs; +#endif + cpu_state.pc = cpu_state.oldpc; + x386_log("Double fault %i\n", ins); + pmodeint(8, 0); + if (cpu_state.abrt) { + cpu_state.abrt = 0; + softresetx86(); + cpu_set_edx(); +#ifdef ENABLE_386_LOG + x386_log("Triple fault - reset\n"); +#endif + } + } + } + ins_cycles -= cycles; tsc += ins_cycles; @@ -308,28 +332,6 @@ exec386(int cycs) loadcs(readmemw(0, addr + 2)); } } - } else if (cpu_state.abrt) { - flags_rebuild(); - tempi = cpu_state.abrt & ABRT_MASK; - cpu_state.abrt = 0; - x86_doabrt(tempi); - if (cpu_state.abrt) { - cpu_state.abrt = 0; -#ifndef USE_NEW_DYNAREC - CS = oldcs; -#endif - cpu_state.pc = cpu_state.oldpc; - x386_log("Double fault %i\n", ins); - pmodeint(8, 0); - if (cpu_state.abrt) { - cpu_state.abrt = 0; - softresetx86(); - cpu_set_edx(); -#ifdef ENABLE_386_LOG - x386_log("Triple fault - reset\n"); -#endif - } - } } cpu_end_block_after_ins = 0; diff --git a/src/cpu/808x.c b/src/cpu/808x.c index 7b961961b..7e55a1b3a 100644 --- a/src/cpu/808x.c +++ b/src/cpu/808x.c @@ -98,8 +98,7 @@ static uint32_t *ovr_seg = NULL; static int prefetching = 1, completed = 1; static int in_rep = 0, repeating = 0; static int oldc, clear_lock = 0; -static int refresh = 0, takeint = 0; -static int cycdiff; +static int refresh = 0, cycdiff; /* Various things needed for 8087. */ @@ -254,7 +253,6 @@ static void wait(int c, int bus) { cycles -= c; - fetch_and_bus(c, bus); } @@ -279,17 +277,37 @@ sub_cycles(int c) #undef readmemq -/* Common read function. */ -static uint8_t -readmemb_common(uint32_t a) +static void +cpu_io(int bits, int out, uint16_t port) { - uint8_t ret; - - ret = read_mem_b(a); - - return ret; + if (out) { + wait(4, 1); + if (bits == 16) { + if (is8086 && !(port & 1)) + outw(port, AX); + else { + wait(4, 1); + outb(port++, AL); + outb(port, AH); + } + } else + outb(port, AL); + } else { + wait(4, 1); + if (bits == 16) { + if (is8086 && !(port & 1)) + AX = inw(port); + else { + wait(4, 1); + AL = inb(port++); + AH = inb(port); + } + } else + AL = inb(port); + } } + /* Reads a byte from the memory and advances the BIU. */ static uint8_t readmemb(uint32_t a) @@ -297,7 +315,7 @@ readmemb(uint32_t a) uint8_t ret; wait(4, 1); - ret = readmemb_common(a); + ret = read_mem_b(a); return ret; } @@ -310,35 +328,26 @@ readmembf(uint32_t a) uint8_t ret; a = cs + (a & 0xffff); - ret = readmemb_common(a); + ret = read_mem_b(a); return ret; } /* Reads a word from the memory and advances the BIU. */ -static uint16_t -readmemw_common(uint32_t s, uint16_t a) -{ - uint16_t ret; - - ret = readmemb_common(s + a); - ret |= readmemb_common(s + ((a + 1) & 0xffff)) << 8; - - return ret; -} - - static uint16_t readmemw(uint32_t s, uint16_t a) { uint16_t ret; + wait(4, 1); if (is8086 && !(a & 1)) + ret = read_mem_w(s + a); + else { wait(4, 1); - else - wait(8, 1); - ret = readmemw_common(s, a); + ret = read_mem_b(s + a); + ret |= read_mem_b(s + ((a + 1) & 0xffff)) << 8; + } return ret; } @@ -349,7 +358,7 @@ readmemwf(uint16_t a) { uint16_t ret; - ret = readmemw_common(cs, a & 0xffff); + ret = read_mem_w(cs + (a & 0xffff)); return ret; } @@ -389,22 +398,17 @@ readmemq(uint32_t s, uint16_t a) } -static void -writememb_common(uint32_t a, uint8_t v) -{ - write_mem_b(a, v); - - if ((a >= 0xf0000) && (a <= 0xfffff)) - last_addr = a & 0xffff; -} - - /* Writes a byte to the memory and advances the BIU. */ static void writememb(uint32_t s, uint32_t a, uint8_t v) { + uint32_t addr = s + a; + wait(4, 1); - writememb_common(s + a, v); + write_mem_b(addr, v); + + if ((addr >= 0xf0000) && (addr <= 0xfffff)) + last_addr = addr & 0xffff; } @@ -412,12 +416,20 @@ writememb(uint32_t s, uint32_t a, uint8_t v) static void writememw(uint32_t s, uint32_t a, uint16_t v) { + uint32_t addr = s + a; + + wait(4, 1); if (is8086 && !(a & 1)) + write_mem_w(addr, v); + else { + write_mem_b(addr, v & 0xff); wait(4, 1); - else - wait(8, 1); - writememb_common(s + a, v & 0xff); - writememb_common(s + ((a + 1) & 0xffff), v >> 8); + addr = s + ((a + 1) & 0xffff); + write_mem_b(addr, v >> 8); + } + + if ((addr >= 0xf0000) && (addr <= 0xfffff)) + last_addr = addr & 0xffff; } @@ -619,16 +631,26 @@ makemod1table(void) static uint16_t -sign_extend(uint8_t data) +get_accum(int bits) { - return data + (data < 0x80 ? 0 : 0xff00); + return (bits == 16) ? AX : AL; } -static uint32_t -sign_extend32(uint16_t data) +static void +set_accum(int bits, uint16_t val) { - return data + (data < 0x8000 ? 0 : 0xffff0000); + if (bits == 16) + AX = val; + else + AL = val; +} + + +static uint16_t +sign_extend(uint8_t data) +{ + return data + (data < 0x80 ? 0 : 0xff00); } @@ -947,7 +969,6 @@ reset_common(int hard) cpu_alt_reset = 0; prefetching = 1; - takeint = 0; cpu_ven_reset(); @@ -1132,12 +1153,8 @@ irq_pending(void) { uint8_t temp; - if (takeint && !noint) - temp = 1; - else - temp = (nmi && nmi_enable && nmi_mask) || ((cpu_state.flags & T_FLAG) && !noint); - - takeint = (cpu_state.flags & I_FLAG) && pic.int_pending; + temp = (nmi && nmi_enable && nmi_mask) || ((cpu_state.flags & T_FLAG) && !noint) || + ((cpu_state.flags & I_FLAG) && pic.int_pending && !noint); return temp; } @@ -1301,10 +1318,7 @@ set_of(int of) static int top_bit(uint16_t w, int bits) { - if (bits == 16) - return ((w & 0x8000) != 0); - else - return ((w & 0x80) != 0); + return (w & (1 << (bits - 1))); } @@ -1533,7 +1547,7 @@ set_of_rotate(int bits) static void -set_zf_ex(int bits, int zf) +set_zf_ex(int zf) { cpu_state.flags = (cpu_state.flags & ~0x40) | (zf ? 0x40 : 0); } @@ -1544,7 +1558,7 @@ set_zf(int bits) { int size_mask = (1 << bits) - 1; - set_zf_ex(bits, (cpu_data & size_mask) == 0); + set_zf_ex((cpu_data & size_mask) == 0); } @@ -1558,13 +1572,13 @@ set_pzs(int bits) static void -set_co_mul(int carry) +set_co_mul(int bits, int carry) { set_cf(carry); set_of(carry); /* NOTE: When implementing the V20, care should be taken to not change the zero flag. */ - set_zf(!carry); + set_zf_ex(!carry); if (!carry) wait(1, 0); } @@ -1795,7 +1809,6 @@ execx86(int cycs) uint8_t old_af; uint16_t addr, tempw; uint16_t new_cs, new_ip; - uint32_t result; int bits; cycles += cycs; @@ -1889,12 +1902,12 @@ execx86(int cycs) bits = 8 << (opcode & 1); wait(1, 0); cpu_data = pfq_fetch(); - cpu_dest = get_reg(0); /* AX/AL */ + cpu_dest = get_accum(bits); /* AX/AL */ cpu_src = cpu_data; cpu_alu_op = (opcode >> 3) & 7; alu_op(bits); if (cpu_alu_op != 7) - set_reg(0, cpu_data); + set_accum(bits, cpu_data); wait(1, 0); break; @@ -1924,7 +1937,7 @@ execx86(int cycs) cpu_dest = AL; set_of(0); old_af = !!(cpu_state.flags & A_FLAG); - if ((cpu_state.flags & A_FLAG) || (AL & 0xf) > 9) { + if ((cpu_state.flags & A_FLAG) || ((AL & 0xf) > 9)) { cpu_src = 6; cpu_data = cpu_dest - cpu_src; set_of_sub(8); @@ -1944,7 +1957,7 @@ execx86(int cycs) break; case 0x37: /*AAA*/ wait(1, 0); - if ((cpu_state.flags & A_FLAG) || (AL & 0xf) > 9) { + if ((cpu_state.flags & A_FLAG) || ((AL & 0xf) > 9)) { cpu_src = 6; ++AH; set_ca(); @@ -1960,7 +1973,7 @@ execx86(int cycs) break; case 0x3F: /*AAS*/ wait(1, 0); - if ((cpu_state.flags & A_FLAG) || (AL & 0xf) > 9) { + if ((cpu_state.flags & A_FLAG) || ((AL & 0xf) > 9)) { cpu_src = 6; --AH; set_ca(); @@ -2265,7 +2278,7 @@ execx86(int cycs) wait(1, 0); cpu_state.eaaddr = pfq_fetchw(); access(1, bits); - set_reg(0, readmem((ovr_seg ? *ovr_seg : ds))); + set_accum(bits, readmem((ovr_seg ? *ovr_seg : ds))); wait(1, 0); break; case 0xA2: case 0xA3: @@ -2274,7 +2287,7 @@ execx86(int cycs) wait(1, 0); cpu_state.eaaddr = pfq_fetchw(); access(7, bits); - writemem((ovr_seg ? *ovr_seg : ds), get_reg(0)); + writemem((ovr_seg ? *ovr_seg : ds), get_accum(bits)); break; case 0xA4: case 0xA5: /* MOVS */ @@ -2299,7 +2312,7 @@ execx86(int cycs) access(27, bits); stos(bits); } else { - set_reg(0, cpu_data); + set_accum(bits, cpu_data); if (in_rep != 0) wait(2, 0); } @@ -2325,7 +2338,7 @@ execx86(int cycs) if (in_rep != 0) wait(1, 0); wait(1, 0); - cpu_dest = get_reg(0); + cpu_dest = get_accum(bits); if ((opcode & 8) == 0) { access(21, bits); lods(bits); @@ -2357,7 +2370,7 @@ execx86(int cycs) bits = 8 << (opcode & 1); wait(1, 0); cpu_data = pfq_fetch(); - test(bits, get_reg(0), cpu_data); + test(bits, get_accum(bits), cpu_data); wait(1, 0); break; @@ -2686,30 +2699,20 @@ execx86(int cycs) cpu_state.eaaddr = cpu_data; if ((opcode & 2) == 0) { access(3, bits); - if ((opcode & 1) && is8086 && !(cpu_data & 1)) { - AX = inw(cpu_data); - wait(4, 1); /* I/O access and wait state. */ - } else { - AL = inb(cpu_data); - if (opcode & 1) - AH = inb(cpu_data + 1); - wait(bits >> 1, 1); /* I/O access. */ - } + if (opcode & 1) + cpu_io(16, 0, cpu_data); + else + cpu_io(8, 0, cpu_data); wait(1, 0); } else { if ((opcode & 8) == 0) access(8, bits); else access(9, bits); - if ((opcode & 1) && is8086 && !(cpu_data & 1)) { - outw(cpu_data, AX); - wait(4, 1); - } else { - outb(cpu_data, AL); - if (opcode & 1) - outb(cpu_data + 1, AH); - wait(bits >> 1, 1); /* I/O access. */ - } + if (opcode & 1) + cpu_io(16, 1, cpu_data); + else + cpu_io(8, 1, cpu_data); } break; @@ -2806,27 +2809,20 @@ execx86(int cycs) case 0x20: /* MUL */ case 0x28: /* IMUL */ wait(1, 0); + mul(get_accum(bits), cpu_data); if (opcode & 1) { - result = cpu_data; - mul(AX, cpu_data); AX = cpu_data; DX = cpu_dest; - result = ((uint32_t) DX << 16) | AX; - if ((rmdat & 0x38) == 0x20) - set_co_mul(DX != 0x0000); - else - set_co_mul(result != sign_extend32(AX)); + set_co_mul(bits, DX != ((AX & 0x8000) == 0 || (rmdat & 0x38) == 0x20 ? 0 : 0xffff)); cpu_data = DX; } else { - mul(AL, cpu_data); AL = (uint8_t) cpu_data; AH = (uint8_t) cpu_dest; - if ((rmdat & 0x38) == 0x20) - set_co_mul(AH != 0x00); - else - set_co_mul(AX != sign_extend(AL)); + set_co_mul(bits, AH != ((AL & 0x80) == 0 || (rmdat & 0x38) == 0x20 ? 0 : 0xff)); cpu_data = AH; } + /* NOTE: When implementing the V20, care should be taken to not change + the zero flag. */ set_sf(bits); set_pf(); if (cpu_mod != 3) diff --git a/src/device/keyboard_at.c b/src/device/keyboard_at.c index 5f158b608..cd521da8b 100644 --- a/src/device/keyboard_at.c +++ b/src/device/keyboard_at.c @@ -734,7 +734,7 @@ static void kbd_poll(void *priv) { atkbd_t *dev = (atkbd_t *)priv; - uint16_t irq_table[4] = { 0x0000, 0x0002, 0x1000, 0xffff }; + uint16_t irq_table[4] = { 0x0002, 0x0002, 0x1000, 0xffff }; int i, channel; uint16_t val, irq; diff --git a/src/include/86box/mem.h b/src/include/86box/mem.h index 9ece6b698..a5bae6f89 100644 --- a/src/include/86box/mem.h +++ b/src/include/86box/mem.h @@ -232,7 +232,9 @@ extern int mem_a20_state, extern uint8_t read_mem_b(uint32_t addr); +extern uint16_t read_mem_w(uint32_t addr); extern void write_mem_b(uint32_t addr, uint8_t val); +extern void write_mem_w(uint32_t addr, uint16_t val); #ifndef USE_NEW_DYNAREC #define readmemb(a) ((readlookup2[(a)>>12]==-1)?readmembl(a):*(uint8_t *)(readlookup2[(a) >> 12] + (a))) diff --git a/src/include/86box/sio.h b/src/include/86box/sio.h index f913d9835..df4d84a73 100644 --- a/src/include/86box/sio.h +++ b/src/include/86box/sio.h @@ -25,6 +25,7 @@ extern const device_t fdc37c663_device; extern const device_t fdc37c665_device; extern const device_t fdc37c666_device; extern const device_t fdc37c669_device; +extern const device_t fdc37c669_370_device; extern const device_t fdc37c931apm_device; extern const device_t fdc37c931apm_compaq_device; extern const device_t fdc37c932fr_device; diff --git a/src/include/86box/vid_ddc.h b/src/include/86box/vid_ddc.h new file mode 100644 index 000000000..7b8f8bd29 --- /dev/null +++ b/src/include/86box/vid_ddc.h @@ -0,0 +1,4 @@ +void ddc_init(void); +void ddc_i2c_change(int new_clock, int new_data); +int ddc_read_clock(void); +int ddc_read_data(void); diff --git a/src/include/86box/vid_svga.h b/src/include/86box/vid_svga.h index 7bc35bcdc..9e4277789 100644 --- a/src/include/86box/vid_svga.h +++ b/src/include/86box/vid_svga.h @@ -57,7 +57,7 @@ typedef struct svga_t firstline, lastline, firstline_draw, lastline_draw, displine, fullchange, x_add, y_add, pan, vram_display_mask, vidclock, - hwcursor_on, dac_hwcursor_on, overlay_on; + hwcursor_on, dac_hwcursor_on, overlay_on, set_override; /*The three variables below allow us to implement memory maps like that seen on a 1MB Trio64 : 0MB-1MB - VRAM @@ -77,7 +77,7 @@ typedef struct svga_t extra_banks[2], banked_mask, ca, overscan_color, - *map8, pallook[256]; + *map8, pallook[512]; PALETTE vgapal; diff --git a/src/include/86box/vid_svga_render.h b/src/include/86box/vid_svga_render.h index 6b8b50613..efd23d165 100644 --- a/src/include/86box/vid_svga_render.h +++ b/src/include/86box/vid_svga_render.h @@ -27,6 +27,7 @@ extern int scrollcache; extern uint8_t edatlookup[4][4]; +void svga_render_null(svga_t *svga); void svga_render_blank(svga_t *svga); void svga_render_overscan_left(svga_t *svga); void svga_render_overscan_right(svga_t *svga); diff --git a/src/include/86box/vid_voodoo_banshee.h b/src/include/86box/vid_voodoo_banshee.h new file mode 100644 index 000000000..808c861c8 --- /dev/null +++ b/src/include/86box/vid_voodoo_banshee.h @@ -0,0 +1,18 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Banshee and 3 specific emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ + +void banshee_set_overlay_addr(void *p, uint32_t addr); diff --git a/src/include/86box/vid_voodoo_banshee_blitter.h b/src/include/86box/vid_voodoo_banshee_blitter.h new file mode 100644 index 000000000..3d8c16a1b --- /dev/null +++ b/src/include/86box/vid_voodoo_banshee_blitter.h @@ -0,0 +1,18 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Banshee and 3 specific emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ + +void voodoo_2d_reg_writel(voodoo_t *voodoo, uint32_t addr, uint32_t val); diff --git a/src/include/86box/vid_voodoo_blitter.h b/src/include/86box/vid_voodoo_blitter.h new file mode 100644 index 000000000..63bd0a99e --- /dev/null +++ b/src/include/86box/vid_voodoo_blitter.h @@ -0,0 +1,20 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * 3DFX Voodoo emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ + +void voodoo_v2_blit_start(voodoo_t *voodoo); +void voodoo_v2_blit_data(voodoo_t *voodoo, uint32_t data); +void voodoo_fastfill(voodoo_t *voodoo, voodoo_params_t *params); diff --git a/src/include/86box/vid_voodoo_codegen_x86-64.h b/src/include/86box/vid_voodoo_codegen_x86-64.h index f684176aa..b5e3ed6d1 100644 --- a/src/include/86box/vid_voodoo_codegen_x86-64.h +++ b/src/include/86box/vid_voodoo_codegen_x86-64.h @@ -5,15 +5,16 @@ fbzColorPath */ -#ifdef __linux__ -# include -# include +#if defined(__linux__) || defined(__APPLE__) +#include +#include #endif #if WIN64 -# include +#define BITMAP windows_BITMAP +#include +#undef BITMAP #endif -#include #include #define BLOCK_NUM 8 @@ -32,45 +33,51 @@ typedef struct voodoo_x86_data_t uint32_t fbzColorPath; uint32_t textureMode[2]; uint32_t tLOD[2]; - uint32_t trexInit1; + uint32_t trexInit1; + int is_tiled; } voodoo_x86_data_t; //static voodoo_x86_data_t voodoo_x86_data[2][BLOCK_NUM]; -static int last_block[2] = {0, 0}; -static int next_block_to_write[2] = {0, 0}; +static int last_block[4] = {0, 0}; +static int next_block_to_write[4] = {0, 0}; -#define addbyte(val) \ - if (block_pos < BLOCK_SIZE) \ - code_block[block_pos++] = val; \ - if (block_pos >= BLOCK_SIZE) \ - fatal("Over!\n") +#define addbyte(val) \ + do { \ + code_block[block_pos++] = val; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) -#define addword(val) \ - *(uint16_t *)&code_block[block_pos] = val; \ - block_pos += 2; \ - if (block_pos >= BLOCK_SIZE) \ - fatal("Over!\n") +#define addword(val) \ + do { \ + *(uint16_t *)&code_block[block_pos] = val; \ + block_pos += 2; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) -#define addlong(val) \ - *(uint32_t *)&code_block[block_pos] = val; \ - block_pos += 4; \ - if (block_pos >= BLOCK_SIZE) \ - fatal("Over!\n") +#define addlong(val) \ + do { \ + *(uint32_t *)&code_block[block_pos] = val; \ + block_pos += 4; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) -#define addquad(val) \ - *(uint64_t *)&code_block[block_pos] = val; \ - block_pos += 8; \ - if (block_pos >= BLOCK_SIZE) \ - fatal("Over!\n") +#define addquad(val) \ + do { \ + *(uint64_t *)&code_block[block_pos] = val; \ + block_pos += 8; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) static __m128i xmm_01_w;// = 0x0001000100010001ull; static __m128i xmm_ff_w;// = 0x00ff00ff00ff00ffull; static __m128i xmm_ff_b;// = 0x00000000ffffffffull; -static uint32_t zero = 0; - static __m128i alookup[257], aminuslookup[256]; static __m128i minus_254;// = 0xff02ff02ff02ff02ull; static __m128i bilinear_lookup[256*2]; @@ -157,10 +164,11 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(0x89); /*MOV state->tex_s, EBX*/ addbyte(0x9f); addlong(offsetof(voodoo_state_t, tex_s)); - addbyte(0x0f); /*MOVZX EAX, logtable[RAX]*/ + addbyte(0x41); /*MOVZX EAX, R9(logtable)[RAX]*/ + addbyte(0x0f); addbyte(0xb6); - addbyte(0x80); - addlong((uint32_t)(uintptr_t)logtable); + addbyte(0x04); + addbyte(0x01); addbyte(0x09); /*OR EAX, EDX*/ addbyte(0xd0); addbyte(0x03); /*ADD EAX, state->lod*/ @@ -334,11 +342,10 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(1); if (state->clamp_t[tmu]) { - addbyte(0x0f); /*CMOVS EDX, zero*/ + addbyte(0x41); /*CMOVS EDX, R10(alookup[0](zero))*/ + addbyte(0x0f); addbyte(0x48); - addbyte(0x14); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&zero); + addbyte(0x12); addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/ addbyte(0x96); addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); @@ -348,11 +355,10 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); addbyte(0x85); /*TEST EBX,EBX*/ addbyte(0xdb); - addbyte(0x0f); /*CMOVS EBX, zero*/ + addbyte(0x41); /*CMOVS EBX, R10(alookup[0](zero))*/ + addbyte(0x0f); addbyte(0x48); - addbyte(0x1c); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&zero); + addbyte(0x1a); addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/ addbyte(0x9e); addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); @@ -395,11 +401,10 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v addbyte(0x8b); /*MOV ebp_store2, RSI*/ addbyte(0xb7); addlong(offsetof(voodoo_state_t, ebp_store)); - addbyte(0x0f); /*CMOVS EAX, zero*/ + addbyte(0x41); /*CMOVS EAX, R10(alookup[0](zero))*/ + addbyte(0x0f); addbyte(0x48); - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&zero); + addbyte(0x02); addbyte(0x78); /*JS + - clamp on 0*/ addbyte(2+3+2+ 5+5+2); addbyte(0x3b); /*CMP EAX, EBP*/ @@ -610,11 +615,10 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v { addbyte(0x85); /*TEST EAX, EAX*/ addbyte(0xc0); - addbyte(0x0f); /*CMOVS EAX, zero*/ + addbyte(0x41); /*CMOVS EAX, R10(alookup[0](zero))*/ + addbyte(0x0f); addbyte(0x48); - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&zero); + addbyte(0x02); addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/ addbyte(0x84); addbyte(0x8e); @@ -637,11 +641,10 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v { addbyte(0x85); /*TEST EBX, EBX*/ addbyte(0xdb); - addbyte(0x0f); /*CMOVS EBX, zero*/ + addbyte(0x41); /*CMOVS EBX, R10(alookup[0](zero))*/ + addbyte(0x0f); addbyte(0x48); - addbyte(0x1c); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&zero); + addbyte(0x1a); addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/ addbyte(0x9c); addbyte(0x8e); @@ -707,11 +710,48 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x57); /*PUSH RDI*/ addbyte(0x56); /*PUSH RSI*/ addbyte(0x53); /*PUSH RBX*/ + addbyte(0x41); /*PUSH R12*/ + addbyte(0x54); + addbyte(0x41); /*PUSH R13*/ + addbyte(0x55); addbyte(0x41); /*PUSH R14*/ addbyte(0x56); addbyte(0x41); /*PUSH R15*/ addbyte(0x57); + addbyte(0x49); /*MOV R15, xmm_01_w*/ + addbyte(0xbf); + addquad((uint64_t)(uintptr_t)&xmm_01_w); + addbyte(0x66); /*MOVDQA XMM8, [R15]*/ + addbyte(0x45); + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x07 | (0 << 3)); + addbyte(0x49); /*MOV R15, xmm_ff_w*/ + addbyte(0xbf); + addquad((uint64_t)(uintptr_t)&xmm_ff_w); + addbyte(0x66); /*MOVDQA XMM9, [R15]*/ + addbyte(0x45); + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x07 | (1 << 3)); + addbyte(0x49); /*MOV R15, xmm_ff_b*/ + addbyte(0xbf); + addquad((uint64_t)(uintptr_t)&xmm_ff_b); + addbyte(0x66); /*MOVDQA XMM10, [R15]*/ + addbyte(0x45); + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x07 | (2 << 3)); + addbyte(0x49); /*MOV R15, minus_254*/ + addbyte(0xbf); + addquad((uint64_t)(uintptr_t)&minus_254); + addbyte(0x66); /*MOVDQA XMM11, [R15]*/ + addbyte(0x45); + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x07 | (3 << 3)); + #if WIN64 addbyte(0x48); /*MOV RDI, RCX (voodoo_state)*/ addbyte(0x89); @@ -723,17 +763,56 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x89); addbyte(0xce); #else - addbyte(0x49); /*MOV R9, RCX (real_y)*/ + addbyte(0x49); /*MOV R14, RCX (real_y)*/ addbyte(0x89); - addbyte(0xc9); + addbyte(0xce); addbyte(0x49); /*MOV R15, RSI (voodoo_state)*/ addbyte(0x89); addbyte(0xf7); #endif + + addbyte(0x49); /*MOV R9, logtable*/ + addbyte(0xb8 | (9 & 7)); + addquad((uint64_t)(uintptr_t)&logtable); + addbyte(0x49); /*MOV R10, alookup*/ + addbyte(0xb8 | (10 & 7)); + addquad((uint64_t)(uintptr_t)&alookup); + addbyte(0x49); /*MOV R11, aminuslookup*/ + addbyte(0xb8 | (11 & 7)); + addquad((uint64_t)(uintptr_t)&aminuslookup); + addbyte(0x49); /*MOV R12, xmm_00_ff_w*/ + addbyte(0xb8 | (12 & 7)); + addquad((uint64_t)(uintptr_t)&xmm_00_ff_w); + addbyte(0x49); /*MOV R13, i_00_ff_w*/ + addbyte(0xb8 | (13 & 7)); + addquad((uint64_t)(uintptr_t)&i_00_ff_w); + loop_jump_pos = block_pos; addbyte(0x4c); /*MOV RSI, R15*/ addbyte(0x89); addbyte(0xfe); + if (params->col_tiled || params->aux_tiled) + { + addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + addbyte(0x83); /*AND EAX, 63*/ + addbyte(0xe0); + addbyte(63); + addbyte(0xc1); /*SHR EBX, 6*/ + addbyte(0xeb); + addbyte(6); + addbyte(0xc1); /*SHL EBX, 11 - tile is 128*32, << 12, div 2 because word index*/ + addbyte(0xe3); + addbyte(11); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + addbyte(0x89); /*MOV state->x_tiled[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x_tiled)); + } addbyte(0x66); /*PXOR XMM2, XMM2*/ addbyte(0x0f); addbyte(0xef); @@ -851,7 +930,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo { addbyte(0x8b); /*MOV EBX, state->x[EDI]*/ addbyte(0x9f); - addlong(offsetof(voodoo_state_t, x)); + if (params->aux_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); addbyte(0x48); /*MOV RCX, aux_mem[RDI]*/ addbyte(0x8b); addbyte(0x8f); @@ -1075,27 +1157,26 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo } if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) { - addbyte(0x66); /*PXOR XMM0, xmm_00_ff_w[EBX]*/ - addbyte(0x0f); - addbyte(0xef); - addbyte(0x83); - addlong((uint32_t)(uintptr_t)&xmm_00_ff_w[0]); - } - else if (!tc_reverse_blend_1) - { - addbyte(0x66); /*PXOR XMM0, xmm_ff_w*/ + addbyte(0x66); /*PXOR XMM0, R12(xmm_00_ff_w)[EBX]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xef); addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&xmm_ff_w); + addbyte(0x1c); } - addbyte(0x66); /*PADDW XMM0, xmm_01_w*/ + else if (!tc_reverse_blend_1) + { + addbyte(0x66); /*PXOR XMM0, XMM9(xmm_ff_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc1); + } + addbyte(0x66); /*PADDW XMM0, XMM8(xmm_01_w)*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&xmm_01_w); + addbyte(0xc0); addbyte(0xf3); /*MOVQ XMM1, XMM2*/ addbyte(0x0f); addbyte(0x7e); @@ -1213,10 +1294,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo } if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) { - addbyte(0x33); /*XOR EAX, i_00_ff_w[ECX*4]*/ - addbyte(0x04); + addbyte(0x41); /*XOR EAX, R13(i_00_ff_w)[ECX*4]*/ + addbyte(0x33); + addbyte(0x44); addbyte(0x8d); - addlong((uint32_t)(uintptr_t)i_00_ff_w); + addbyte(0); } else if (!tc_reverse_blend_1) { @@ -1399,27 +1481,26 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo } if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) { - addbyte(0x66); /*PXOR XMM4, xmm_00_ff_w[EBX]*/ - addbyte(0x0f); - addbyte(0xef); - addbyte(0xa3); - addlong((uint32_t)(uintptr_t)&xmm_00_ff_w[0]); - } - else if (!tc_reverse_blend) - { - addbyte(0x66); /*PXOR XMM4, FF*/ + addbyte(0x66); /*PXOR XMM4, R12(xmm_00_ff_w)[EBX]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xef); addbyte(0x24); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&xmm_ff_w); + addbyte(0x1c); } - addbyte(0x66); /*PADDW XMM4, 1*/ + else if (!tc_reverse_blend) + { + addbyte(0x66); /*PXOR XMM4, XMM9(xmm_ff_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe1); + } + addbyte(0x66); /*PADDW XMM4, XMM8(xmm_01_w)*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x24); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&xmm_01_w); + addbyte(0xe0); addbyte(0xf3); /*MOVQ XMM5, XMM1*/ addbyte(0x0f); addbyte(0x7e); @@ -1483,11 +1564,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo } if (tc_invert_output) { - addbyte(0x66); /*PXOR XMM1, FF*/ + addbyte(0x66); /*PXOR XMM1, XMM9(xmm_ff_w)*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xef); - addbyte(0x0d); - addlong((uint32_t)(uintptr_t)&xmm_ff_w); + addbyte(0xc9); } addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ @@ -1581,10 +1662,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo } if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) { - addbyte(0x33); /*XOR EBX, i_00_ff_w[ECX*4]*/ - addbyte(0x1c); + addbyte(0x41); /*XOR EBX, R13(i_00_ff_w)[ECX*4]*/ + addbyte(0x33); + addbyte(0x5c); addbyte(0x8d); - addlong((uint32_t)(uintptr_t)i_00_ff_w); + addbyte(0); } else if (!tca_reverse_blend) { @@ -2131,19 +2213,17 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xe0); if (!cc_reverse_blend) { - addbyte(0x66); /*PXOR XMM3, 0xff*/ + addbyte(0x66); /*PXOR XMM3, XMM9(xmm_ff_w)*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xef); - addbyte(0x1c); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&xmm_ff_w); + addbyte(0xd9); } - addbyte(0x66); /*PADDW XMM3, 1*/ + addbyte(0x66); /*PADDW XMM3, XMM8(xmm_01_w)*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x1c); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&xmm_01_w); + addbyte(0xd8); addbyte(0x66); /*PMULLW XMM0, XMM3*/ addbyte(0x0f); addbyte(0xd5); @@ -2182,12 +2262,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo if (cc_invert_output) { - addbyte(0x66); /*PXOR XMM0, 0xff*/ + addbyte(0x66); /*PXOR XMM0, XMM10(xmm_ff_b)*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xef); - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&xmm_ff_b); + addbyte(0xc2); } if (params->fogMode & FOG_ENABLE) @@ -2344,11 +2423,12 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xc0); addbyte(0x66); /*PMULLW XMM3, alookup+4[EAX*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xd5); - addbyte(0x1c); - addbyte(0xc5); - addlong(((uintptr_t)alookup) + 16); + addbyte(0x5c); + addbyte(0xc2); + addbyte(16); addbyte(0x66); /*PSRAW XMM3, 7*/ addbyte(0x0f); addbyte(0x71); @@ -2437,7 +2517,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addquad((uintptr_t)rgb565); addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ addbyte(0x87); - addlong(offsetof(voodoo_state_t, x)); + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); addbyte(0x48); /*MOV RBP, fb_mem*/ addbyte(0x8b); addbyte(0xaf); @@ -2477,22 +2560,22 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xe4); break; case AFUNC_ASRC_ALPHA: - addbyte(0x66); /*PMULLW XMM4, alookup[EDX*8]*/ + addbyte(0x66); /*PMULLW XMM4, R10(alookup)[EDX*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xd5); addbyte(0x24); - addbyte(0xd5); - addlong((uint32_t)(uintptr_t)alookup); + addbyte(0xd2); addbyte(0xf3); /*MOVQ XMM5, XMM4*/ addbyte(0x0f); addbyte(0x7e); addbyte(0xec); - addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x66); /*PADDW XMM4, R10(alookup)[1*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x24); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)alookup + 16); + addbyte(0x62); + addbyte(8*2); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2517,12 +2600,12 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x0f); addbyte(0x7e); addbyte(0xec); - addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x66); /*PADDW XMM4, R10(alookup)[1*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x24); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)alookup + 16); + addbyte(0x62); + addbyte(8*2); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2543,22 +2626,22 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo case AFUNC_AONE: break; case AFUNC_AOMSRC_ALPHA: - addbyte(0x66); /*PMULLW XMM4, aminuslookup[EDX*8]*/ + addbyte(0x66); /*PMULLW XMM4, R11(aminuslookup)[EDX*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xd5); addbyte(0x24); - addbyte(0xd5); - addlong((uint32_t)(uintptr_t)aminuslookup); + addbyte(0xd3); addbyte(0xf3); /*MOVQ XMM5, XMM4*/ addbyte(0x0f); addbyte(0x7e); addbyte(0xec); - addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x66); /*PADDW XMM4, R10(alookup)[1*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x24); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)alookup + 16); + addbyte(0x62); + addbyte(8*2); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2575,12 +2658,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(8); break; case AFUNC_AOM_COLOR: - addbyte(0xf3); /*MOVQ XMM5, xmm_ff_w*/ + addbyte(0xf3); /*MOVQ XMM5, XMM9(xmm_ff_w)*/ + addbyte(0x41); addbyte(0x0f); addbyte(0x7e); - addbyte(0x2c); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&xmm_ff_w); + addbyte(0xe9); addbyte(0x66); /*PSUBW XMM5, XMM0*/ addbyte(0x0f); addbyte(0xf9); @@ -2594,11 +2676,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x7e); addbyte(0xec); addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x24); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)alookup + 16); + addbyte(0x62); + addbyte(8*2); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2621,22 +2703,21 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xe4); break; case AFUNC_ASATURATE: - addbyte(0x66); /*PMULLW XMM4, minus_254*/ + addbyte(0x66); /*PMULLW XMM4, XMM11(minus_254)*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xd5); - addbyte(0x24); - addbyte(0xd5); - addlong((uint32_t)(uintptr_t)&minus_254); + addbyte(0xe3); addbyte(0xf3); /*MOVQ XMM5, XMM4*/ addbyte(0x0f); addbyte(0x7e); addbyte(0xec); addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x24); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)alookup + 16); + addbyte(0x62); + addbyte(8*2); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2662,22 +2743,22 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xc0); break; case AFUNC_ASRC_ALPHA: - addbyte(0x66); /*PMULLW XMM0, alookup[EDX*8]*/ + addbyte(0x66); /*PMULLW XMM0, R10(alookup)[EDX*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xd5); addbyte(0x04); - addbyte(0xd5); - addlong((uint32_t)(uintptr_t)alookup); + addbyte(0xd2); addbyte(0xf3); /*MOVQ XMM5, XMM0*/ addbyte(0x0f); addbyte(0x7e); addbyte(0xe8); - addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x66); /*PADDW XMM0, R10(alookup)[1*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)alookup + 16); + addbyte(0x42); + addbyte(8*2); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2702,12 +2783,12 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x0f); addbyte(0x7e); addbyte(0xe8); - addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x66); /*PADDW XMM0, R10(alookup)[1*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)alookup + 16); + addbyte(0x42); + addbyte(8*2); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2728,22 +2809,22 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo case AFUNC_AONE: break; case AFUNC_AOMSRC_ALPHA: - addbyte(0x66); /*PMULLW XMM0, aminuslookup[EDX*8]*/ + addbyte(0x66); /*PMULLW XMM0, R11(aminuslookup)[EDX*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xd5); addbyte(0x04); - addbyte(0xd5); - addlong((uint32_t)(uintptr_t)aminuslookup); + addbyte(0xd3); addbyte(0xf3); /*MOVQ XMM5, XMM0*/ addbyte(0x0f); addbyte(0x7e); addbyte(0xe8); addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)alookup + 16); + addbyte(0x42); + addbyte(8*2); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2760,12 +2841,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(8); break; case AFUNC_AOM_COLOR: - addbyte(0xf3); /*MOVQ XMM5, xmm_ff_w*/ + addbyte(0xf3); /*MOVQ XMM5, XMM9(xmm_ff_w)*/ + addbyte(0x41); addbyte(0x0f); addbyte(0x7e); - addbyte(0x2c); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)&xmm_ff_w); + addbyte(0xe9); addbyte(0x66); /*PSUBW XMM5, XMM6*/ addbyte(0x0f); addbyte(0xf9); @@ -2779,11 +2859,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x7e); addbyte(0xe8); addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x41); addbyte(0x0f); addbyte(0xfd); - addbyte(0x04); - addbyte(0x25); - addlong((uint32_t)(uintptr_t)alookup + 16); + addbyte(0x42); + addbyte(8*2); addbyte(0x66); /*PSRLW XMM5, 8*/ addbyte(0x0f); addbyte(0x71); @@ -2822,8 +2902,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ addbyte(0x97); - addlong(offsetof(voodoo_state_t, x)); - + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x66); /*MOV EAX, XMM0*/ addbyte(0x0f); addbyte(0x7e); @@ -2889,7 +2972,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo } addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ addbyte(0x97); - addlong(offsetof(voodoo_state_t, x)); + if (voodoo->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); addbyte(0x4c); /*ADD RSI, R8*/ addbyte(0x01); addbyte(0xc6); @@ -2975,6 +3061,12 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) { + addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ + addbyte(0x97); + if (params->aux_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); addbyte(0x66); /*MOV AX, new_depth*/ addbyte(0x8b); addbyte(0x87); @@ -3234,6 +3326,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x5f); addbyte(0x41); /*POP R14*/ addbyte(0x5e); + addbyte(0x41); /*POP R13*/ + addbyte(0x5d); + addbyte(0x41); /*POP R12*/ + addbyte(0x5c); addbyte(0x5b); /*POP RBX*/ addbyte(0x5e); /*POP RSI*/ addbyte(0x5f); /*POP RDI*/ @@ -3241,7 +3337,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xC3); /*RET*/ } -static int voodoo_recomp = 0; +int voodoo_recomp = 0; static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even) { int c; @@ -3251,7 +3347,7 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, for (c = 0; c < 8; c++) { - data = &voodoo_x86_data[odd_even + c*2]; //&voodoo_x86_data[odd_even][b]; + data = &voodoo_x86_data[odd_even + c*4]; //&voodoo_x86_data[odd_even][b]; if (state->xdir == data->xdir && params->alphaMode == data->alphaMode && @@ -3262,7 +3358,8 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, params->textureMode[0] == data->textureMode[0] && params->textureMode[1] == data->textureMode[1] && (params->tLOD[0] & LOD_MASK) == data->tLOD[0] && - (params->tLOD[1] & LOD_MASK) == data->tLOD[1]) + (params->tLOD[1] & LOD_MASK) == data->tLOD[1] && + ((params->col_tiled || params->aux_tiled) ? 1 : 0) == data->is_tiled) { last_block[odd_even] = b; return data->code_block; @@ -3271,7 +3368,7 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, b = (b + 1) & 7; } voodoo_recomp++; - data = &voodoo_x86_data[odd_even + next_block_to_write[odd_even]*2]; + data = &voodoo_x86_data[odd_even + next_block_to_write[odd_even]*4]; // code_block = data->code_block; voodoo_generate(data->code_block, voodoo, params, state, depth_op); @@ -3286,36 +3383,21 @@ voodoo_recomp++; data->textureMode[1] = params->textureMode[1]; data->tLOD[0] = params->tLOD[0] & LOD_MASK; data->tLOD[1] = params->tLOD[1] & LOD_MASK; + data->is_tiled = (params->col_tiled || params->aux_tiled) ? 1 : 0; next_block_to_write[odd_even] = (next_block_to_write[odd_even] + 1) & 7; return data->code_block; } -static void voodoo_codegen_init(voodoo_t *voodoo) +void voodoo_codegen_init(voodoo_t *voodoo) { int c; -#ifdef __linux__ - void *start; - size_t len; - long pagesize = sysconf(_SC_PAGESIZE); - long pagemask = ~(pagesize - 1); -#endif #if WIN64 - voodoo->codegen_data = VirtualAlloc(NULL, sizeof(voodoo_x86_data_t) * BLOCK_NUM * 2, MEM_COMMIT, PAGE_EXECUTE_READWRITE); + voodoo->codegen_data = VirtualAlloc(NULL, sizeof(voodoo_x86_data_t) * BLOCK_NUM * 4, MEM_COMMIT, PAGE_EXECUTE_READWRITE); #else - voodoo->codegen_data = malloc(sizeof(voodoo_x86_data_t) * BLOCK_NUM * 2); -#endif - -#ifdef __linux__ - start = (void *)((long)voodoo->codegen_data & pagemask); - len = ((sizeof(voodoo_x86_data_t) * BLOCK_NUM * 2) + pagesize) & pagemask; - if (mprotect(start, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) - { - perror("mprotect"); - exit(-1); - } + voodoo->codegen_data = mmap(0, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, 0, 0); #endif for (c = 0; c < 256; c++) @@ -3340,12 +3422,12 @@ static void voodoo_codegen_init(voodoo_t *voodoo) xmm_00_ff_w[1] = _mm_set_epi32(0, 0, 0xff | (0xff << 16), 0xff | (0xff << 16)); } -static void voodoo_codegen_close(voodoo_t *voodoo) +void voodoo_codegen_close(voodoo_t *voodoo) { #if WIN64 VirtualFree(voodoo->codegen_data, 0, MEM_RELEASE); #else - free(voodoo->codegen_data); + munmap(voodoo->codegen_data, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4); #endif } diff --git a/src/include/86box/vid_voodoo_codegen_x86.h b/src/include/86box/vid_voodoo_codegen_x86.h index 15c9951c1..a9f2c5533 100644 --- a/src/include/86box/vid_voodoo_codegen_x86.h +++ b/src/include/86box/vid_voodoo_codegen_x86.h @@ -5,15 +5,16 @@ fbzColorPath */ -#ifdef __linux__ -# include -# include +#if defined(__linux__) || defined(__APPLE__) +#include +#include #endif #if defined WIN32 || defined _WIN32 || defined _WIN32 -# include +#define BITMAP windows_BITMAP +#include +#undef BITMAP #endif -#include #include #define BLOCK_NUM 8 @@ -32,35 +33,43 @@ typedef struct voodoo_x86_data_t uint32_t fbzColorPath; uint32_t textureMode[2]; uint32_t tLOD[2]; - uint32_t trexInit1; + uint32_t trexInit1; + int is_tiled; } voodoo_x86_data_t; -static int last_block[2] = {0, 0}; -static int next_block_to_write[2] = {0, 0}; +static int last_block[4] = {0, 0}; +static int next_block_to_write[4] = {0, 0}; -#define addbyte(val) \ - if (block_pos < BLOCK_SIZE) \ - code_block[block_pos++] = val; \ - if (block_pos >= BLOCK_SIZE) \ - fatal("Over!\n") +#define addbyte(val) \ + do { \ + code_block[block_pos++] = val; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) -#define addword(val) \ - *(uint16_t *)&code_block[block_pos] = val; \ - block_pos += 2; \ - if (block_pos >= BLOCK_SIZE) \ - fatal("Over!\n") +#define addword(val) \ + do { \ + *(uint16_t *)&code_block[block_pos] = val; \ + block_pos += 2; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) -#define addlong(val) \ - *(uint32_t *)&code_block[block_pos] = val; \ - block_pos += 4; \ - if (block_pos >= BLOCK_SIZE) \ - fatal("Over!\n") +#define addlong(val) \ + do { \ + *(uint32_t *)&code_block[block_pos] = val; \ + block_pos += 4; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) -#define addquad(val) \ - *(uint64_t *)&code_block[block_pos] = val; \ - block_pos += 8; \ - if (block_pos >= BLOCK_SIZE) \ - fatal("Over!\n") +#define addquad(val) \ + do { \ + *(uint64_t *)&code_block[block_pos] = val; \ + block_pos += 8; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) static __m128i xmm_01_w;// = 0x0001000100010001ull; @@ -694,6 +703,28 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x74); addbyte(0x24); addbyte(8+16); + if (params->col_tiled || params->aux_tiled) + { + addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + addbyte(0x83); /*AND EAX, 63*/ + addbyte(0xe0); + addbyte(63); + addbyte(0xc1); /*SHR EBX, 6*/ + addbyte(0xeb); + addbyte(6); + addbyte(0xc1); /*SHL EBX, 11 - tile is 128*32, << 12, div 2 because word index*/ + addbyte(0xe3); + addbyte(11); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + addbyte(0x89); /*MOV state->x_tiled[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x_tiled)); + } addbyte(0x66); /*PXOR XMM2, XMM2*/ addbyte(0x0f); addbyte(0xef); @@ -826,7 +857,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo { addbyte(0x8b); /*MOV EBX, state->x[EDI]*/ addbyte(0x9f); - addlong(offsetof(voodoo_state_t, x)); + if (voodoo->aux_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); addbyte(0x8b);/*MOV ECX, aux_mem[EDI]*/ addbyte(0x8f); addlong(offsetof(voodoo_state_t, aux_mem)); @@ -2436,7 +2470,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo { addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ addbyte(0x87); - addlong(offsetof(voodoo_state_t, x)); + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); addbyte(0x8b); /*MOV EBP, fb_mem*/ addbyte(0xaf); addlong(offsetof(voodoo_state_t, fb_mem)); @@ -2815,8 +2852,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ addbyte(0x97); - addlong(offsetof(voodoo_state_t, x)); - + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x66); /*MOV EAX, XMM0*/ addbyte(0x0f); addbyte(0x7e); @@ -2884,7 +2924,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo } addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ addbyte(0x97); - addlong(offsetof(voodoo_state_t, x)); + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); if (dither2x2) { addbyte(0xc1); /*SHL ECX, 2*/ @@ -2968,6 +3011,12 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) { + addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ + addbyte(0x97); + if (params->aux_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); addbyte(0x66); /*MOV AX, new_depth*/ addbyte(0x8b); addbyte(0x87); @@ -3227,7 +3276,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) cs = cs; } -static int voodoo_recomp = 0; +int voodoo_recomp = 0; static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even) { @@ -3238,7 +3287,7 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, for (c = 0; c < 8; c++) { - data = &codegen_data[odd_even + b*2]; + data = &codegen_data[odd_even + b*4]; if (state->xdir == data->xdir && params->alphaMode == data->alphaMode && @@ -3249,7 +3298,8 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, params->textureMode[0] == data->textureMode[0] && params->textureMode[1] == data->textureMode[1] && (params->tLOD[0] & LOD_MASK) == data->tLOD[0] && - (params->tLOD[1] & LOD_MASK) == data->tLOD[1]) + (params->tLOD[1] & LOD_MASK) == data->tLOD[1] && + ((params->col_tiled || params->aux_tiled) ? 1 : 0) == data->is_tiled) { last_block[odd_even] = b; return data->code_block; @@ -3258,7 +3308,7 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, b = (b + 1) & 7; } voodoo_recomp++; - data = &codegen_data[odd_even + next_block_to_write[odd_even]*2]; + data = &codegen_data[odd_even + next_block_to_write[odd_even]*4]; // code_block = data->code_block; voodoo_generate(data->code_block, voodoo, params, state, depth_op); @@ -3273,16 +3323,17 @@ voodoo_recomp++; data->textureMode[1] = params->textureMode[1]; data->tLOD[0] = params->tLOD[0] & LOD_MASK; data->tLOD[1] = params->tLOD[1] & LOD_MASK; + data->is_tiled = (params->col_tiled || params->aux_tiled) ? 1 : 0; next_block_to_write[odd_even] = (next_block_to_write[odd_even] + 1) & 7; return data->code_block; } -static void voodoo_codegen_init(voodoo_t *voodoo) +void voodoo_codegen_init(voodoo_t *voodoo) { int c; -#ifdef __linux__ +#if defined(__linux__) || defined(__APPLE__) void *start; size_t len; long pagesize = sysconf(_SC_PAGESIZE); @@ -3290,19 +3341,9 @@ static void voodoo_codegen_init(voodoo_t *voodoo) #endif #if defined WIN32 || defined _WIN32 || defined _WIN32 - voodoo->codegen_data = VirtualAlloc(NULL, sizeof(voodoo_x86_data_t) * BLOCK_NUM*2, MEM_COMMIT, PAGE_EXECUTE_READWRITE); + voodoo->codegen_data = VirtualAlloc(NULL, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, MEM_COMMIT, PAGE_EXECUTE_READWRITE); #else - voodoo->codegen_data = malloc(sizeof(voodoo_x86_data_t) * BLOCK_NUM*2); -#endif - -#ifdef __linux__ - start = (void *)((long)voodoo->codegen_data & pagemask); - len = ((sizeof(voodoo_x86_data_t) * BLOCK_NUM*2) + pagesize) & pagemask; - if (mprotect(start, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) - { - perror("mprotect"); - exit(-1); - } + voodoo->codegen_data = mmap(0, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, 0, 0); #endif for (c = 0; c < 256; c++) @@ -3327,11 +3368,11 @@ static void voodoo_codegen_init(voodoo_t *voodoo) xmm_00_ff_w[1] = _mm_set_epi32(0, 0, 0xff | (0xff << 16), 0xff | (0xff << 16)); } -static void voodoo_codegen_close(voodoo_t *voodoo) +void voodoo_codegen_close(voodoo_t *voodoo) { #if defined WIN32 || defined _WIN32 || defined _WIN32 VirtualFree(voodoo->codegen_data, 0, MEM_RELEASE); #else - free(voodoo->codegen_data); + munmap(voodoo->codegen_data, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4); #endif } diff --git a/src/include/86box/vid_voodoo_common.h b/src/include/86box/vid_voodoo_common.h new file mode 100644 index 000000000..073d2ca3d --- /dev/null +++ b/src/include/86box/vid_voodoo_common.h @@ -0,0 +1,516 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Graphics, 2, Banshee, 3 emulation. + * + * + * + * Authors: Sarah Walker, + * leilei + * + * Copyright 2008-2020 Sarah Walker. + */ + +#ifdef CLAMP +#undef CLAMP +#endif + +#define CLAMP(x) (((x) < 0) ? 0 : (((x) > 0xff) ? 0xff : (x))) +#define CLAMP16(x) (((x) < 0) ? 0 : (((x) > 0xffff) ? 0xffff : (x))) + + +#define LOD_MAX 8 + +#define TEX_DIRTY_SHIFT 10 + +#define TEX_CACHE_MAX 64 + +enum +{ + VOODOO_1 = 0, + VOODOO_SB50, + VOODOO_2, + VOODOO_BANSHEE, + VOODOO_3 +}; + +typedef union int_float +{ + uint32_t i; + float f; +} int_float; + +typedef struct rgbvoodoo_t +{ + uint8_t b, g, r; + uint8_t pad; +} rgbvoodoo_t; +typedef struct rgba8_t +{ + uint8_t b, g, r, a; +} rgba8_t; + +typedef union rgba_u +{ + struct + { + uint8_t b, g, r, a; + } rgba; + uint32_t u; +} rgba_u; + +#define FIFO_SIZE 65536 +#define FIFO_MASK (FIFO_SIZE - 1) +#define FIFO_ENTRY_SIZE (1 << 31) + +#define FIFO_ENTRIES (voodoo->fifo_write_idx - voodoo->fifo_read_idx) +#define FIFO_FULL ((voodoo->fifo_write_idx - voodoo->fifo_read_idx) >= FIFO_SIZE-4) +#define FIFO_EMPTY (voodoo->fifo_read_idx == voodoo->fifo_write_idx) + +#define FIFO_TYPE 0xff000000 +#define FIFO_ADDR 0x00ffffff + +enum +{ + FIFO_INVALID = (0x00 << 24), + FIFO_WRITEL_REG = (0x01 << 24), + FIFO_WRITEW_FB = (0x02 << 24), + FIFO_WRITEL_FB = (0x03 << 24), + FIFO_WRITEL_TEX = (0x04 << 24), + FIFO_WRITEL_2DREG = (0x05 << 24) +}; + +#define PARAM_SIZE 1024 +#define PARAM_MASK (PARAM_SIZE - 1) +#define PARAM_ENTRY_SIZE (1 << 31) + +#define PARAM_ENTRIES(x) (voodoo->params_write_idx - voodoo->params_read_idx[x]) +#define PARAM_FULL(x) ((voodoo->params_write_idx - voodoo->params_read_idx[x]) >= PARAM_SIZE) +#define PARAM_EMPTY(x) (voodoo->params_read_idx[x] == voodoo->params_write_idx) + +typedef struct +{ + uint32_t addr_type; + uint32_t val; +} fifo_entry_t; + +typedef struct voodoo_params_t +{ + int command; + + int32_t vertexAx, vertexAy, vertexBx, vertexBy, vertexCx, vertexCy; + + uint32_t startR, startG, startB, startZ, startA; + + int32_t dBdX, dGdX, dRdX, dAdX, dZdX; + + int32_t dBdY, dGdY, dRdY, dAdY, dZdY; + + int64_t startW, dWdX, dWdY; + + struct + { + int64_t startS, startT, startW, p1; + int64_t dSdX, dTdX, dWdX, p2; + int64_t dSdY, dTdY, dWdY, p3; + } tmu[2]; + + uint32_t color0, color1; + + uint32_t fbzMode; + uint32_t fbzColorPath; + + uint32_t fogMode; + rgbvoodoo_t fogColor; + struct + { + uint8_t fog, dfog; + } fogTable[64]; + + uint32_t alphaMode; + + uint32_t zaColor; + + int chromaKey_r, chromaKey_g, chromaKey_b; + uint32_t chromaKey; + + uint32_t textureMode[2]; + uint32_t tLOD[2]; + + uint32_t texBaseAddr[2], texBaseAddr1[2], texBaseAddr2[2], texBaseAddr38[2]; + + uint32_t tex_base[2][LOD_MAX+2]; + uint32_t tex_end[2][LOD_MAX+2]; + int tex_width[2]; + int tex_w_mask[2][LOD_MAX+2]; + int tex_w_nmask[2][LOD_MAX+2]; + int tex_h_mask[2][LOD_MAX+2]; + int tex_shift[2][LOD_MAX+2]; + int tex_lod[2][LOD_MAX+2]; + int tex_entry[2]; + int detail_max[2], detail_bias[2], detail_scale[2]; + + uint32_t draw_offset, aux_offset; + + int tformat[2]; + + int clipLeft, clipRight, clipLowY, clipHighY; + int clipLeft1, clipRight1, clipLowY1, clipHighY1; + + int sign; + + uint32_t front_offset; + + uint32_t swapbufferCMD; + + uint32_t stipple; + + int col_tiled, aux_tiled; + int row_width, aux_row_width; +} voodoo_params_t; + +typedef struct texture_t +{ + uint32_t base; + uint32_t tLOD; + volatile int refcount, refcount_r[4]; + int is16; + uint32_t palette_checksum; + uint32_t addr_start[4], addr_end[4]; + uint32_t *data; +} texture_t; + +typedef struct vert_t +{ + float sVx, sVy; + float sRed, sGreen, sBlue, sAlpha; + float sVz, sWb; + float sW0, sS0, sT0; + float sW1, sS1, sT1; +} vert_t; + +typedef struct clip_t +{ + int x_min, x_max; + int y_min, y_max; +} clip_t; + +typedef struct voodoo_t +{ + mem_mapping_t mapping; + + int pci_enable; + + uint8_t dac_data[8]; + int dac_reg, dac_reg_ff; + uint8_t dac_readdata; + uint16_t dac_pll_regs[16]; + + float pixel_clock; + uint64_t line_time; + + voodoo_params_t params; + + uint32_t fbiInit0, fbiInit1, fbiInit2, fbiInit3, fbiInit4; + uint32_t fbiInit5, fbiInit6, fbiInit7; /*Voodoo 2*/ + + uint32_t initEnable; + + uint32_t lfbMode; + + uint32_t memBaseAddr; + + int_float fvertexAx, fvertexAy, fvertexBx, fvertexBy, fvertexCx, fvertexCy; + + uint32_t front_offset, back_offset; + + uint32_t fb_read_offset, fb_write_offset; + + int row_width, aux_row_width; + int block_width; + + int col_tiled, aux_tiled; + + uint8_t *fb_mem, *tex_mem[2]; + uint16_t *tex_mem_w[2]; + + int rgb_sel; + + uint32_t trexInit1[2]; + + uint32_t tmuConfig; + + mutex_t *swap_mutex; + int swap_count; + + int disp_buffer, draw_buffer; + pc_timer_t timer; + + int line; + svga_t *svga; + + uint32_t backPorch; + uint32_t videoDimensions; + uint32_t hSync, vSync; + + int h_total, v_total, v_disp; + int h_disp; + int v_retrace; + + struct + { + uint32_t y[4], i[4], q[4]; + } nccTable[2][2]; + + rgba_u palette[2][256]; + + rgba_u ncc_lookup[2][2][256]; + int ncc_dirty[2]; + + thread_t *fifo_thread; + thread_t *render_thread[4]; + event_t *wake_fifo_thread; + event_t *wake_main_thread; + event_t *fifo_not_full_event; + event_t *render_not_full_event[4]; + event_t *wake_render_thread[4]; + + int voodoo_busy; + int render_voodoo_busy[4]; + + int render_threads; + int odd_even_mask; + + int pixel_count[4], texel_count[4], tri_count, frame_count; + int pixel_count_old[4], texel_count_old[4]; + int wr_count, rd_count, tex_count; + + int retrace_count; + int swap_interval; + uint32_t swap_offset; + int swap_pending; + + int bilinear_enabled; + + int fb_size; + uint32_t fb_mask; + + int texture_size; + uint32_t texture_mask; + + int dual_tmus; + int type; + + fifo_entry_t fifo[FIFO_SIZE]; + volatile int fifo_read_idx, fifo_write_idx; + volatile int cmd_read, cmd_written, cmd_written_fifo; + + voodoo_params_t params_buffer[PARAM_SIZE]; + volatile int params_read_idx[4], params_write_idx; + + uint32_t cmdfifo_base, cmdfifo_end, cmdfifo_size; + int cmdfifo_rp; + volatile int cmdfifo_depth_rd, cmdfifo_depth_wr; + volatile int cmdfifo_enabled; + uint32_t cmdfifo_amin, cmdfifo_amax; + int cmdfifo_holecount; + + uint32_t sSetupMode; + vert_t verts[4]; + unsigned int vertex_ages[3]; + unsigned int vertex_next_age; + int num_verticies; + int cull_pingpong; + + int flush; + + int scrfilter; + int scrfilterEnabled; + int scrfilterThreshold; + int scrfilterThresholdOld; + + uint32_t last_write_addr; + + uint32_t fbiPixelsIn; + uint32_t fbiChromaFail; + uint32_t fbiZFuncFail; + uint32_t fbiAFuncFail; + uint32_t fbiPixelsOut; + + uint32_t bltSrcBaseAddr; + uint32_t bltDstBaseAddr; + int bltSrcXYStride, bltDstXYStride; + uint32_t bltSrcChromaRange, bltDstChromaRange; + int bltSrcChromaMinR, bltSrcChromaMinG, bltSrcChromaMinB; + int bltSrcChromaMaxR, bltSrcChromaMaxG, bltSrcChromaMaxB; + int bltDstChromaMinR, bltDstChromaMinG, bltDstChromaMinB; + int bltDstChromaMaxR, bltDstChromaMaxG, bltDstChromaMaxB; + + int bltClipRight, bltClipLeft; + int bltClipHighY, bltClipLowY; + + int bltSrcX, bltSrcY; + int bltDstX, bltDstY; + int bltSizeX, bltSizeY; + int bltRop[4]; + uint16_t bltColorFg, bltColorBg; + + uint32_t bltCommand; + + uint32_t leftOverlayBuf; + + struct + { + int dst_x, dst_y; + int cur_x; + int size_x, size_y; + int x_dir, y_dir; + int dst_stride; + } blt; + + struct + { + uint32_t bresError0, bresError1; + uint32_t clip0Min, clip0Max; + uint32_t clip1Min, clip1Max; + uint32_t colorBack, colorFore; + uint32_t command, commandExtra; + uint32_t dstBaseAddr; + uint32_t dstFormat; + uint32_t dstSize; + uint32_t dstXY; + uint32_t rop; + uint32_t srcBaseAddr; + uint32_t srcFormat; + uint32_t srcSize; + uint32_t srcXY; + + uint32_t colorPattern[64]; + + int bres_error_0, bres_error_1; + uint32_t colorPattern8[64], colorPattern16[64], colorPattern24[64]; + int cur_x, cur_y; + uint32_t dstBaseAddr_tiled; + uint32_t dstColorkeyMin, dstColorkeyMax; + int dstSizeX, dstSizeY; + int dstX, dstY; + int dst_stride; + int patoff_x, patoff_y; + uint8_t rops[4]; + uint32_t srcBaseAddr_tiled; + uint32_t srcColorkeyMin, srcColorkeyMax; + int srcSizeX, srcSizeY; + int srcX, srcY; + int src_stride; + int old_srcX; + + /*Used for handling packed 24bpp host data*/ + int host_data_remainder; + uint32_t old_host_data; + + /*Polyfill coordinates*/ + int lx[2], rx[2]; + int ly[2], ry[2]; + + /*Polyfill state*/ + int error[2]; + int dx[2], dy[2]; + int x_inc[2]; /*y_inc is always 1 for polyfill*/ + int lx_cur, rx_cur; + + clip_t clip[2]; + + uint8_t host_data[16384]; + int host_data_count; + int host_data_size_src, host_data_size_dest; + int src_stride_src, src_stride_dest; + + int src_bpp; + } banshee_blt; + + struct + { + uint32_t vidOverlayStartCoords; + uint32_t vidOverlayEndScreenCoords; + uint32_t vidOverlayDudx, vidOverlayDudxOffsetSrcWidth; + uint32_t vidOverlayDvdy, vidOverlayDvdyOffset; + //uint32_t vidDesktopOverlayStride; + + int start_x, start_y; + int end_x, end_y; + int size_x, size_y; + int overlay_bytes; + + unsigned int src_y; + } overlay; + + rgbvoodoo_t clutData[33]; + int clutData_dirty; + rgbvoodoo_t clutData256[256]; + uint32_t video_16to32[0x10000]; + + uint8_t dirty_line[2048]; + int dirty_line_low, dirty_line_high; + + int fb_write_buffer, fb_draw_buffer; + int buffer_cutoff; + + uint32_t tile_base, tile_stride; + int tile_stride_shift, tile_x, tile_x_real; + + int read_time, write_time, burst_time; + + pc_timer_t wake_timer; + + /* screen filter tables */ + uint8_t thefilter[256][256]; + uint8_t thefilterg[256][256]; + uint8_t thefilterb[256][256]; + uint16_t purpleline[256][3]; + + texture_t texture_cache[2][TEX_CACHE_MAX]; + uint8_t texture_present[2][16384]; + int texture_last_removed; + + uint32_t palette_checksum[2]; + int palette_dirty[2]; + + uint64_t time; + int render_time[4]; + + int use_recompiler; + void *codegen_data; + + struct voodoo_set_t *set; + + + uint8_t *vram, *changedvram; + + void *p; +} voodoo_t; + +typedef struct voodoo_set_t +{ + voodoo_t *voodoos[2]; + + mem_mapping_t snoop_mapping; + + int nr_cards; +} voodoo_set_t; + + +extern rgba8_t rgb332[0x100], ai44[0x100], rgb565[0x10000], argb1555[0x10000], argb4444[0x10000], ai88[0x10000]; + + +void voodoo_generate_vb_filters(voodoo_t *voodoo, int fcr, int fcg); + +void voodoo_recalc(voodoo_t *voodoo); +void voodoo_update_ncc(voodoo_t *voodoo, int tmu); + +void *voodoo_2d3d_card_init(int type); +void voodoo_card_close(voodoo_t *voodoo); diff --git a/src/include/86box/vid_voodoo_display.h b/src/include/86box/vid_voodoo_display.h new file mode 100644 index 000000000..4768a2b62 --- /dev/null +++ b/src/include/86box/vid_voodoo_display.h @@ -0,0 +1,24 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Graphics, 2, Banshee, 3 emulation. + * + * + * + * Authors: Sarah Walker, + * leilei + * + * Copyright 2008-2020 Sarah Walker. + */ + +void voodoo_update_ncc(voodoo_t *voodoo, int tmu); +void voodoo_pixelclock_update(voodoo_t *voodoo); +void voodoo_generate_filter_v1(voodoo_t *voodoo); +void voodoo_generate_filter_v2(voodoo_t *voodoo); +void voodoo_threshold_check(voodoo_t *voodoo); +void voodoo_callback(void *p); diff --git a/src/include/86box/vid_voodoo_dither.h b/src/include/86box/vid_voodoo_dither.h index 21baf772b..98c613120 100644 --- a/src/include/86box/vid_voodoo_dither.h +++ b/src/include/86box/vid_voodoo_dither.h @@ -1,4 +1,22 @@ -uint8_t dither_rb[256][4][4] = +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Graphics and 2 specific emulation. + * + * + * + * Authors: Sarah Walker, + * leilei + * + * Copyright 2008-2020 Sarah Walker. + */ + +static const uint8_t dither_rb[256][4][4] = { { {0, 0, 0, 0}, @@ -1538,7 +1556,7 @@ uint8_t dither_rb[256][4][4] = }, }; -uint8_t dither_g[256][4][4] = +static const uint8_t dither_g[256][4][4] = { { {0, 0, 0, 0}, @@ -3078,7 +3096,7 @@ uint8_t dither_g[256][4][4] = }, }; -uint8_t dither_rb2x2[256][2][2] = +static const uint8_t dither_rb2x2[256][2][2] = { { {0, 0}, @@ -4106,7 +4124,7 @@ uint8_t dither_rb2x2[256][2][2] = }, }; -uint8_t dither_g2x2[256][2][2] = +static const uint8_t dither_g2x2[256][2][2] = { { {0, 0}, diff --git a/src/include/86box/vid_voodoo_fb.h b/src/include/86box/vid_voodoo_fb.h new file mode 100644 index 000000000..95a223324 --- /dev/null +++ b/src/include/86box/vid_voodoo_fb.h @@ -0,0 +1,22 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Graphics, 2, Banshee, 3 emulation. + * + * + * + * Authors: Sarah Walker, + * leilei + * + * Copyright 2008-2020 Sarah Walker. + */ + +uint16_t voodoo_fb_readw(uint32_t addr, void *p); +uint32_t voodoo_fb_readl(uint32_t addr, void *p); +void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p); +void voodoo_fb_writel(uint32_t addr, uint32_t val, void *p); diff --git a/src/include/86box/vid_voodoo_fifo.h b/src/include/86box/vid_voodoo_fifo.h new file mode 100644 index 000000000..0ba2f6920 --- /dev/null +++ b/src/include/86box/vid_voodoo_fifo.h @@ -0,0 +1,26 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Graphics, 2, Banshee, 3 emulation. + * + * + * + * Authors: Sarah Walker, + * leilei + * + * Copyright 2008-2020 Sarah Walker. + */ + +void voodoo_wake_fifo_thread(voodoo_t *voodoo); +void voodoo_wake_fifo_thread_now(voodoo_t *voodoo); +void voodoo_wake_timer(void *p); +void voodoo_queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val); +void voodoo_flush(voodoo_t *voodoo); +void voodoo_wake_fifo_threads(voodoo_set_t *set, voodoo_t *voodoo); +void voodoo_wait_for_swap_complete(voodoo_t *voodoo); +void voodoo_fifo_thread(void *param); diff --git a/src/include/86box/vid_voodoo_reg.h b/src/include/86box/vid_voodoo_reg.h new file mode 100644 index 000000000..49fac4877 --- /dev/null +++ b/src/include/86box/vid_voodoo_reg.h @@ -0,0 +1,19 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Graphics, 2, Banshee, 3 emulation. + * + * + * + * Authors: Sarah Walker, + * leilei + * + * Copyright 2008-2020 Sarah Walker. + */ + +void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p); diff --git a/src/include/86box/vid_voodoo_regs.h b/src/include/86box/vid_voodoo_regs.h new file mode 100644 index 000000000..f7ab20899 --- /dev/null +++ b/src/include/86box/vid_voodoo_regs.h @@ -0,0 +1,691 @@ +enum +{ + SST_status = 0x000, + SST_intrCtrl = 0x004, + + SST_vertexAx = 0x008, + SST_vertexAy = 0x00c, + SST_vertexBx = 0x010, + SST_vertexBy = 0x014, + SST_vertexCx = 0x018, + SST_vertexCy = 0x01c, + + SST_startR = 0x0020, + SST_startG = 0x0024, + SST_startB = 0x0028, + SST_startZ = 0x002c, + SST_startA = 0x0030, + SST_startS = 0x0034, + SST_startT = 0x0038, + SST_startW = 0x003c, + + SST_dRdX = 0x0040, + SST_dGdX = 0x0044, + SST_dBdX = 0x0048, + SST_dZdX = 0x004c, + SST_dAdX = 0x0050, + SST_dSdX = 0x0054, + SST_dTdX = 0x0058, + SST_dWdX = 0x005c, + + SST_dRdY = 0x0060, + SST_dGdY = 0x0064, + SST_dBdY = 0x0068, + SST_dZdY = 0x006c, + SST_dAdY = 0x0070, + SST_dSdY = 0x0074, + SST_dTdY = 0x0078, + SST_dWdY = 0x007c, + + SST_triangleCMD = 0x0080, + + SST_fvertexAx = 0x088, + SST_fvertexAy = 0x08c, + SST_fvertexBx = 0x090, + SST_fvertexBy = 0x094, + SST_fvertexCx = 0x098, + SST_fvertexCy = 0x09c, + + SST_fstartR = 0x00a0, + SST_fstartG = 0x00a4, + SST_fstartB = 0x00a8, + SST_fstartZ = 0x00ac, + SST_fstartA = 0x00b0, + SST_fstartS = 0x00b4, + SST_fstartT = 0x00b8, + SST_fstartW = 0x00bc, + + SST_fdRdX = 0x00c0, + SST_fdGdX = 0x00c4, + SST_fdBdX = 0x00c8, + SST_fdZdX = 0x00cc, + SST_fdAdX = 0x00d0, + SST_fdSdX = 0x00d4, + SST_fdTdX = 0x00d8, + SST_fdWdX = 0x00dc, + + SST_fdRdY = 0x00e0, + SST_fdGdY = 0x00e4, + SST_fdBdY = 0x00e8, + SST_fdZdY = 0x00ec, + SST_fdAdY = 0x00f0, + SST_fdSdY = 0x00f4, + SST_fdTdY = 0x00f8, + SST_fdWdY = 0x00fc, + + SST_ftriangleCMD = 0x0100, + + SST_fbzColorPath = 0x104, + SST_fogMode = 0x108, + + SST_alphaMode = 0x10c, + SST_fbzMode = 0x110, + SST_lfbMode = 0x114, + + SST_clipLeftRight = 0x118, + SST_clipLowYHighY = 0x11c, + + SST_nopCMD = 0x120, + SST_fastfillCMD = 0x124, + SST_swapbufferCMD = 0x128, + + SST_fogColor = 0x12c, + SST_zaColor = 0x130, + SST_chromaKey = 0x134, + + SST_userIntrCMD = 0x13c, + SST_stipple = 0x140, + SST_color0 = 0x144, + SST_color1 = 0x148, + + SST_fbiPixelsIn = 0x14c, + SST_fbiChromaFail = 0x150, + SST_fbiZFuncFail = 0x154, + SST_fbiAFuncFail = 0x158, + SST_fbiPixelsOut = 0x15c, + + SST_fogTable00 = 0x160, + SST_fogTable01 = 0x164, + SST_fogTable02 = 0x168, + SST_fogTable03 = 0x16c, + SST_fogTable04 = 0x170, + SST_fogTable05 = 0x174, + SST_fogTable06 = 0x178, + SST_fogTable07 = 0x17c, + SST_fogTable08 = 0x180, + SST_fogTable09 = 0x184, + SST_fogTable0a = 0x188, + SST_fogTable0b = 0x18c, + SST_fogTable0c = 0x190, + SST_fogTable0d = 0x194, + SST_fogTable0e = 0x198, + SST_fogTable0f = 0x19c, + SST_fogTable10 = 0x1a0, + SST_fogTable11 = 0x1a4, + SST_fogTable12 = 0x1a8, + SST_fogTable13 = 0x1ac, + SST_fogTable14 = 0x1b0, + SST_fogTable15 = 0x1b4, + SST_fogTable16 = 0x1b8, + SST_fogTable17 = 0x1bc, + SST_fogTable18 = 0x1c0, + SST_fogTable19 = 0x1c4, + SST_fogTable1a = 0x1c8, + SST_fogTable1b = 0x1cc, + SST_fogTable1c = 0x1d0, + SST_fogTable1d = 0x1d4, + SST_fogTable1e = 0x1d8, + SST_fogTable1f = 0x1dc, + + SST_cmdFifoBaseAddr = 0x1e0, + SST_cmdFifoBump = 0x1e4, + SST_cmdFifoRdPtr = 0x1e8, + SST_cmdFifoAMin = 0x1ec, + SST_cmdFifoAMax = 0x1f0, + SST_cmdFifoDepth = 0x1f4, + SST_cmdFifoHoles = 0x1f8, + + SST_colBufferAddr = 0x1ec, /*Banshee*/ + SST_colBufferStride = 0x1f0, /*Banshee*/ + SST_auxBufferAddr = 0x1f4, /*Banshee*/ + SST_auxBufferStride = 0x1f8, /*Banshee*/ + + SST_clipLeftRight1 = 0x200, /*Banshee*/ + SST_clipTopBottom1 = 0x204, /*Banshee*/ + + SST_fbiInit4 = 0x200, + SST_vRetrace = 0x204, + SST_backPorch = 0x208, + SST_videoDimensions = 0x20c, + SST_fbiInit0 = 0x210, + SST_fbiInit1 = 0x214, + SST_fbiInit2 = 0x218, + SST_fbiInit3 = 0x21c, + SST_hSync = 0x220, + SST_vSync = 0x224, + SST_clutData = 0x228, + SST_dacData = 0x22c, + + SST_scrFilter = 0x230, + + SST_hvRetrace = 0x240, + SST_fbiInit5 = 0x244, + SST_fbiInit6 = 0x248, + SST_fbiInit7 = 0x24c, + + SST_swapPending = 0x24c, /*Banshee*/ + SST_leftOverlayBuf = 0x250, /*Banshee*/ + + SST_sSetupMode = 0x260, + SST_sVx = 0x264, + SST_sVy = 0x268, + SST_sARGB = 0x26c, + SST_sRed = 0x270, + SST_sGreen = 0x274, + SST_sBlue = 0x278, + SST_sAlpha = 0x27c, + SST_sVz = 0x280, + SST_sWb = 0x284, + SST_sW0 = 0x288, + SST_sS0 = 0x28c, + SST_sT0 = 0x290, + SST_sW1 = 0x294, + SST_sS1 = 0x298, + SST_sT1 = 0x29c, + + SST_sDrawTriCMD = 0x2a0, + SST_sBeginTriCMD = 0x2a4, + + SST_bltSrcBaseAddr = 0x2c0, + SST_bltDstBaseAddr = 0x2c4, + SST_bltXYStrides = 0x2c8, + SST_bltSrcChromaRange = 0x2cc, + SST_bltDstChromaRange = 0x2d0, + SST_bltClipX = 0x2d4, + SST_bltClipY = 0x2d8, + + SST_bltSrcXY = 0x2e0, + SST_bltDstXY = 0x2e4, + SST_bltSize = 0x2e8, + SST_bltRop = 0x2ec, + SST_bltColor = 0x2f0, + + SST_bltCommand = 0x2f8, + SST_bltData = 0x2fc, + + SST_textureMode = 0x300, + SST_tLOD = 0x304, + SST_tDetail = 0x308, + SST_texBaseAddr = 0x30c, + SST_texBaseAddr1 = 0x310, + SST_texBaseAddr2 = 0x314, + SST_texBaseAddr38 = 0x318, + + SST_trexInit1 = 0x320, + + SST_nccTable0_Y0 = 0x324, + SST_nccTable0_Y1 = 0x328, + SST_nccTable0_Y2 = 0x32c, + SST_nccTable0_Y3 = 0x330, + SST_nccTable0_I0 = 0x334, + SST_nccTable0_I1 = 0x338, + SST_nccTable0_I2 = 0x33c, + SST_nccTable0_I3 = 0x340, + SST_nccTable0_Q0 = 0x344, + SST_nccTable0_Q1 = 0x348, + SST_nccTable0_Q2 = 0x34c, + SST_nccTable0_Q3 = 0x350, + + SST_nccTable1_Y0 = 0x354, + SST_nccTable1_Y1 = 0x358, + SST_nccTable1_Y2 = 0x35c, + SST_nccTable1_Y3 = 0x360, + SST_nccTable1_I0 = 0x364, + SST_nccTable1_I1 = 0x368, + SST_nccTable1_I2 = 0x36c, + SST_nccTable1_I3 = 0x370, + SST_nccTable1_Q0 = 0x374, + SST_nccTable1_Q1 = 0x378, + SST_nccTable1_Q2 = 0x37c, + SST_nccTable1_Q3 = 0x380, + + SST_remap_status = 0x000 | 0x400, + + SST_remap_vertexAx = 0x008 | 0x400, + SST_remap_vertexAy = 0x00c | 0x400, + SST_remap_vertexBx = 0x010 | 0x400, + SST_remap_vertexBy = 0x014 | 0x400, + SST_remap_vertexCx = 0x018 | 0x400, + SST_remap_vertexCy = 0x01c | 0x400, + + SST_remap_startR = 0x0020 | 0x400, + SST_remap_startG = 0x002c | 0x400, + SST_remap_startB = 0x0038 | 0x400, + SST_remap_startZ = 0x0044 | 0x400, + SST_remap_startA = 0x0050 | 0x400, + SST_remap_startS = 0x005c | 0x400, + SST_remap_startT = 0x0068 | 0x400, + SST_remap_startW = 0x0074 | 0x400, + + SST_remap_dRdX = 0x0024 | 0x400, + SST_remap_dGdX = 0x0030 | 0x400, + SST_remap_dBdX = 0x003c | 0x400, + SST_remap_dZdX = 0x0048 | 0x400, + SST_remap_dAdX = 0x0054 | 0x400, + SST_remap_dSdX = 0x0060 | 0x400, + SST_remap_dTdX = 0x006c | 0x400, + SST_remap_dWdX = 0x0078 | 0x400, + + SST_remap_dRdY = 0x0028 | 0x400, + SST_remap_dGdY = 0x0034 | 0x400, + SST_remap_dBdY = 0x0040 | 0x400, + SST_remap_dZdY = 0x004c | 0x400, + SST_remap_dAdY = 0x0058 | 0x400, + SST_remap_dSdY = 0x0064 | 0x400, + SST_remap_dTdY = 0x0070 | 0x400, + SST_remap_dWdY = 0x007c | 0x400, + + SST_remap_triangleCMD = 0x0080 | 0x400, + + SST_remap_fvertexAx = 0x088 | 0x400, + SST_remap_fvertexAy = 0x08c | 0x400, + SST_remap_fvertexBx = 0x090 | 0x400, + SST_remap_fvertexBy = 0x094 | 0x400, + SST_remap_fvertexCx = 0x098 | 0x400, + SST_remap_fvertexCy = 0x09c | 0x400, + + SST_remap_fstartR = 0x00a0 | 0x400, + SST_remap_fstartG = 0x00ac | 0x400, + SST_remap_fstartB = 0x00b8 | 0x400, + SST_remap_fstartZ = 0x00c4 | 0x400, + SST_remap_fstartA = 0x00d0 | 0x400, + SST_remap_fstartS = 0x00dc | 0x400, + SST_remap_fstartT = 0x00e8 | 0x400, + SST_remap_fstartW = 0x00f4 | 0x400, + + SST_remap_fdRdX = 0x00a4 | 0x400, + SST_remap_fdGdX = 0x00b0 | 0x400, + SST_remap_fdBdX = 0x00bc | 0x400, + SST_remap_fdZdX = 0x00c8 | 0x400, + SST_remap_fdAdX = 0x00d4 | 0x400, + SST_remap_fdSdX = 0x00e0 | 0x400, + SST_remap_fdTdX = 0x00ec | 0x400, + SST_remap_fdWdX = 0x00f8 | 0x400, + + SST_remap_fdRdY = 0x00a8 | 0x400, + SST_remap_fdGdY = 0x00b4 | 0x400, + SST_remap_fdBdY = 0x00c0 | 0x400, + SST_remap_fdZdY = 0x00cc | 0x400, + SST_remap_fdAdY = 0x00d8 | 0x400, + SST_remap_fdSdY = 0x00e4 | 0x400, + SST_remap_fdTdY = 0x00f0 | 0x400, + SST_remap_fdWdY = 0x00fc | 0x400, +}; + +enum +{ + LFB_WRITE_FRONT = 0x0000, + LFB_WRITE_BACK = 0x0010, + LFB_WRITE_MASK = 0x0030 +}; + +enum +{ + LFB_READ_FRONT = 0x0000, + LFB_READ_BACK = 0x0040, + LFB_READ_AUX = 0x0080, + LFB_READ_MASK = 0x00c0 +}; + +enum +{ + LFB_FORMAT_RGB565 = 0, + LFB_FORMAT_RGB555 = 1, + LFB_FORMAT_ARGB1555 = 2, + LFB_FORMAT_ARGB8888 = 5, + LFB_FORMAT_DEPTH = 15, + LFB_FORMAT_MASK = 15 +}; + +enum +{ + LFB_WRITE_COLOUR = 1, + LFB_WRITE_DEPTH = 2 +}; + +enum +{ + FBZ_CHROMAKEY = (1 << 1), + FBZ_W_BUFFER = (1 << 3), + FBZ_DEPTH_ENABLE = (1 << 4), + + FBZ_DITHER = (1 << 8), + FBZ_RGB_WMASK = (1 << 9), + FBZ_DEPTH_WMASK = (1 << 10), + FBZ_DITHER_2x2 = (1 << 11), + + FBZ_DRAW_FRONT = 0x0000, + FBZ_DRAW_BACK = 0x4000, + FBZ_DRAW_MASK = 0xc000, + + FBZ_DEPTH_BIAS = (1 << 16), + + FBZ_DEPTH_SOURCE = (1 << 20), + + FBZ_PARAM_ADJUST = (1 << 26) +}; + +enum +{ + TEX_RGB332 = 0x0, + TEX_Y4I2Q2 = 0x1, + TEX_A8 = 0x2, + TEX_I8 = 0x3, + TEX_AI8 = 0x4, + TEX_PAL8 = 0x5, + TEX_APAL8 = 0x6, + TEX_ARGB8332 = 0x8, + TEX_A8Y4I2Q2 = 0x9, + TEX_R5G6B5 = 0xa, + TEX_ARGB1555 = 0xb, + TEX_ARGB4444 = 0xc, + TEX_A8I8 = 0xd, + TEX_APAL88 = 0xe +}; + +enum +{ + TEXTUREMODE_NCC_SEL = (1 << 5), + TEXTUREMODE_TCLAMPS = (1 << 6), + TEXTUREMODE_TCLAMPT = (1 << 7), + TEXTUREMODE_TRILINEAR = (1 << 30) +}; + +enum +{ + FBIINIT0_VGA_PASS = 1, + FBIINIT0_GRAPHICS_RESET = (1 << 1) +}; + +enum +{ + FBIINIT1_MULTI_SST = (1 << 2), /*Voodoo Graphics only*/ + FBIINIT1_VIDEO_RESET = (1 << 8), + FBIINIT1_SLI_ENABLE = (1 << 23) +}; + +enum +{ + FBIINIT2_SWAP_ALGORITHM_MASK = (3 << 9) +}; + +enum +{ + FBIINIT2_SWAP_ALGORITHM_DAC_VSYNC = (0 << 9), + FBIINIT2_SWAP_ALGORITHM_DAC_DATA = (1 << 9), + FBIINIT2_SWAP_ALGORITHM_PCI_FIFO_STALL = (2 << 9), + FBIINIT2_SWAP_ALGORITHM_SLI_SYNC = (3 << 9) +}; + +enum +{ + FBIINIT3_REMAP = 1 +}; + +enum +{ + FBIINIT5_MULTI_CVG = (1 << 14) +}; + +enum +{ + FBIINIT7_CMDFIFO_ENABLE = (1 << 8) +}; + +enum +{ + CC_LOCALSELECT_ITER_RGB = 0, + CC_LOCALSELECT_TEX = 1, + CC_LOCALSELECT_COLOR1 = 2, + CC_LOCALSELECT_LFB = 3 +}; + +enum +{ + CCA_LOCALSELECT_ITER_A = 0, + CCA_LOCALSELECT_COLOR0 = 1, + CCA_LOCALSELECT_ITER_Z = 2 +}; + +enum +{ + C_SEL_ITER_RGB = 0, + C_SEL_TEX = 1, + C_SEL_COLOR1 = 2, + C_SEL_LFB = 3 +}; + +enum +{ + A_SEL_ITER_A = 0, + A_SEL_TEX = 1, + A_SEL_COLOR1 = 2, + A_SEL_LFB = 3 +}; + +enum +{ + CC_MSELECT_ZERO = 0, + CC_MSELECT_CLOCAL = 1, + CC_MSELECT_AOTHER = 2, + CC_MSELECT_ALOCAL = 3, + CC_MSELECT_TEX = 4, + CC_MSELECT_TEXRGB = 5 +}; + +enum +{ + CCA_MSELECT_ZERO = 0, + CCA_MSELECT_ALOCAL = 1, + CCA_MSELECT_AOTHER = 2, + CCA_MSELECT_ALOCAL2 = 3, + CCA_MSELECT_TEX = 4 +}; + +enum +{ + TC_MSELECT_ZERO = 0, + TC_MSELECT_CLOCAL = 1, + TC_MSELECT_AOTHER = 2, + TC_MSELECT_ALOCAL = 3, + TC_MSELECT_DETAIL = 4, + TC_MSELECT_LOD_FRAC = 5 +}; + +enum +{ + TCA_MSELECT_ZERO = 0, + TCA_MSELECT_CLOCAL = 1, + TCA_MSELECT_AOTHER = 2, + TCA_MSELECT_ALOCAL = 3, + TCA_MSELECT_DETAIL = 4, + TCA_MSELECT_LOD_FRAC = 5 +}; + +enum +{ + CC_ADD_CLOCAL = 1, + CC_ADD_ALOCAL = 2 +}; + +enum +{ + CCA_ADD_CLOCAL = 1, + CCA_ADD_ALOCAL = 2 +}; + +enum +{ + AFUNC_AZERO = 0x0, + AFUNC_ASRC_ALPHA = 0x1, + AFUNC_A_COLOR = 0x2, + AFUNC_ADST_ALPHA = 0x3, + AFUNC_AONE = 0x4, + AFUNC_AOMSRC_ALPHA = 0x5, + AFUNC_AOM_COLOR = 0x6, + AFUNC_AOMDST_ALPHA = 0x7, + AFUNC_ASATURATE = 0xf +}; + +enum +{ + AFUNC_ACOLORBEFOREFOG = 0xf +}; + +enum +{ + AFUNC_NEVER = 0, + AFUNC_LESSTHAN = 1, + AFUNC_EQUAL = 2, + AFUNC_LESSTHANEQUAL = 3, + AFUNC_GREATERTHAN = 4, + AFUNC_NOTEQUAL = 5, + AFUNC_GREATERTHANEQUAL = 6, + AFUNC_ALWAYS = 7 +}; + +enum +{ + DEPTHOP_NEVER = 0, + DEPTHOP_LESSTHAN = 1, + DEPTHOP_EQUAL = 2, + DEPTHOP_LESSTHANEQUAL = 3, + DEPTHOP_GREATERTHAN = 4, + DEPTHOP_NOTEQUAL = 5, + DEPTHOP_GREATERTHANEQUAL = 6, + DEPTHOP_ALWAYS = 7 +}; + +enum +{ + FOG_ENABLE = 0x01, + FOG_ADD = 0x02, + FOG_MULT = 0x04, + FOG_ALPHA = 0x08, + FOG_Z = 0x10, + FOG_W = 0x18, + FOG_CONSTANT = 0x20 +}; + +enum +{ + LOD_ODD = (1 << 18), + LOD_SPLIT = (1 << 19), + LOD_S_IS_WIDER = (1 << 20), + LOD_TMULTIBASEADDR = (1 << 24), + LOD_TMIRROR_S = (1 << 28), + LOD_TMIRROR_T = (1 << 29) +}; +enum +{ + CMD_INVALID = 0, + CMD_DRAWTRIANGLE, + CMD_FASTFILL, + CMD_SWAPBUF +}; + +enum +{ + FBZCP_TEXTURE_ENABLED = (1 << 27) +}; + +enum +{ + BLTCMD_SRC_TILED = (1 << 14), + BLTCMD_DST_TILED = (1 << 15) +}; + +enum +{ + INITENABLE_SLI_MASTER_SLAVE = (1 << 11) +}; + +enum +{ + SETUPMODE_RGB = (1 << 0), + SETUPMODE_ALPHA = (1 << 1), + SETUPMODE_Z = (1 << 2), + SETUPMODE_Wb = (1 << 3), + SETUPMODE_W0 = (1 << 4), + SETUPMODE_S0_T0 = (1 << 5), + SETUPMODE_W1 = (1 << 6), + SETUPMODE_S1_T1 = (1 << 7), + + SETUPMODE_STRIP_MODE = (1 << 16), + SETUPMODE_CULLING_ENABLE = (1 << 17), + SETUPMODE_CULLING_SIGN = (1 << 18), + SETUPMODE_DISABLE_PINGPONG = (1 << 19) +}; + +#define TEXTUREMODE_MASK 0x3ffff000 +#define TEXTUREMODE_PASSTHROUGH 0 + +#define TEXTUREMODE_LOCAL_MASK 0x00643000 +#define TEXTUREMODE_LOCAL 0x00241000 + + +#define SLI_ENABLED (voodoo->fbiInit1 & FBIINIT1_SLI_ENABLE) +#define TRIPLE_BUFFER ((voodoo->fbiInit2 & 0x10) || (voodoo->fbiInit5 & 0x600) == 0x400) + + +#define _rgb_sel ( params->fbzColorPath & 3) +#define a_sel ( (params->fbzColorPath >> 2) & 3) +#define cc_localselect ( params->fbzColorPath & (1 << 4)) +#define cca_localselect ( (params->fbzColorPath >> 5) & 3) +#define cc_localselect_override ( params->fbzColorPath & (1 << 7)) +#define cc_zero_other ( params->fbzColorPath & (1 << 8)) +#define cc_sub_clocal ( params->fbzColorPath & (1 << 9)) +#define cc_mselect ( (params->fbzColorPath >> 10) & 7) +#define cc_reverse_blend ( params->fbzColorPath & (1 << 13)) +#define cc_add ( (params->fbzColorPath >> 14) & 3) +#define cc_add_alocal ( params->fbzColorPath & (1 << 15)) +#define cc_invert_output ( params->fbzColorPath & (1 << 16)) +#define cca_zero_other ( params->fbzColorPath & (1 << 17)) +#define cca_sub_clocal ( params->fbzColorPath & (1 << 18)) +#define cca_mselect ( (params->fbzColorPath >> 19) & 7) +#define cca_reverse_blend ( params->fbzColorPath & (1 << 22)) +#define cca_add ( (params->fbzColorPath >> 23) & 3) +#define cca_invert_output ( params->fbzColorPath & (1 << 25)) +#define tc_zero_other (params->textureMode[0] & (1 << 12)) +#define tc_sub_clocal (params->textureMode[0] & (1 << 13)) +#define tc_mselect ((params->textureMode[0] >> 14) & 7) +#define tc_reverse_blend (params->textureMode[0] & (1 << 17)) +#define tc_add_clocal (params->textureMode[0] & (1 << 18)) +#define tc_add_alocal (params->textureMode[0] & (1 << 19)) +#define tc_invert_output (params->textureMode[0] & (1 << 20)) +#define tca_zero_other (params->textureMode[0] & (1 << 21)) +#define tca_sub_clocal (params->textureMode[0] & (1 << 22)) +#define tca_mselect ((params->textureMode[0] >> 23) & 7) +#define tca_reverse_blend (params->textureMode[0] & (1 << 26)) +#define tca_add_clocal (params->textureMode[0] & (1 << 27)) +#define tca_add_alocal (params->textureMode[0] & (1 << 28)) +#define tca_invert_output (params->textureMode[0] & (1 << 29)) + +#define tc_sub_clocal_1 (params->textureMode[1] & (1 << 13)) +#define tc_mselect_1 ((params->textureMode[1] >> 14) & 7) +#define tc_reverse_blend_1 (params->textureMode[1] & (1 << 17)) +#define tc_add_clocal_1 (params->textureMode[1] & (1 << 18)) +#define tc_add_alocal_1 (params->textureMode[1] & (1 << 19)) +#define tca_sub_clocal_1 (params->textureMode[1] & (1 << 22)) +#define tca_mselect_1 ((params->textureMode[1] >> 23) & 7) +#define tca_reverse_blend_1 (params->textureMode[1] & (1 << 26)) +#define tca_add_clocal_1 (params->textureMode[1] & (1 << 27)) +#define tca_add_alocal_1 (params->textureMode[1] & (1 << 28)) + +#define src_afunc ( (params->alphaMode >> 8) & 0xf) +#define dest_afunc ( (params->alphaMode >> 12) & 0xf) +#define alpha_func ( (params->alphaMode >> 1) & 7) +#define a_ref ( params->alphaMode >> 24) +#define depth_op ( (params->fbzMode >> 5) & 7) +#define dither ( params->fbzMode & FBZ_DITHER) +#define dither2x2 (params->fbzMode & FBZ_DITHER_2x2) diff --git a/src/include/86box/vid_voodoo_render.h b/src/include/86box/vid_voodoo_render.h new file mode 100644 index 000000000..1cb74fa5f --- /dev/null +++ b/src/include/86box/vid_voodoo_render.h @@ -0,0 +1,338 @@ +#if !(defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined WIN32 || defined _WIN32 || defined _WIN32) && !(defined __amd64__) +#define NO_CODEGEN +#endif + +#ifndef NO_CODEGEN +void voodoo_codegen_init(voodoo_t *voodoo); +void voodoo_codegen_close(voodoo_t *voodoo); +#endif + +#define DEPTH_TEST(comp_depth) \ + do \ + { \ + switch (depth_op) \ + { \ + case DEPTHOP_NEVER: \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + case DEPTHOP_LESSTHAN: \ + if (!(comp_depth < old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_EQUAL: \ + if (!(comp_depth == old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_LESSTHANEQUAL: \ + if (!(comp_depth <= old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_GREATERTHAN: \ + if (!(comp_depth > old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_NOTEQUAL: \ + if (!(comp_depth != old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_GREATERTHANEQUAL: \ + if (!(comp_depth >= old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_ALWAYS: \ + break; \ + } \ + } while (0) + +#define APPLY_FOG(src_r, src_g, src_b, z, ia, w) \ + do \ + { \ + if (params->fogMode & FOG_CONSTANT) \ + { \ + src_r += params->fogColor.r; \ + src_g += params->fogColor.g; \ + src_b += params->fogColor.b; \ + } \ + else \ + { \ + int fog_r, fog_g, fog_b, fog_a = 0; \ + int fog_idx; \ + \ + if (!(params->fogMode & FOG_ADD)) \ + { \ + fog_r = params->fogColor.r; \ + fog_g = params->fogColor.g; \ + fog_b = params->fogColor.b; \ + } \ + else \ + fog_r = fog_g = fog_b = 0; \ + \ + if (!(params->fogMode & FOG_MULT)) \ + { \ + fog_r -= src_r; \ + fog_g -= src_g; \ + fog_b -= src_b; \ + } \ + \ + switch (params->fogMode & (FOG_Z|FOG_ALPHA)) \ + { \ + case 0: \ + fog_idx = (w_depth >> 10) & 0x3f; \ + \ + fog_a = params->fogTable[fog_idx].fog; \ + fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10; \ + break; \ + case FOG_Z: \ + fog_a = (z >> 20) & 0xff; \ + break; \ + case FOG_ALPHA: \ + fog_a = CLAMP(ia >> 12); \ + break; \ + case FOG_W: \ + fog_a = CLAMP((w >> 32) & 0xff); \ + break; \ + } \ + fog_a++; \ + \ + fog_r = (fog_r * fog_a) >> 8; \ + fog_g = (fog_g * fog_a) >> 8; \ + fog_b = (fog_b * fog_a) >> 8; \ + \ + if (params->fogMode & FOG_MULT) \ + { \ + src_r = fog_r; \ + src_g = fog_g; \ + src_b = fog_b; \ + } \ + else \ + { \ + src_r += fog_r; \ + src_g += fog_g; \ + src_b += fog_b; \ + } \ + } \ + \ + src_r = CLAMP(src_r); \ + src_g = CLAMP(src_g); \ + src_b = CLAMP(src_b); \ + } while (0) + +#define ALPHA_TEST(src_a) \ + do \ + { \ + switch (alpha_func) \ + { \ + case AFUNC_NEVER: \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + case AFUNC_LESSTHAN: \ + if (!(src_a < a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_EQUAL: \ + if (!(src_a == a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_LESSTHANEQUAL: \ + if (!(src_a <= a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_GREATERTHAN: \ + if (!(src_a > a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_NOTEQUAL: \ + if (!(src_a != a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_GREATERTHANEQUAL: \ + if (!(src_a >= a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_ALWAYS: \ + break; \ + } \ + } while (0) + +#define ALPHA_BLEND(src_r, src_g, src_b, src_a) \ + do \ + { \ + int _a; \ + int newdest_r = 0, newdest_g = 0, newdest_b = 0; \ + \ + switch (dest_afunc) \ + { \ + case AFUNC_AZERO: \ + newdest_r = newdest_g = newdest_b = 0; \ + break; \ + case AFUNC_ASRC_ALPHA: \ + newdest_r = (dest_r * src_a) / 255; \ + newdest_g = (dest_g * src_a) / 255; \ + newdest_b = (dest_b * src_a) / 255; \ + break; \ + case AFUNC_A_COLOR: \ + newdest_r = (dest_r * src_r) / 255; \ + newdest_g = (dest_g * src_g) / 255; \ + newdest_b = (dest_b * src_b) / 255; \ + break; \ + case AFUNC_ADST_ALPHA: \ + newdest_r = (dest_r * dest_a) / 255; \ + newdest_g = (dest_g * dest_a) / 255; \ + newdest_b = (dest_b * dest_a) / 255; \ + break; \ + case AFUNC_AONE: \ + newdest_r = dest_r; \ + newdest_g = dest_g; \ + newdest_b = dest_b; \ + break; \ + case AFUNC_AOMSRC_ALPHA: \ + newdest_r = (dest_r * (255-src_a)) / 255; \ + newdest_g = (dest_g * (255-src_a)) / 255; \ + newdest_b = (dest_b * (255-src_a)) / 255; \ + break; \ + case AFUNC_AOM_COLOR: \ + newdest_r = (dest_r * (255-src_r)) / 255; \ + newdest_g = (dest_g * (255-src_g)) / 255; \ + newdest_b = (dest_b * (255-src_b)) / 255; \ + break; \ + case AFUNC_AOMDST_ALPHA: \ + newdest_r = (dest_r * (255-dest_a)) / 255; \ + newdest_g = (dest_g * (255-dest_a)) / 255; \ + newdest_b = (dest_b * (255-dest_a)) / 255; \ + break; \ + case AFUNC_ASATURATE: \ + _a = MIN(src_a, 1-dest_a); \ + newdest_r = (dest_r * _a) / 255; \ + newdest_g = (dest_g * _a) / 255; \ + newdest_b = (dest_b * _a) / 255; \ + break; \ + } \ + \ + switch (src_afunc) \ + { \ + case AFUNC_AZERO: \ + src_r = src_g = src_b = 0; \ + break; \ + case AFUNC_ASRC_ALPHA: \ + src_r = (src_r * src_a) / 255; \ + src_g = (src_g * src_a) / 255; \ + src_b = (src_b * src_a) / 255; \ + break; \ + case AFUNC_A_COLOR: \ + src_r = (src_r * dest_r) / 255; \ + src_g = (src_g * dest_g) / 255; \ + src_b = (src_b * dest_b) / 255; \ + break; \ + case AFUNC_ADST_ALPHA: \ + src_r = (src_r * dest_a) / 255; \ + src_g = (src_g * dest_a) / 255; \ + src_b = (src_b * dest_a) / 255; \ + break; \ + case AFUNC_AONE: \ + break; \ + case AFUNC_AOMSRC_ALPHA: \ + src_r = (src_r * (255-src_a)) / 255; \ + src_g = (src_g * (255-src_a)) / 255; \ + src_b = (src_b * (255-src_a)) / 255; \ + break; \ + case AFUNC_AOM_COLOR: \ + src_r = (src_r * (255-dest_r)) / 255; \ + src_g = (src_g * (255-dest_g)) / 255; \ + src_b = (src_b * (255-dest_b)) / 255; \ + break; \ + case AFUNC_AOMDST_ALPHA: \ + src_r = (src_r * (255-dest_a)) / 255; \ + src_g = (src_g * (255-dest_a)) / 255; \ + src_b = (src_b * (255-dest_a)) / 255; \ + break; \ + case AFUNC_ACOLORBEFOREFOG: \ + fatal("AFUNC_ACOLORBEFOREFOG\n"); \ + break; \ + } \ + \ + src_r += newdest_r; \ + src_g += newdest_g; \ + src_b += newdest_b; \ + \ + src_r = CLAMP(src_r); \ + src_g = CLAMP(src_g); \ + src_b = CLAMP(src_b); \ + } while(0) + + + +void voodoo_render_thread_1(void *param); +void voodoo_render_thread_2(void *param); +void voodoo_render_thread_3(void *param); +void voodoo_render_thread_4(void *param); +void voodoo_queue_triangle(voodoo_t *voodoo, voodoo_params_t *params); + +extern int voodoo_recomp; +extern int tris; + +static __inline void voodoo_wake_render_thread(voodoo_t *voodoo) +{ + thread_set_event(voodoo->wake_render_thread[0]); /*Wake up render thread if moving from idle*/ + if (voodoo->render_threads >= 2) + thread_set_event(voodoo->wake_render_thread[1]); /*Wake up render thread if moving from idle*/ + if (voodoo->render_threads == 4) + { + thread_set_event(voodoo->wake_render_thread[2]); /*Wake up render thread if moving from idle*/ + thread_set_event(voodoo->wake_render_thread[3]); /*Wake up render thread if moving from idle*/ + } +} + +static __inline void voodoo_wait_for_render_thread_idle(voodoo_t *voodoo) +{ + while (!PARAM_EMPTY(0) || (voodoo->render_threads >= 2 && !PARAM_EMPTY(1)) || + (voodoo->render_threads == 4 && (!PARAM_EMPTY(2) || !PARAM_EMPTY(3))) || + voodoo->render_voodoo_busy[0] || (voodoo->render_threads >= 2 && voodoo->render_voodoo_busy[1]) || + (voodoo->render_threads == 4 && (voodoo->render_voodoo_busy[2] || voodoo->render_voodoo_busy[3]))) + { + voodoo_wake_render_thread(voodoo); + if (!PARAM_EMPTY(0) || voodoo->render_voodoo_busy[0]) + thread_wait_event(voodoo->render_not_full_event[0], 1); + if (voodoo->render_threads >= 2 && (!PARAM_EMPTY(1) || voodoo->render_voodoo_busy[1])) + thread_wait_event(voodoo->render_not_full_event[1], 1); + if (voodoo->render_threads == 4 && (!PARAM_EMPTY(2) || voodoo->render_voodoo_busy[2])) + thread_wait_event(voodoo->render_not_full_event[2], 1); + if (voodoo->render_threads == 4 && (!PARAM_EMPTY(3) || voodoo->render_voodoo_busy[3])) + thread_wait_event(voodoo->render_not_full_event[3], 1); + } +} diff --git a/src/include/86box/vid_voodoo_setup.h b/src/include/86box/vid_voodoo_setup.h new file mode 100644 index 000000000..19056dd6b --- /dev/null +++ b/src/include/86box/vid_voodoo_setup.h @@ -0,0 +1,18 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Graphics, 2, Banshee, 3 emulation. + * + * + * + * Authors: Sarah Walker, + * leilei + * + * Copyright 2008-2020 Sarah Walker. + */ +void voodoo_triangle_setup(voodoo_t *voodoo); diff --git a/src/include/86box/vid_voodoo_texture.h b/src/include/86box/vid_voodoo_texture.h new file mode 100644 index 000000000..40c2b739a --- /dev/null +++ b/src/include/86box/vid_voodoo_texture.h @@ -0,0 +1,36 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Graphics, 2, Banshee, 3 emulation. + * + * + * + * Authors: Sarah Walker, + * leilei + * + * Copyright 2008-2020 Sarah Walker. + */ +static const uint32_t texture_offset[LOD_MAX+3] = +{ + 0, + 256*256, + 256*256 + 128*128, + 256*256 + 128*128 + 64*64, + 256*256 + 128*128 + 64*64 + 32*32, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1 + 1 +}; + +void voodoo_recalc_tex(voodoo_t *voodoo, int tmu); +void voodoo_use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu); +void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p); +void flush_texture_cache(voodoo_t *voodoo, uint32_t dirty_addr, int tmu); diff --git a/src/include/86box/video.h b/src/include/86box/video.h index 5db33976e..8997d3857 100644 --- a/src/include/86box/video.h +++ b/src/include/86box/video.h @@ -362,6 +362,10 @@ extern const device_t ps1vga_mca_device; /* 3DFX Voodoo Graphics */ extern const device_t voodoo_device; +extern const device_t voodoo_banshee_device; +extern const device_t creative_voodoo_banshee_device; +extern const device_t voodoo_3_2000_device; +extern const device_t voodoo_3_3000_device; /* Wyse 700 */ extern const device_t wy700_device; diff --git a/src/machine/m_at_286_386sx.c b/src/machine/m_at_286_386sx.c index 45f4fab1c..16a21b0e0 100644 --- a/src/machine/m_at_286_386sx.c +++ b/src/machine/m_at_286_386sx.c @@ -225,7 +225,8 @@ machine_at_micronics386_init(const machine_t *model) static void machine_at_scat_init(const machine_t *model, int is_v4) { - machine_at_init(model); + machine_at_common_init(model); + device_add(&keyboard_at_ami_device); device_add(&fdc_at_device); if (is_v4) diff --git a/src/machine/m_at_socket7.c b/src/machine/m_at_socket7.c index af7016e8f..c2d2d6e5d 100644 --- a/src/machine/m_at_socket7.c +++ b/src/machine/m_at_socket7.c @@ -913,7 +913,7 @@ machine_at_an430tx_init(const machine_t *model) L"roms/machines/an430tx/P10-0095.BI1", L"roms/machines/an430tx/P10-0095.BI2", L"roms/machines/an430tx/P10-0095.BI3", - NULL, + L"roms/machines/an430tx/P10-0095.RCV", 0x3a000, 160); if (bios_only || !ret) @@ -924,11 +924,11 @@ machine_at_an430tx_init(const machine_t *model) pci_init(PCI_CONFIG_TYPE_1); pci_register_slot(0x00, PCI_CARD_NORTHBRIDGE, 0, 0, 0, 0); pci_register_slot(0x07, PCI_CARD_SOUTHBRIDGE, 1, 2, 3, 4); /* PIIX4 */ + pci_register_slot(0x08, PCI_CARD_VIDEO, 4, 0, 0, 0); pci_register_slot(0x0D, PCI_CARD_NORMAL, 1, 2, 3, 4); pci_register_slot(0x0E, PCI_CARD_NORMAL, 2, 3, 4, 1); pci_register_slot(0x0F, PCI_CARD_NORMAL, 3, 4, 1, 2); pci_register_slot(0x10, PCI_CARD_NORMAL, 4, 1, 2, 3); - pci_register_slot(0x08, PCI_CARD_VIDEO, 4, 0, 0, 0); device_add(&i430tx_device); device_add(&piix4_device); device_add(&keyboard_ps2_ami_pci_device); @@ -1028,7 +1028,7 @@ machine_at_ficva502_init(const machine_t *model) device_add(&via_vpx_device); device_add(&via_vt82c586b_device); device_add(&keyboard_ps2_pci_device); - device_add(&fdc37c669_device); + device_add(&fdc37c669_370_device); device_add(&sst_flash_29ee010_device); spd_register(SPD_TYPE_SDRAM, 0x3, 256); @@ -1062,7 +1062,7 @@ machine_at_ficpa2012_init(const machine_t *model) device_add(&via_vt82c586b_device); device_add(&keyboard_ps2_pci_device); device_add(&w83877f_device); - device_add(&sst_flash_39sf010_device); + device_add(&sst_flash_29ee010_device); spd_register(SPD_TYPE_SDRAM, 0x7, 512); return ret; diff --git a/src/machine/m_at_socket8.c b/src/machine/m_at_socket8.c index 648598ab6..5a1206ba3 100644 --- a/src/machine/m_at_socket8.c +++ b/src/machine/m_at_socket8.c @@ -268,7 +268,7 @@ machine_at_p65up5_common_init(const machine_t *model, const device_t *northbridg device_add(&piix3_device); device_add(&keyboard_ps2_ami_pci_device); device_add(&w83877f_device); - device_add(&intel_flash_bxt_device); + device_add(&sst_flash_29ee010_device); device_add(&ioapic_device); } diff --git a/src/mem/intel_flash.c b/src/mem/intel_flash.c index 4d0c8b6f3..39922c83c 100644 --- a/src/mem/intel_flash.c +++ b/src/mem/intel_flash.c @@ -40,8 +40,8 @@ enum { BLOCK_MAIN1, BLOCK_MAIN2, - BLOCK_MAIN3, - BLOCK_MAIN4, + BLOCK_MAIN3, + BLOCK_MAIN4, BLOCK_DATA1, BLOCK_DATA2, BLOCK_BOOT, @@ -185,7 +185,7 @@ flash_write(uint32_t addr, uint8_t val, void *p) switch (dev->command) { case CMD_ERASE_SETUP: if (val == CMD_ERASE_CONFIRM) { - for (i = 0; i < 3; i++) { + for (i = 0; i < 6; i++) { if ((i == dev->program_addr) && (addr >= dev->block_start[i]) && (addr <= dev->block_end[i])) memset(&(dev->array[dev->block_start[i]]), 0xff, dev->block_len[i]); } @@ -197,7 +197,7 @@ flash_write(uint32_t addr, uint8_t val, void *p) case CMD_PROGRAM_SETUP: case CMD_PROGRAM_SETUP_ALT: - if (((addr & bb_mask) != (dev->block_start[4] & bb_mask)) && (addr == dev->program_addr)) + if (((addr & bb_mask) != (dev->block_start[6] & bb_mask)) && (addr == dev->program_addr)) dev->array[addr] = val; dev->command = CMD_READ_STATUS; dev->status = 0x80; @@ -210,7 +210,7 @@ flash_write(uint32_t addr, uint8_t val, void *p) dev->status = 0; break; case CMD_ERASE_SETUP: - for (i = 0; i < 3; i++) { + for (i = 0; i < 7; i++) { if ((addr >= dev->block_start[i]) && (addr <= dev->block_end[i])) dev->program_addr = i; } @@ -242,7 +242,7 @@ flash_writew(uint32_t addr, uint16_t val, void *p) if (dev->flags & FLAG_WORD) switch (dev->command) { case CMD_ERASE_SETUP: if (val == CMD_ERASE_CONFIRM) { - for (i = 0; i < 3; i++) { + for (i = 0; i < 6; i++) { if ((i == dev->program_addr) && (addr >= dev->block_start[i]) && (addr <= dev->block_end[i])) memset(&(dev->array[dev->block_start[i]]), 0xff, dev->block_len[i]); } @@ -254,7 +254,7 @@ flash_writew(uint32_t addr, uint16_t val, void *p) case CMD_PROGRAM_SETUP: case CMD_PROGRAM_SETUP_ALT: - if (((addr & bb_mask) != (dev->block_start[4] & bb_mask)) && (addr == dev->program_addr)) + if (((addr & bb_mask) != (dev->block_start[6] & bb_mask)) && (addr == dev->program_addr)) *(uint16_t *) (&dev->array[addr]) = val; dev->command = CMD_READ_STATUS; dev->status = 0x80; @@ -267,7 +267,7 @@ flash_writew(uint32_t addr, uint16_t val, void *p) dev->status = 0; break; case CMD_ERASE_SETUP: - for (i = 0; i < 3; i++) { + for (i = 0; i < 7; i++) { if ((addr >= dev->block_start[i]) && (addr <= dev->block_end[i])) dev->program_addr = i; } @@ -381,53 +381,52 @@ intel_flash_init(const device_t *info) dev->array = (uint8_t *) malloc(biosmask + 1); memset(dev->array, 0xff, biosmask + 1); - switch(biosmask){ + switch (biosmask) { case 0x7ffff: + if (dev->flags & FLAG_WORD) + dev->flash_id = (dev->flags & FLAG_BXB) ? 0x4471 : 0x4470; + else + dev->flash_id =(dev->flags & FLAG_BXB) ? 0x8A : 0x89; - if (dev->flags & FLAG_WORD) - dev->flash_id = (dev->flags & FLAG_BXB) ? 0x4471 :0x4470; - else - dev->flash_id =(dev->flags & FLAG_BXB) ? 0x8A : 0x89; + /* The block lengths are the same both flash types. */ + dev->block_len[BLOCK_MAIN1] = 0x20000; + dev->block_len[BLOCK_MAIN2] = 0x20000; + dev->block_len[BLOCK_MAIN3] = 0x20000; + dev->block_len[BLOCK_MAIN4] = 0x18000; + dev->block_len[BLOCK_DATA1] = 0x02000; + dev->block_len[BLOCK_DATA2] = 0x02000; + dev->block_len[BLOCK_BOOT] = 0x04000; - /* The block lengths are the same both flash types. */ - dev->block_len[BLOCK_MAIN1] = 0x20000; - dev->block_len[BLOCK_MAIN2] = 0x20000; - dev->block_len[BLOCK_MAIN3] = 0x20000; - dev->block_len[BLOCK_MAIN4] = 0x18000; - dev->block_len[BLOCK_DATA1] = 0x02000; - dev->block_len[BLOCK_DATA2] = 0x02000; - dev->block_len[BLOCK_BOOT] = 0x04000; - - if (dev->flags & FLAG_BXB) { /* 28F004BX-T/28F400BX-B */ - dev->block_start[BLOCK_BOOT] = 0x00000; /* MAIN BLOCK 1 */ - dev->block_end[BLOCK_BOOT] = 0x1ffff; - dev->block_start[BLOCK_DATA2] = 0x20000; /* MAIN BLOCK 2 */ - dev->block_end[BLOCK_DATA2] = 0x3ffff; - dev->block_start[BLOCK_DATA1] = 0x40000; /* MAIN BLOCK 3 */ - dev->block_end[BLOCK_DATA1] = 0x5ffff; - dev->block_start[BLOCK_MAIN4] = 0x60000; /* MAIN BLOCK 4 */ - dev->block_end[BLOCK_MAIN4] = 0x77fff; - dev->block_start[BLOCK_MAIN3] = 0x78000; /* DATA AREA 1 BLOCK */ - dev->block_end[BLOCK_MAIN3] = 0x79fff; - dev->block_start[BLOCK_MAIN2] = 0x7a000; /* DATA AREA 2 BLOCK */ - dev->block_end[BLOCK_MAIN2] = 0x7bfff; - dev->block_start[BLOCK_MAIN1] = 0x7c000; /* BOOT BLOCK */ - dev->block_end[BLOCK_MAIN1] = 0x7ffff; - } else { - dev->block_start[BLOCK_MAIN1] = 0x00000; /* MAIN BLOCK 1 */ - dev->block_end[BLOCK_MAIN1] = 0x1ffff; - dev->block_start[BLOCK_MAIN2] = 0x20000; /* MAIN BLOCK 2 */ - dev->block_end[BLOCK_MAIN2] = 0x3ffff; - dev->block_start[BLOCK_MAIN3] = 0x40000; /* MAIN BLOCK 3 */ - dev->block_end[BLOCK_MAIN3] = 0x5ffff; - dev->block_start[BLOCK_MAIN4] = 0x60000; /* MAIN BLOCK 4 */ - dev->block_end[BLOCK_MAIN4] = 0x77fff; - dev->block_start[BLOCK_DATA1] = 0x78000; /* DATA AREA 1 BLOCK */ - dev->block_end[BLOCK_DATA1] = 0x79fff; - dev->block_start[BLOCK_DATA2] = 0x7a000; /* DATA AREA 2 BLOCK */ - dev->block_end[BLOCK_DATA2] = 0x7bfff; - dev->block_start[BLOCK_BOOT] = 0x7c000; /* BOOT BLOCK */ - dev->block_end[BLOCK_BOOT] = 0x7ffff; + if (dev->flags & FLAG_BXB) { /* 28F004BX-T/28F400BX-B */ + dev->block_start[BLOCK_BOOT] = 0x00000; /* MAIN BLOCK 1 */ + dev->block_end[BLOCK_BOOT] = 0x1ffff; + dev->block_start[BLOCK_DATA2] = 0x20000; /* MAIN BLOCK 2 */ + dev->block_end[BLOCK_DATA2] = 0x3ffff; + dev->block_start[BLOCK_DATA1] = 0x40000; /* MAIN BLOCK 3 */ + dev->block_end[BLOCK_DATA1] = 0x5ffff; + dev->block_start[BLOCK_MAIN4] = 0x60000; /* MAIN BLOCK 4 */ + dev->block_end[BLOCK_MAIN4] = 0x77fff; + dev->block_start[BLOCK_MAIN3] = 0x78000; /* DATA AREA 1 BLOCK */ + dev->block_end[BLOCK_MAIN3] = 0x79fff; + dev->block_start[BLOCK_MAIN2] = 0x7a000; /* DATA AREA 2 BLOCK */ + dev->block_end[BLOCK_MAIN2] = 0x7bfff; + dev->block_start[BLOCK_MAIN1] = 0x7c000; /* BOOT BLOCK */ + dev->block_end[BLOCK_MAIN1] = 0x7ffff; + } else { + dev->block_start[BLOCK_MAIN1] = 0x00000; /* MAIN BLOCK 1 */ + dev->block_end[BLOCK_MAIN1] = 0x1ffff; + dev->block_start[BLOCK_MAIN2] = 0x20000; /* MAIN BLOCK 2 */ + dev->block_end[BLOCK_MAIN2] = 0x3ffff; + dev->block_start[BLOCK_MAIN3] = 0x40000; /* MAIN BLOCK 3 */ + dev->block_end[BLOCK_MAIN3] = 0x5ffff; + dev->block_start[BLOCK_MAIN4] = 0x60000; /* MAIN BLOCK 4 */ + dev->block_end[BLOCK_MAIN4] = 0x77fff; + dev->block_start[BLOCK_DATA1] = 0x78000; /* DATA AREA 1 BLOCK */ + dev->block_end[BLOCK_DATA1] = 0x79fff; + dev->block_start[BLOCK_DATA2] = 0x7a000; /* DATA AREA 2 BLOCK */ + dev->block_end[BLOCK_DATA2] = 0x7bfff; + dev->block_start[BLOCK_BOOT] = 0x7c000; /* BOOT BLOCK */ + dev->block_end[BLOCK_BOOT] = 0x7ffff; } break; @@ -440,6 +439,8 @@ intel_flash_init(const device_t *info) /* The block lengths are the same both flash types. */ dev->block_len[BLOCK_MAIN1] = 0x20000; dev->block_len[BLOCK_MAIN2] = 0x18000; + dev->block_len[BLOCK_MAIN3] = 0x00000; + dev->block_len[BLOCK_MAIN4] = 0x00000; dev->block_len[BLOCK_DATA1] = 0x02000; dev->block_len[BLOCK_DATA2] = 0x02000; dev->block_len[BLOCK_BOOT] = 0x04000; @@ -449,6 +450,10 @@ intel_flash_init(const device_t *info) dev->block_end[BLOCK_MAIN1] = 0x3ffff; dev->block_start[BLOCK_MAIN2] = 0x08000; /* MAIN BLOCK 2 */ dev->block_end[BLOCK_MAIN2] = 0x1ffff; + dev->block_start[BLOCK_MAIN3] = 0xfffff; /* MAIN BLOCK 3 */ + dev->block_end[BLOCK_MAIN3] = 0xfffff; + dev->block_start[BLOCK_MAIN4] = 0xfffff; /* MAIN BLOCK 4 */ + dev->block_end[BLOCK_MAIN4] = 0xfffff; dev->block_start[BLOCK_DATA1] = 0x06000; /* DATA AREA 1 BLOCK */ dev->block_end[BLOCK_DATA1] = 0x07fff; dev->block_start[BLOCK_DATA2] = 0x04000; /* DATA AREA 2 BLOCK */ @@ -460,6 +465,10 @@ intel_flash_init(const device_t *info) dev->block_end[BLOCK_MAIN1] = 0x1ffff; dev->block_start[BLOCK_MAIN2] = 0x20000; /* MAIN BLOCK 2 */ dev->block_end[BLOCK_MAIN2] = 0x37fff; + dev->block_start[BLOCK_MAIN3] = 0xfffff; /* MAIN BLOCK 3 */ + dev->block_end[BLOCK_MAIN3] = 0xfffff; + dev->block_start[BLOCK_MAIN4] = 0xfffff; /* MAIN BLOCK 4 */ + dev->block_end[BLOCK_MAIN4] = 0xfffff; dev->block_start[BLOCK_DATA1] = 0x38000; /* DATA AREA 1 BLOCK */ dev->block_end[BLOCK_DATA1] = 0x39fff; dev->block_start[BLOCK_DATA2] = 0x3a000; /* DATA AREA 2 BLOCK */ @@ -467,14 +476,16 @@ intel_flash_init(const device_t *info) dev->block_start[BLOCK_BOOT] = 0x3c000; /* BOOT BLOCK */ dev->block_end[BLOCK_BOOT] = 0x3ffff; } - break; + break; - default: + default: dev->flash_id = (type & FLAG_BXB) ? 0x95 : 0x94; /* The block lengths are the same both flash types. */ dev->block_len[BLOCK_MAIN1] = 0x1c000; dev->block_len[BLOCK_MAIN2] = 0x00000; + dev->block_len[BLOCK_MAIN3] = 0x00000; + dev->block_len[BLOCK_MAIN4] = 0x00000; dev->block_len[BLOCK_DATA1] = 0x01000; dev->block_len[BLOCK_DATA2] = 0x01000; dev->block_len[BLOCK_BOOT] = 0x02000; @@ -484,6 +495,10 @@ intel_flash_init(const device_t *info) dev->block_end[BLOCK_MAIN1] = 0x1ffff; dev->block_start[BLOCK_MAIN2] = 0xfffff; /* MAIN BLOCK 2 */ dev->block_end[BLOCK_MAIN2] = 0xfffff; + dev->block_start[BLOCK_MAIN3] = 0xfffff; /* MAIN BLOCK 3 */ + dev->block_end[BLOCK_MAIN3] = 0xfffff; + dev->block_start[BLOCK_MAIN4] = 0xfffff; /* MAIN BLOCK 4 */ + dev->block_end[BLOCK_MAIN4] = 0xfffff; dev->block_start[BLOCK_DATA1] = 0x02000; /* DATA AREA 1 BLOCK */ dev->block_end[BLOCK_DATA1] = 0x02fff; dev->block_start[BLOCK_DATA2] = 0x03000; /* DATA AREA 2 BLOCK */ @@ -495,6 +510,10 @@ intel_flash_init(const device_t *info) dev->block_end[BLOCK_MAIN1] = 0x1bfff; dev->block_start[BLOCK_MAIN2] = 0xfffff; /* MAIN BLOCK 2 */ dev->block_end[BLOCK_MAIN2] = 0xfffff; + dev->block_start[BLOCK_MAIN3] = 0xfffff; /* MAIN BLOCK 3 */ + dev->block_end[BLOCK_MAIN3] = 0xfffff; + dev->block_start[BLOCK_MAIN4] = 0xfffff; /* MAIN BLOCK 4 */ + dev->block_end[BLOCK_MAIN4] = 0xfffff; dev->block_start[BLOCK_DATA1] = 0x1c000; /* DATA AREA 1 BLOCK */ dev->block_end[BLOCK_DATA1] = 0x1cfff; dev->block_start[BLOCK_DATA2] = 0x1d000; /* DATA AREA 2 BLOCK */ @@ -502,8 +521,8 @@ intel_flash_init(const device_t *info) dev->block_start[BLOCK_BOOT] = 0x1e000; /* BOOT BLOCK */ dev->block_end[BLOCK_BOOT] = 0x1ffff; } - break; - } + break; + } intel_flash_add_mappings(dev); diff --git a/src/mem/mem.c b/src/mem/mem.c index 6641bbc08..ee92d995c 100644 --- a/src/mem/mem.c +++ b/src/mem/mem.c @@ -664,7 +664,9 @@ uint8_t read_mem_b(uint32_t addr) { mem_mapping_t *map; + mem_logical_addr = addr; + addr &= rammask; map = read_mapping[addr >> MEM_GRANULARITY_BITS]; if (map && map->read_b) @@ -674,11 +676,36 @@ read_mem_b(uint32_t addr) } +uint16_t +read_mem_w(uint32_t addr) +{ + mem_mapping_t *map; + + mem_logical_addr = addr; + addr &= rammask; + + if (addr & 1) + return read_mem_b(addr) | (read_mem_b(addr + 1) << 8); + + map = read_mapping[addr >> MEM_GRANULARITY_BITS]; + + if (map && map->read_w) + return map->read_w(addr, map->p); + + if (map && map->read_b) + return map->read_b(addr, map->p) | (map->read_b(addr + 1, map->p) << 8); + + return 0xffff; +} + + void write_mem_b(uint32_t addr, uint8_t val) { mem_mapping_t *map; + mem_logical_addr = addr; + addr &= rammask; map = write_mapping[addr >> MEM_GRANULARITY_BITS]; if (map && map->write_b) @@ -686,6 +713,32 @@ write_mem_b(uint32_t addr, uint8_t val) } +void +write_mem_w(uint32_t addr, uint16_t val) +{ + mem_mapping_t *map; + + mem_logical_addr = addr; + addr &= rammask; + + if (addr & 1) { + write_mem_b(addr, val); + write_mem_b(addr + 1, val >> 8); + return; + } + + map = write_mapping[addr >> MEM_GRANULARITY_BITS]; + if (map) { + if (map->write_w) + map->write_w(addr, val, map->p); + else if (map->write_b) { + map->write_b(addr, val, map->p); + map->write_b(addr + 1, val >> 8, map->p); + } + } +} + + uint8_t readmembl(uint32_t addr) { diff --git a/src/mem/rom.c b/src/mem/rom.c index a75d1ecdf..56bc6314f 100644 --- a/src/mem/rom.c +++ b/src/mem/rom.c @@ -471,7 +471,7 @@ bios_load_linear_combined2(wchar_t *fn1, wchar_t *fn2, wchar_t *fn3, wchar_t *fn ret &= bios_load_aux_linear(fn2, 0x000c0000, 65536, off); ret &= bios_load_aux_linear(fn4, 0x000e0000, sz - 196608, off); if (fn5 != NULL) - ret &= bios_load_aux_linear(fn5, 0x000ec000, 16384, off); + ret &= bios_load_aux_linear(fn5, 0x000ec000, 16384, 0); return ret; } diff --git a/src/pic.c b/src/pic.c index 01c595f0d..ad2efc67e 100644 --- a/src/pic.c +++ b/src/pic.c @@ -45,7 +45,10 @@ enum pic_t pic, pic2; -static int shadow = 0, elcr_enabled = 0; +static pc_timer_t pic_timer; + +static int shadow = 0, elcr_enabled = 0, + tmr_inited = 0, latched = 0; #ifdef ENABLE_PIC_LOG @@ -184,9 +187,7 @@ find_best_interrupt(pic_t *dev) void pic_update_pending(void) { - int is_at; - - is_at = IS_ARCH(machine, (MACHINE_BUS_ISA16 | MACHINE_BUS_MCA | MACHINE_BUS_PCMCIA)); + int is_at = IS_ARCH(machine, (MACHINE_BUS_ISA16 | MACHINE_BUS_MCA | MACHINE_BUS_PCMCIA)); if (is_at) { pic2.int_pending = (find_best_interrupt(&pic2) != -1); @@ -195,26 +196,52 @@ pic_update_pending(void) pic.irr |= (1 << pic2.icw3); else pic.irr &= ~(1 << pic2.icw3); + + pic.int_pending = (find_best_interrupt(&pic) != -1); + return; } - pic.int_pending = (find_best_interrupt(&pic) != -1); + if (find_best_interrupt(&pic) != -1) { + latched++; + if (latched == 1) + timer_on_auto(&pic_timer, is_at ? 0.3 : 0.35); + /* 300 ms on AT+, 350 ns on PC/PCjr/XT */ + } else if (latched == 0) + pic.int_pending = 0; +} + + +static void +pic_callback(void *priv) +{ + pic_t *dev = (pic_t *) priv; + + dev->int_pending = 1; + + latched--; + if (latched > 0) + timer_on_auto(&pic_timer, 0.35); } void pic_reset() { - int is_at; + int is_at = IS_ARCH(machine, (MACHINE_BUS_ISA16 | MACHINE_BUS_MCA | MACHINE_BUS_PCMCIA)); memset(&pic, 0, sizeof(pic_t)); memset(&pic2, 0, sizeof(pic_t)); pic.is_master = 1; - is_at = IS_ARCH(machine, (MACHINE_BUS_ISA16 | MACHINE_BUS_MCA | MACHINE_BUS_PCMCIA)); - if (is_at) pic.slaves[2] = &pic2; + + if (tmr_inited) + timer_on_auto(&pic_timer, 0.0); + memset(&pic_timer, 0x00, sizeof(pc_timer_t)); + timer_add(&pic_timer, pic_callback, &pic, 0); + tmr_inited = 1; } diff --git a/src/pit.c b/src/pit.c index f35699d1a..03fd80c93 100644 --- a/src/pit.c +++ b/src/pit.c @@ -369,7 +369,7 @@ ctr_latch_status(ctr_t *ctr) static void ctr_latch_count(ctr_t *ctr) { - int count = (ctr->latch || ctr->null_count || (ctr->state == 1)) ? ctr->l : ctr->count; + int count = (ctr->latch || (ctr->state == 1)) ? ctr->l : ctr->count; switch (ctr->rm & 0x03) { case 0x00: @@ -581,8 +581,6 @@ pit_write(uint16_t addr, uint8_t val, void *priv) case 1: case 2: /* the actual timers */ ctr = &dev->counters[t]; - /* Reloading timer count, so set null_count to 1. */ - ctr->null_count = 1; switch (ctr->wm) { case 0: @@ -650,7 +648,7 @@ pit_read(uint16_t addr, void *priv) break; } - count = (ctr->null_count || (ctr->state == 1)) ? ctr->l : ctr->count; + count = (ctr->state == 1) ? ctr->l : ctr->count; if (ctr->latched) { ret = (ctr->rl) >> ((ctr->rm & 0x80) ? 8 : 0); diff --git a/src/sio/sio_fdc37c669.c b/src/sio/sio_fdc37c669.c index d9797d38d..b2d76814e 100644 --- a/src/sio/sio_fdc37c669.c +++ b/src/sio/sio_fdc37c669.c @@ -281,7 +281,7 @@ fdc37c669_init(const device_t *info) dev->uart[0] = device_add_inst(&ns16550_device, 1); dev->uart[1] = device_add_inst(&ns16550_device, 2); - io_sethandler(0x3f0, 0x0002, + io_sethandler(info->local ? 0x370 : 0x3f0, 0x0002, fdc37c669_read, NULL, NULL, fdc37c669_write, NULL, NULL, dev); fdc37c669_reset(dev); @@ -298,3 +298,13 @@ const device_t fdc37c669_device = { { NULL }, NULL, NULL, NULL }; + + +const device_t fdc37c669_370_device = { + "SMC FDC37C669 Super I/O (Port 370h)", + 0, + 1, + fdc37c669_init, fdc37c669_close, NULL, + { NULL }, NULL, NULL, + NULL +}; diff --git a/src/video/vid_ati_mach64.c b/src/video/vid_ati_mach64.c index 05859cad0..d7d9f1e90 100644 --- a/src/video/vid_ati_mach64.c +++ b/src/video/vid_ati_mach64.c @@ -32,6 +32,7 @@ #include <86box/rom.h> #include <86box/plat.h> #include <86box/video.h> +#include <86box/vid_ddc.h> #include <86box/vid_svga.h> #include <86box/vid_svga_render.h> #include <86box/vid_ati_eeprom.h> @@ -1737,6 +1738,7 @@ static void mach64_vblank_start(svga_t *svga) uint8_t mach64_ext_readb(uint32_t addr, void *p) { mach64_t *mach64 = (mach64_t *)p; + uint8_t gpio_state; uint8_t ret; if (!(addr & 0x400)) @@ -1866,11 +1868,27 @@ uint8_t mach64_ext_readb(uint32_t addr, void *p) else ret = ati68860_ramdac_in(addr & 3, mach64->svga.ramdac, &mach64->svga); break; - case 0xc4: case 0xc5: case 0xc6: case 0xc7: - if (mach64->type == MACH64_VT2) - mach64->dac_cntl |= (4 << 24); + case 0xc4: case 0xc5: case 0xc6: READ8(addr, mach64->dac_cntl); break; + + case 0xc7: + READ8(addr, mach64->dac_cntl); + if (mach64->type == MACH64_VT2) { + gpio_state = 6; + + if ((ret & (1 << 4)) && !(ret & (1 << 1))) + gpio_state &= ~(1 << 1); + if (!(ret & (1 << 4)) && !ddc_read_data()) + gpio_state &= ~(1 << 1); + if ((ret & (1 << 5)) && !(ret & (1 << 2))) + gpio_state &= ~(1 << 2); + if (!(ret & (1 << 5)) && !ddc_read_clock()) + gpio_state &= ~(1 << 2); + + ret = (ret & ~6) | gpio_state; + } + break; case 0xd0: case 0xd1: case 0xd2: case 0xd3: READ8(addr, mach64->gen_test_cntl); @@ -2169,6 +2187,7 @@ void mach64_ext_writeb(uint32_t addr, uint8_t val, void *p) { mach64_t *mach64 = (mach64_t *)p; svga_t *svga = &mach64->svga; + int data, clk; mach64_log("mach64_ext_writeb : addr %08X val %02X\n", addr, val); @@ -2360,7 +2379,10 @@ void mach64_ext_writeb(uint32_t addr, uint8_t val, void *p) WRITE8(addr, mach64->dac_cntl, val); svga_set_ramdac_type(svga, (mach64->dac_cntl & 0x100) ? RAMDAC_8BIT : RAMDAC_6BIT); ati68860_set_ramdac_type(mach64->svga.ramdac, (mach64->dac_cntl & 0x100) ? RAMDAC_8BIT : RAMDAC_6BIT); - break; + data = (val & (1 << 4)) ? ((val & (1 << 1)) ? 1 : 0) : 1; + clk = (val & (1 << 5)) ? ((val & (1 << 2)) ? 1 : 0) : 1; + ddc_i2c_change(clk, data); + break; case 0xd0: case 0xd1: case 0xd2: case 0xd3: WRITE8(addr, mach64->gen_test_cntl, val); @@ -3346,6 +3368,8 @@ static void *mach64_common_init(const device_t *info) mach64->fifo_not_full_event = thread_create_event(); mach64->fifo_thread = thread_create(fifo_thread, mach64); + ddc_init(); + return mach64; } diff --git a/src/video/vid_ddc.c b/src/video/vid_ddc.c new file mode 100644 index 000000000..4c4aab881 --- /dev/null +++ b/src/video/vid_ddc.c @@ -0,0 +1,334 @@ +#include +#include +#include +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * DDC monitor emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/vid_ddc.h> + + +static uint8_t edid_data[128] = +{ + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, /*Fixed header pattern*/ + 0x09, 0xf8, /*Manufacturer "BOX" - apparently unassigned by UEFI - and it has to be big endian*/ + 0x00, 0x00, /*Product code*/ + 0x12, 0x34, 0x56, 0x78, /*Serial number*/ + 0x01, 9, /*Manufacturer week and year*/ + 0x01, 0x03, /*EDID version (1.3)*/ + + 0x08, /*Analogue input, separate sync*/ + 34, 0, /*Landscape, 4:3*/ + 0, /*Gamma*/ + 0x08, /*RGB colour*/ + 0x81, 0xf1, 0xa3, 0x57, 0x53, 0x9f, 0x27, 0x0a, 0x50, /*Chromaticity*/ + + 0xff, 0xff, 0xff, /*Established timing bitmap*/ + 0x00, 0x00, /*Standard timing information*/ + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00, + + /*Detailed mode descriptions*/ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + 0x00, /*No extensions*/ + 0x00 +}; + +/*This should probably be split off into a separate I2C module*/ +enum +{ + TRANSMITTER_MONITOR = 1, + TRANSMITTER_HOST = -1 +}; + +enum +{ + I2C_IDLE = 0, + I2C_RECEIVE, + I2C_RECEIVE_WAIT, + I2C_TRANSMIT_START, + I2C_TRANSMIT, + I2C_ACKNOWLEDGE, + I2C_TRANSACKNOWLEDGE, + I2C_TRANSMIT_WAIT +}; + +enum +{ + PROM_IDLE = 0, + PROM_RECEIVEADDR, + PROM_RECEIVEDATA, + PROM_SENDDATA, + PROM_INVALID +}; + +static struct +{ + int clock, data; + int state; + int last_data; + int pos; + int transmit; + uint8_t byte; +} i2c; + +static struct +{ + int state; + int addr; + int rw; +} prom; + +static void prom_stop(void) +{ +// pclog("prom_stop()\n"); + prom.state = PROM_IDLE; + i2c.transmit = TRANSMITTER_HOST; +} + +static void prom_next_byte(void) +{ +// pclog("prom_next_byte(%d)\n", prom.addr); + i2c.byte = edid_data[(prom.addr++) & 0x7F]; +} + +static void prom_write(uint8_t byte) +{ +// pclog("prom_write: byte=%02x\n", byte); + switch (prom.state) + { + case PROM_IDLE: + if ((byte & 0xfe) != 0xa0) + { +// pclog("I2C address not PROM\n"); + prom.state = PROM_INVALID; + break; + } + prom.rw = byte & 1; + if (prom.rw) + { + prom.state = PROM_SENDDATA; + i2c.transmit = TRANSMITTER_MONITOR; + i2c.byte = edid_data[(prom.addr++) & 0x7F]; +// pclog("PROM - %02X from %02X\n",i2c.byte, prom.addr-1); +// pclog("Transmitter now PROM\n"); + } + else + { + prom.state = PROM_RECEIVEADDR; + i2c.transmit = TRANSMITTER_HOST; + } +// pclog("PROM R/W=%i\n",promrw); + return; + + case PROM_RECEIVEADDR: +// pclog("PROM addr=%02X\n",byte); + prom.addr = byte; + if (prom.rw) + prom.state = PROM_SENDDATA; + else + prom.state = PROM_RECEIVEDATA; + break; + + case PROM_RECEIVEDATA: +// pclog("PROM write %02X %02X\n",promaddr,byte); + break; + + case PROM_SENDDATA: + break; + } +} + +void ddc_i2c_change(int new_clock, int new_data) +{ +// pclog("I2C update clock %i->%i data %i->%i state %i\n",i2c.clock,new_clock,i2c.last_data,new_data,i2c.state); + switch (i2c.state) + { + case I2C_IDLE: + if (i2c.clock && new_clock) + { + if (i2c.last_data && !new_data) /*Start bit*/ + { +// pclog("Start bit received\n"); + i2c.state = I2C_RECEIVE; + i2c.pos = 0; + } + } + break; + + case I2C_RECEIVE_WAIT: + if (!i2c.clock && new_clock) + i2c.state = I2C_RECEIVE; + case I2C_RECEIVE: + if (!i2c.clock && new_clock) + { + i2c.byte <<= 1; + if (new_data) + i2c.byte |= 1; + else + i2c.byte &= 0xFE; + i2c.pos++; + if (i2c.pos == 8) + { + prom_write(i2c.byte); + i2c.state = I2C_ACKNOWLEDGE; + } + } + else if (i2c.clock && new_clock && new_data && !i2c.last_data) /*Stop bit*/ + { +// pclog("Stop bit received\n"); + i2c.state = I2C_IDLE; + prom_stop(); + } + else if (i2c.clock && new_clock && !new_data && i2c.last_data) /*Start bit*/ + { +// pclog("Start bit received\n"); + i2c.pos = 0; + prom.state = PROM_IDLE; + } + break; + + case I2C_ACKNOWLEDGE: + if (!i2c.clock && new_clock) + { +// pclog("Acknowledging transfer\n"); + new_data = 0; + i2c.pos = 0; + if (i2c.transmit == TRANSMITTER_HOST) + i2c.state = I2C_RECEIVE_WAIT; + else + i2c.state = I2C_TRANSMIT; + } + break; + + case I2C_TRANSACKNOWLEDGE: + if (!i2c.clock && new_clock) + { + if (new_data) /*It's not acknowledged - must be end of transfer*/ + { +// pclog("End of transfer\n"); + i2c.state = I2C_IDLE; + prom_stop(); + } + else /*Next byte to transfer*/ + { + i2c.state = I2C_TRANSMIT_START; + prom_next_byte(); + i2c.pos = 0; +// pclog("Next byte - %02X\n",i2c.byte); + } + } + break; + + case I2C_TRANSMIT_WAIT: + if (i2c.clock && new_clock) + { + if (i2c.last_data && !new_data) /*Start bit*/ + { + prom_next_byte(); + i2c.pos = 0; +// pclog("Next byte - %02X\n",i2c.byte); + } + if (!i2c.last_data && new_data) /*Stop bit*/ + { +// pclog("Stop bit received\n"); + i2c.state = I2C_IDLE; + prom_stop(); + } + } + break; + + case I2C_TRANSMIT_START: + if (!i2c.clock && new_clock) + i2c.state = I2C_TRANSMIT; + if (i2c.clock && new_clock && !i2c.last_data && new_data) /*Stop bit*/ + { +// pclog("Stop bit received\n"); + i2c.state = I2C_IDLE; + prom_stop(); + } + case I2C_TRANSMIT: + if (!i2c.clock && new_clock) + { + i2c.clock = new_clock; +// if (!i2c.pos) +// pclog("Transmit byte %02x\n", i2c.byte); + i2c.data = new_data = i2c.byte & 0x80; +// pclog("Transmit bit %i %i\n", i2c.byte, i2c.pos); + i2c.byte <<= 1; + i2c.pos++; + return; + } + if (i2c.clock && !new_clock && i2c.pos == 8) + { + i2c.state = I2C_TRANSACKNOWLEDGE; +// pclog("Acknowledge mode\n"); + } + break; + + } + if (!i2c.clock && new_clock) + i2c.data = new_data; + i2c.last_data = new_data; + i2c.clock = new_clock; +} + +int ddc_read_clock(void) +{ + return i2c.clock; +} +int ddc_read_data(void) +{ + if (i2c.state == I2C_TRANSMIT || i2c.state == I2C_ACKNOWLEDGE) + return i2c.data; + if (i2c.state == I2C_RECEIVE_WAIT) + return 0; /*ACK*/ + return 1; +} + +void ddc_init(void) +{ + int c; + uint8_t checksum = 0; + + for (c = 0; c < 127; c++) + checksum += edid_data[c]; + edid_data[127] = 256 - checksum; + + i2c.clock = 1; + i2c.data = 1; +} diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index bc3fc038a..81c3b2d34 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -32,6 +32,7 @@ #include <86box/device.h> #include <86box/plat.h> #include <86box/video.h> +#include <86box/vid_ddc.h> #include <86box/vid_svga.h> #include <86box/vid_svga_render.h> @@ -283,6 +284,8 @@ typedef struct virge_t int virge_busy; uint8_t subsys_stat, subsys_cntl, advfunc_cntl; + + uint8_t serialport; } virge_t; static video_timings_t timing_diamond_stealth3d_2000_vlb = {VIDEO_BUS, 2, 2, 3, 28, 28, 45}; @@ -361,6 +364,11 @@ enum #define INT_3DF_EMP (1 << 6) #define INT_MASK 0xff +#define SERIAL_PORT_SCW (1 << 0) +#define SERIAL_PORT_SDW (1 << 1) +#define SERIAL_PORT_SCR (1 << 2) +#define SERIAL_PORT_SDR (1 << 3) + #ifdef ENABLE_S3_VIRGE_LOG int s3_virge_do_log = ENABLE_S3_VIRGE_LOG; @@ -883,6 +891,14 @@ s3_virge_mmio_read(uint32_t addr, void *p) case 0x83d8: case 0x83d9: case 0x83da: case 0x83db: case 0x83dc: case 0x83dd: case 0x83de: case 0x83df: return s3_virge_in(addr & 0x3ff, virge); + + case 0xff20: case 0xff21: + ret = virge->serialport & ~(SERIAL_PORT_SCR | SERIAL_PORT_SDR); + if ((virge->serialport & SERIAL_PORT_SCW) && ddc_read_clock()) + ret |= SERIAL_PORT_SCR; + if ((virge->serialport & SERIAL_PORT_SDW) && ddc_read_data()) + ret |= SERIAL_PORT_SDR; + return ret; } return 0xff; } @@ -1161,6 +1177,11 @@ static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p) case 0x83dc: case 0x83dd: case 0x83de: case 0x83df: s3_virge_out(addr & 0x3ff, val, virge); break; + + case 0xff20: + virge->serialport = val; + ddc_i2c_change((val & SERIAL_PORT_SCW) ? 1 : 0, (val & SERIAL_PORT_SDW) ? 1 : 0); + break; } } } @@ -1176,7 +1197,8 @@ s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p) if ((addr & 0xfffe) == 0x83d4) { s3_virge_mmio_write(addr, val, virge); s3_virge_mmio_write(addr + 1, val >> 8, virge); - } + } else if ((addr & 0xfffe) == 0xff20) + s3_virge_mmio_write(addr, val, virge); } } @@ -1626,6 +1648,10 @@ s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *p) if (virge->s3d_tri.cmd_set & CMD_SET_AE) queue_triangle(virge); break; + + case 0xff20: + s3_virge_mmio_write(addr, val, virge); + break; } } } @@ -3819,6 +3845,8 @@ static void *s3_virge_init(const device_t *info) virge->fifo_not_full_event = thread_create_event(); virge->fifo_thread = thread_create(fifo_thread, virge); + ddc_init(); + return virge; } diff --git a/src/video/vid_svga.c b/src/video/vid_svga.c index 0f6c37dbb..7a8fc0981 100644 --- a/src/video/vid_svga.c +++ b/src/video/vid_svga.c @@ -447,6 +447,8 @@ svga_recalctimings(svga_t *svga) svga->ma_latch = ((svga->crtc[0xc] << 8) | svga->crtc[0xd]) + ((svga->crtc[8] & 0x60) >> 5); svga->ca_adj = 0; + + svga->rowcount = svga->crtc[9] & 31; svga->hdisp_time = svga->hdisp; svga->render = svga_render_blank; @@ -517,7 +519,6 @@ svga_recalctimings(svga_t *svga) } svga->linedbl = svga->crtc[9] & 0x80; - svga->rowcount = svga->crtc[9] & 31; svga->char_width = (svga->seqregs[1] & 1) ? 8 : 9; if (enable_overscan) { diff --git a/src/video/vid_svga_render.c b/src/video/vid_svga_render.c index 00fa85ff9..083ccddbb 100644 --- a/src/video/vid_svga_render.c +++ b/src/video/vid_svga_render.c @@ -28,6 +28,16 @@ #include <86box/vid_svga.h> #include <86box/vid_svga_render.h> +void +svga_render_null(svga_t *svga) +{ + if ((svga->displine + svga->y_add) < 0) + return; + + if (svga->firstline_draw == 2000) + svga->firstline_draw = svga->displine; + svga->lastline_draw = svga->displine; +} void svga_render_blank(svga_t *svga) diff --git a/src/video/vid_table.c b/src/video/vid_table.c index 8fa0e870f..a1c7af27d 100644 --- a/src/video/vid_table.c +++ b/src/video/vid_table.c @@ -123,6 +123,7 @@ video_cards[] = { { "cl_gd5440_pci", &gd5440_pci_device }, { "cl_gd5446_pci", &gd5446_pci_device }, { "cl_gd5480_pci", &gd5480_pci_device }, + { "ctl3d_banshee_pci", &creative_voodoo_banshee_device }, { "stealth32_pci", &et4000w32p_pci_device }, { "stealth3d_2000_pci", &s3_virge_pci_device }, { "stealth3d_3000_pci", &s3_virge_988_pci_device }, @@ -148,6 +149,9 @@ video_cards[] = { { "virge375_vbe20_pci", &s3_virge_375_4_pci_device }, { "cl_gd5446_stb_pci", &gd5446_stb_pci_device }, { "tgui9440_pci", &tgui9440_pci_device }, + { "voodoo3_2k_pci", &voodoo_3_2000_device }, + { "voodoo3_3k_pci", &voodoo_3_3000_device }, + { "voodoo_banshee_pci", &voodoo_banshee_device }, { "mach64gx_vlb", &mach64gx_vlb_device }, { "et4000w32p_vlb", &et4000w32p_cardex_vlb_device }, #if defined(DEV_BRANCH) && defined(USE_CL5422) diff --git a/src/video/vid_voodoo.c b/src/video/vid_voodoo.c index 2ba96fcf4..da9ab2038 100644 --- a/src/video/vid_voodoo.c +++ b/src/video/vid_voodoo.c @@ -6,14 +6,14 @@ * * This file is part of the 86Box distribution. * - * Emulation of the 3DFX Voodoo Graphics controller. + * Voodoo Graphics, 2, Banshee, 3 emulation. * * * * Authors: Sarah Walker, * leilei * - * Copyright 2008-2018 Sarah Walker. + * Copyright 2008-2020 Sarah Walker. */ #include #include @@ -34,1014 +34,20 @@ #include <86box/plat.h> #include <86box/video.h> #include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_blitter.h> +#include <86box/vid_voodoo_display.h> #include <86box/vid_voodoo_dither.h> +#include <86box/vid_voodoo_fb.h> +#include <86box/vid_voodoo_fifo.h> +#include <86box/vid_voodoo_reg.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> +#include <86box/vid_voodoo_texture.h> -#ifdef CLAMP -#undef CLAMP -#endif +rgba8_t rgb332[0x100], ai44[0x100], rgb565[0x10000], argb1555[0x10000], argb4444[0x10000], ai88[0x10000]; -#define CLAMP(x) (((x) < 0) ? 0 : (((x) > 0xff) ? 0xff : (x))) -#define CLAMP16(x) (((x) < 0) ? 0 : (((x) > 0xffff) ? 0xffff : (x))) - -#define LOD_MAX 8 - -#define TEX_DIRTY_SHIFT 10 - -#define TEX_CACHE_MAX 64 - -enum -{ - VOODOO_1 = 0, - VOODOO_SB50 = 1, - VOODOO_2 = 2 -}; - -static uint32_t texture_offset[LOD_MAX+3] = -{ - 0, - 256*256, - 256*256 + 128*128, - 256*256 + 128*128 + 64*64, - 256*256 + 128*128 + 64*64 + 32*32, - 256*256 + 128*128 + 64*64 + 32*32 + 16*16, - 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8, - 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4, - 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2, - 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1, - 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1 + 1 -}; - -static int tris = 0; - -typedef union { - uint32_t i; - float f; -} int_float; - -typedef struct { - uint8_t b, g, r; - uint8_t pad; -} rgbp_t; -typedef struct { - uint8_t b, g, r, a; -} rgba8_t; - -typedef union { - struct { - uint8_t b, g, r, a; - } rgba; - uint32_t u; -} rgba_u; - -#define FIFO_SIZE 65536 -#define FIFO_MASK (FIFO_SIZE - 1) -#define FIFO_ENTRY_SIZE (1 << 31) - -#define FIFO_ENTRIES (voodoo->fifo_write_idx - voodoo->fifo_read_idx) -#define FIFO_FULL ((voodoo->fifo_write_idx - voodoo->fifo_read_idx) >= FIFO_SIZE-4) -#define FIFO_EMPTY (voodoo->fifo_read_idx == voodoo->fifo_write_idx) - -#define FIFO_TYPE 0xff000000 -#define FIFO_ADDR 0x00ffffff - -enum -{ - FIFO_INVALID = (0x00 << 24), - FIFO_WRITEL_REG = (0x01 << 24), - FIFO_WRITEW_FB = (0x02 << 24), - FIFO_WRITEL_FB = (0x03 << 24), - FIFO_WRITEL_TEX = (0x04 << 24) -}; - -#define PARAM_SIZE 1024 -#define PARAM_MASK (PARAM_SIZE - 1) -#define PARAM_ENTRY_SIZE (1 << 31) - -#define PARAM_ENTRIES_1 (voodoo->params_write_idx - voodoo->params_read_idx[0]) -#define PARAM_ENTRIES_2 (voodoo->params_write_idx - voodoo->params_read_idx[1]) -#define PARAM_FULL_1 ((voodoo->params_write_idx - voodoo->params_read_idx[0]) >= PARAM_SIZE) -#define PARAM_FULL_2 ((voodoo->params_write_idx - voodoo->params_read_idx[1]) >= PARAM_SIZE) -#define PARAM_EMPTY_1 (voodoo->params_read_idx[0] == voodoo->params_write_idx) -#define PARAM_EMPTY_2 (voodoo->params_read_idx[1] == voodoo->params_write_idx) - -typedef struct -{ - uint32_t addr_type; - uint32_t val; -} fifo_entry_t; - -static rgba8_t rgb332[0x100], ai44[0x100], rgb565[0x10000], argb1555[0x10000], argb4444[0x10000], ai88[0x10000]; - -typedef struct voodoo_params_t -{ - int command; - - int32_t vertexAx, vertexAy, vertexBx, vertexBy, vertexCx, vertexCy; - - uint32_t startR, startG, startB, startZ, startA; - - int32_t dBdX, dGdX, dRdX, dAdX, dZdX; - - int32_t dBdY, dGdY, dRdY, dAdY, dZdY; - - int64_t startW, dWdX, dWdY; - - struct - { - int64_t startS, startT, startW, p1; - int64_t dSdX, dTdX, dWdX, p2; - int64_t dSdY, dTdY, dWdY, p3; - } tmu[2]; - - uint32_t color0, color1; - - uint32_t fbzMode; - uint32_t fbzColorPath; - - uint32_t fogMode; - rgbp_t fogColor; - struct - { - uint8_t fog, dfog; - } fogTable[64]; - - uint32_t alphaMode; - - uint32_t zaColor; - - int chromaKey_r, chromaKey_g, chromaKey_b; - uint32_t chromaKey; - - uint32_t textureMode[2]; - uint32_t tLOD[2]; - - uint32_t texBaseAddr[2], texBaseAddr1[2], texBaseAddr2[2], texBaseAddr38[2]; - - uint32_t tex_base[2][LOD_MAX+2]; - uint32_t tex_end[2][LOD_MAX+2]; - int tex_width[2]; - int tex_w_mask[2][LOD_MAX+2]; - int tex_w_nmask[2][LOD_MAX+2]; - int tex_h_mask[2][LOD_MAX+2]; - int tex_shift[2][LOD_MAX+2]; - int tex_lod[2][LOD_MAX+2]; - int tex_entry[2]; - int detail_max[2], detail_bias[2], detail_scale[2]; - - uint32_t draw_offset, aux_offset; - - int tformat[2]; - - int clipLeft, clipRight, clipLowY, clipHighY; - - int sign; - - uint32_t front_offset; - - uint32_t swapbufferCMD; - - uint32_t stipple; -} voodoo_params_t; - -typedef struct texture_t -{ - uint32_t base; - uint32_t tLOD; - volatile int refcount, refcount_r[2]; - int is16; - uint32_t palette_checksum; - uint32_t addr_start[4], addr_end[4]; - uint32_t *data; -} texture_t; - -typedef struct vert_t -{ - float sVx, sVy; - float sRed, sGreen, sBlue, sAlpha; - float sVz, sWb; - float sW0, sS0, sT0; - float sW1, sS1, sT1; -} vert_t; - -typedef struct voodoo_t -{ - mem_mapping_t mapping; - - int pci_enable; - - uint8_t dac_data[8]; - int dac_reg, dac_reg_ff; - uint8_t dac_readdata; - uint16_t dac_pll_regs[16]; - - float pixel_clock; - uint64_t line_time; - - voodoo_params_t params; - - uint32_t fbiInit0, fbiInit1, fbiInit2, fbiInit3, fbiInit4; - uint32_t fbiInit5, fbiInit6, fbiInit7; /*Voodoo 2*/ - - uint32_t initEnable; - - uint32_t lfbMode; - - uint32_t memBaseAddr; - - int_float fvertexAx, fvertexAy, fvertexBx, fvertexBy, fvertexCx, fvertexCy; - - uint32_t front_offset, back_offset; - - uint32_t fb_read_offset, fb_write_offset; - - int row_width; - int block_width; - - uint8_t *fb_mem, *tex_mem[2]; - uint16_t *tex_mem_w[2]; - - int rgb_sel; - - uint32_t trexInit1[2]; - - uint32_t tmuConfig; - - int swap_count; - - int disp_buffer, draw_buffer; - pc_timer_t timer; - - int line; - svga_t *svga; - - uint32_t backPorch; - uint32_t videoDimensions; - uint32_t hSync, vSync; - - int h_total, v_total, v_disp; - int h_disp; - int v_retrace; - - struct - { - uint32_t y[4], i[4], q[4]; - } nccTable[2][2]; - - rgba_u palette[2][256]; - - rgba_u ncc_lookup[2][2][256]; - int ncc_dirty[2]; - - thread_t *fifo_thread; - thread_t *render_thread[2]; - event_t *wake_fifo_thread; - event_t *wake_main_thread; - event_t *fifo_not_full_event; - event_t *render_not_full_event[2]; - event_t *wake_render_thread[2]; - - int voodoo_busy; - int render_voodoo_busy[2]; - - int render_threads; - int odd_even_mask; - - int pixel_count[2], texel_count[2], tri_count, frame_count; - int pixel_count_old[2], texel_count_old[2]; - int wr_count, rd_count, tex_count; - - int retrace_count; - int swap_interval; - uint32_t swap_offset; - int swap_pending; - - int bilinear_enabled; - - int fb_size; - uint32_t fb_mask; - - int texture_size; - uint32_t texture_mask; - - int dual_tmus; - int type; - - fifo_entry_t fifo[FIFO_SIZE]; - volatile int fifo_read_idx, fifo_write_idx; - volatile int cmd_read, cmd_written, cmd_written_fifo; - - voodoo_params_t params_buffer[PARAM_SIZE]; - volatile int params_read_idx[2], params_write_idx; - - uint32_t cmdfifo_base, cmdfifo_end; - int cmdfifo_rp; - volatile int cmdfifo_depth_rd, cmdfifo_depth_wr; - uint32_t cmdfifo_amin, cmdfifo_amax; - - uint32_t sSetupMode; - vert_t verts[4]; - int vertex_num; - int num_verticies; - - int flush; - - int scrfilter; - int scrfilterEnabled; - int scrfilterThreshold; - int scrfilterThresholdOld; - - uint32_t last_write_addr; - - uint32_t fbiPixelsIn; - uint32_t fbiChromaFail; - uint32_t fbiZFuncFail; - uint32_t fbiAFuncFail; - uint32_t fbiPixelsOut; - - uint32_t bltSrcBaseAddr; - uint32_t bltDstBaseAddr; - int bltSrcXYStride, bltDstXYStride; - uint32_t bltSrcChromaRange, bltDstChromaRange; - int bltSrcChromaMinR, bltSrcChromaMinG, bltSrcChromaMinB; - int bltSrcChromaMaxR, bltSrcChromaMaxG, bltSrcChromaMaxB; - int bltDstChromaMinR, bltDstChromaMinG, bltDstChromaMinB; - int bltDstChromaMaxR, bltDstChromaMaxG, bltDstChromaMaxB; - - int bltClipRight, bltClipLeft; - int bltClipHighY, bltClipLowY; - - int bltSrcX, bltSrcY; - int bltDstX, bltDstY; - int bltSizeX, bltSizeY; - int bltRop[4]; - uint16_t bltColorFg, bltColorBg; - - uint32_t bltCommand; - - struct - { - int dst_x, dst_y; - int cur_x; - int size_x, size_y; - int x_dir, y_dir; - int dst_stride; - } blt; - - rgbp_t clutData[64]; - int clutData_dirty; - rgbp_t clutData256[256]; - uint32_t video_16to32[0x10000]; - - uint8_t dirty_line[1024]; - int dirty_line_low, dirty_line_high; - - int fb_write_buffer, fb_draw_buffer; - int buffer_cutoff; - - int read_time, write_time, burst_time; - - pc_timer_t wake_timer; - - uint8_t thefilter[256][256]; // pixel filter, feeding from one or two - uint8_t thefilterg[256][256]; // for green - uint8_t thefilterb[256][256]; // for blue - - /* the voodoo adds purple lines for some reason */ - uint16_t purpleline[256][3]; - - texture_t texture_cache[2][TEX_CACHE_MAX]; - uint8_t texture_present[2][4096]; - int texture_last_removed; - - uint32_t palette_checksum[2]; - int palette_dirty[2]; - - uint64_t time; - int render_time[2]; - - int use_recompiler; - void *codegen_data; - - struct voodoo_set_t *set; -} voodoo_t; - -typedef struct voodoo_set_t -{ - voodoo_t *voodoos[2]; - - mem_mapping_t snoop_mapping; - - int nr_cards; -} voodoo_set_t; - -static inline void wait_for_render_thread_idle(voodoo_t *voodoo); - -enum -{ - SST_status = 0x000, - SST_intrCtrl = 0x004, - - SST_vertexAx = 0x008, - SST_vertexAy = 0x00c, - SST_vertexBx = 0x010, - SST_vertexBy = 0x014, - SST_vertexCx = 0x018, - SST_vertexCy = 0x01c, - - SST_startR = 0x0020, - SST_startG = 0x0024, - SST_startB = 0x0028, - SST_startZ = 0x002c, - SST_startA = 0x0030, - SST_startS = 0x0034, - SST_startT = 0x0038, - SST_startW = 0x003c, - - SST_dRdX = 0x0040, - SST_dGdX = 0x0044, - SST_dBdX = 0x0048, - SST_dZdX = 0x004c, - SST_dAdX = 0x0050, - SST_dSdX = 0x0054, - SST_dTdX = 0x0058, - SST_dWdX = 0x005c, - - SST_dRdY = 0x0060, - SST_dGdY = 0x0064, - SST_dBdY = 0x0068, - SST_dZdY = 0x006c, - SST_dAdY = 0x0070, - SST_dSdY = 0x0074, - SST_dTdY = 0x0078, - SST_dWdY = 0x007c, - - SST_triangleCMD = 0x0080, - - SST_fvertexAx = 0x088, - SST_fvertexAy = 0x08c, - SST_fvertexBx = 0x090, - SST_fvertexBy = 0x094, - SST_fvertexCx = 0x098, - SST_fvertexCy = 0x09c, - - SST_fstartR = 0x00a0, - SST_fstartG = 0x00a4, - SST_fstartB = 0x00a8, - SST_fstartZ = 0x00ac, - SST_fstartA = 0x00b0, - SST_fstartS = 0x00b4, - SST_fstartT = 0x00b8, - SST_fstartW = 0x00bc, - - SST_fdRdX = 0x00c0, - SST_fdGdX = 0x00c4, - SST_fdBdX = 0x00c8, - SST_fdZdX = 0x00cc, - SST_fdAdX = 0x00d0, - SST_fdSdX = 0x00d4, - SST_fdTdX = 0x00d8, - SST_fdWdX = 0x00dc, - - SST_fdRdY = 0x00e0, - SST_fdGdY = 0x00e4, - SST_fdBdY = 0x00e8, - SST_fdZdY = 0x00ec, - SST_fdAdY = 0x00f0, - SST_fdSdY = 0x00f4, - SST_fdTdY = 0x00f8, - SST_fdWdY = 0x00fc, - - SST_ftriangleCMD = 0x0100, - - SST_fbzColorPath = 0x104, - SST_fogMode = 0x108, - - SST_alphaMode = 0x10c, - SST_fbzMode = 0x110, - SST_lfbMode = 0x114, - - SST_clipLeftRight = 0x118, - SST_clipLowYHighY = 0x11c, - - SST_nopCMD = 0x120, - SST_fastfillCMD = 0x124, - SST_swapbufferCMD = 0x128, - - SST_fogColor = 0x12c, - SST_zaColor = 0x130, - SST_chromaKey = 0x134, - - SST_userIntrCMD = 0x13c, - SST_stipple = 0x140, - SST_color0 = 0x144, - SST_color1 = 0x148, - - SST_fbiPixelsIn = 0x14c, - SST_fbiChromaFail = 0x150, - SST_fbiZFuncFail = 0x154, - SST_fbiAFuncFail = 0x158, - SST_fbiPixelsOut = 0x15c, - - SST_fogTable00 = 0x160, - SST_fogTable01 = 0x164, - SST_fogTable02 = 0x168, - SST_fogTable03 = 0x16c, - SST_fogTable04 = 0x170, - SST_fogTable05 = 0x174, - SST_fogTable06 = 0x178, - SST_fogTable07 = 0x17c, - SST_fogTable08 = 0x180, - SST_fogTable09 = 0x184, - SST_fogTable0a = 0x188, - SST_fogTable0b = 0x18c, - SST_fogTable0c = 0x190, - SST_fogTable0d = 0x194, - SST_fogTable0e = 0x198, - SST_fogTable0f = 0x19c, - SST_fogTable10 = 0x1a0, - SST_fogTable11 = 0x1a4, - SST_fogTable12 = 0x1a8, - SST_fogTable13 = 0x1ac, - SST_fogTable14 = 0x1b0, - SST_fogTable15 = 0x1b4, - SST_fogTable16 = 0x1b8, - SST_fogTable17 = 0x1bc, - SST_fogTable18 = 0x1c0, - SST_fogTable19 = 0x1c4, - SST_fogTable1a = 0x1c8, - SST_fogTable1b = 0x1cc, - SST_fogTable1c = 0x1d0, - SST_fogTable1d = 0x1d4, - SST_fogTable1e = 0x1d8, - SST_fogTable1f = 0x1dc, - - SST_cmdFifoBaseAddr = 0x1e0, - SST_cmdFifoBump = 0x1e4, - SST_cmdFifoRdPtr = 0x1e8, - SST_cmdFifoAMin = 0x1ec, - SST_cmdFifoAMax = 0x1f0, - SST_cmdFifoDepth = 0x1f4, - SST_cmdFifoHoles = 0x1f8, - - SST_fbiInit4 = 0x200, - SST_vRetrace = 0x204, - SST_backPorch = 0x208, - SST_videoDimensions = 0x20c, - SST_fbiInit0 = 0x210, - SST_fbiInit1 = 0x214, - SST_fbiInit2 = 0x218, - SST_fbiInit3 = 0x21c, - SST_hSync = 0x220, - SST_vSync = 0x224, - SST_clutData = 0x228, - SST_dacData = 0x22c, - - SST_scrFilter = 0x230, - - SST_hvRetrace = 0x240, - SST_fbiInit5 = 0x244, - SST_fbiInit6 = 0x248, - SST_fbiInit7 = 0x24c, - - SST_sSetupMode = 0x260, - SST_sVx = 0x264, - SST_sVy = 0x268, - SST_sARGB = 0x26c, - SST_sRed = 0x270, - SST_sGreen = 0x274, - SST_sBlue = 0x278, - SST_sAlpha = 0x27c, - SST_sVz = 0x280, - SST_sWb = 0x284, - SST_sW0 = 0x288, - SST_sS0 = 0x28c, - SST_sT0 = 0x290, - SST_sW1 = 0x294, - SST_sS1 = 0x298, - SST_sT1 = 0x29c, - - SST_sDrawTriCMD = 0x2a0, - SST_sBeginTriCMD = 0x2a4, - - SST_bltSrcBaseAddr = 0x2c0, - SST_bltDstBaseAddr = 0x2c4, - SST_bltXYStrides = 0x2c8, - SST_bltSrcChromaRange = 0x2cc, - SST_bltDstChromaRange = 0x2d0, - SST_bltClipX = 0x2d4, - SST_bltClipY = 0x2d8, - - SST_bltSrcXY = 0x2e0, - SST_bltDstXY = 0x2e4, - SST_bltSize = 0x2e8, - SST_bltRop = 0x2ec, - SST_bltColor = 0x2f0, - - SST_bltCommand = 0x2f8, - SST_bltData = 0x2fc, - - SST_textureMode = 0x300, - SST_tLOD = 0x304, - SST_tDetail = 0x308, - SST_texBaseAddr = 0x30c, - SST_texBaseAddr1 = 0x310, - SST_texBaseAddr2 = 0x314, - SST_texBaseAddr38 = 0x318, - - SST_trexInit1 = 0x320, - - SST_nccTable0_Y0 = 0x324, - SST_nccTable0_Y1 = 0x328, - SST_nccTable0_Y2 = 0x32c, - SST_nccTable0_Y3 = 0x330, - SST_nccTable0_I0 = 0x334, - SST_nccTable0_I1 = 0x338, - SST_nccTable0_I2 = 0x33c, - SST_nccTable0_I3 = 0x340, - SST_nccTable0_Q0 = 0x344, - SST_nccTable0_Q1 = 0x348, - SST_nccTable0_Q2 = 0x34c, - SST_nccTable0_Q3 = 0x350, - - SST_nccTable1_Y0 = 0x354, - SST_nccTable1_Y1 = 0x358, - SST_nccTable1_Y2 = 0x35c, - SST_nccTable1_Y3 = 0x360, - SST_nccTable1_I0 = 0x364, - SST_nccTable1_I1 = 0x368, - SST_nccTable1_I2 = 0x36c, - SST_nccTable1_I3 = 0x370, - SST_nccTable1_Q0 = 0x374, - SST_nccTable1_Q1 = 0x378, - SST_nccTable1_Q2 = 0x37c, - SST_nccTable1_Q3 = 0x380, - - SST_remap_status = 0x000 | 0x400, - - SST_remap_vertexAx = 0x008 | 0x400, - SST_remap_vertexAy = 0x00c | 0x400, - SST_remap_vertexBx = 0x010 | 0x400, - SST_remap_vertexBy = 0x014 | 0x400, - SST_remap_vertexCx = 0x018 | 0x400, - SST_remap_vertexCy = 0x01c | 0x400, - - SST_remap_startR = 0x0020 | 0x400, - SST_remap_startG = 0x002c | 0x400, - SST_remap_startB = 0x0038 | 0x400, - SST_remap_startZ = 0x0044 | 0x400, - SST_remap_startA = 0x0050 | 0x400, - SST_remap_startS = 0x005c | 0x400, - SST_remap_startT = 0x0068 | 0x400, - SST_remap_startW = 0x0074 | 0x400, - - SST_remap_dRdX = 0x0024 | 0x400, - SST_remap_dGdX = 0x0030 | 0x400, - SST_remap_dBdX = 0x003c | 0x400, - SST_remap_dZdX = 0x0048 | 0x400, - SST_remap_dAdX = 0x0054 | 0x400, - SST_remap_dSdX = 0x0060 | 0x400, - SST_remap_dTdX = 0x006c | 0x400, - SST_remap_dWdX = 0x0078 | 0x400, - - SST_remap_dRdY = 0x0028 | 0x400, - SST_remap_dGdY = 0x0034 | 0x400, - SST_remap_dBdY = 0x0040 | 0x400, - SST_remap_dZdY = 0x004c | 0x400, - SST_remap_dAdY = 0x0058 | 0x400, - SST_remap_dSdY = 0x0064 | 0x400, - SST_remap_dTdY = 0x0070 | 0x400, - SST_remap_dWdY = 0x007c | 0x400, - - SST_remap_triangleCMD = 0x0080 | 0x400, - - SST_remap_fvertexAx = 0x088 | 0x400, - SST_remap_fvertexAy = 0x08c | 0x400, - SST_remap_fvertexBx = 0x090 | 0x400, - SST_remap_fvertexBy = 0x094 | 0x400, - SST_remap_fvertexCx = 0x098 | 0x400, - SST_remap_fvertexCy = 0x09c | 0x400, - - SST_remap_fstartR = 0x00a0 | 0x400, - SST_remap_fstartG = 0x00ac | 0x400, - SST_remap_fstartB = 0x00b8 | 0x400, - SST_remap_fstartZ = 0x00c4 | 0x400, - SST_remap_fstartA = 0x00d0 | 0x400, - SST_remap_fstartS = 0x00dc | 0x400, - SST_remap_fstartT = 0x00e8 | 0x400, - SST_remap_fstartW = 0x00f4 | 0x400, - - SST_remap_fdRdX = 0x00a4 | 0x400, - SST_remap_fdGdX = 0x00b0 | 0x400, - SST_remap_fdBdX = 0x00bc | 0x400, - SST_remap_fdZdX = 0x00c8 | 0x400, - SST_remap_fdAdX = 0x00d4 | 0x400, - SST_remap_fdSdX = 0x00e0 | 0x400, - SST_remap_fdTdX = 0x00ec | 0x400, - SST_remap_fdWdX = 0x00f8 | 0x400, - - SST_remap_fdRdY = 0x00a8 | 0x400, - SST_remap_fdGdY = 0x00b4 | 0x400, - SST_remap_fdBdY = 0x00c0 | 0x400, - SST_remap_fdZdY = 0x00cc | 0x400, - SST_remap_fdAdY = 0x00d8 | 0x400, - SST_remap_fdSdY = 0x00e4 | 0x400, - SST_remap_fdTdY = 0x00f0 | 0x400, - SST_remap_fdWdY = 0x00fc | 0x400, -}; - -enum -{ - LFB_WRITE_FRONT = 0x0000, - LFB_WRITE_BACK = 0x0010, - LFB_WRITE_MASK = 0x0030 -}; - -enum -{ - LFB_READ_FRONT = 0x0000, - LFB_READ_BACK = 0x0040, - LFB_READ_AUX = 0x0080, - LFB_READ_MASK = 0x00c0 -}; - -enum -{ - LFB_FORMAT_RGB565 = 0, - LFB_FORMAT_RGB555 = 1, - LFB_FORMAT_ARGB1555 = 2, - LFB_FORMAT_ARGB8888 = 5, - LFB_FORMAT_DEPTH = 15, - LFB_FORMAT_MASK = 15 -}; - -enum -{ - LFB_WRITE_COLOUR = 1, - LFB_WRITE_DEPTH = 2 -}; - -enum -{ - FBZ_CHROMAKEY = (1 << 1), - FBZ_W_BUFFER = (1 << 3), - FBZ_DEPTH_ENABLE = (1 << 4), - - FBZ_DITHER = (1 << 8), - FBZ_RGB_WMASK = (1 << 9), - FBZ_DEPTH_WMASK = (1 << 10), - FBZ_DITHER_2x2 = (1 << 11), - - FBZ_DRAW_FRONT = 0x0000, - FBZ_DRAW_BACK = 0x4000, - FBZ_DRAW_MASK = 0xc000, - - FBZ_DEPTH_BIAS = (1 << 16), - - FBZ_DEPTH_SOURCE = (1 << 20), - - FBZ_PARAM_ADJUST = (1 << 26) -}; - -enum -{ - TEX_RGB332 = 0x0, - TEX_Y4I2Q2 = 0x1, - TEX_A8 = 0x2, - TEX_I8 = 0x3, - TEX_AI8 = 0x4, - TEX_PAL8 = 0x5, - TEX_APAL8 = 0x6, - TEX_ARGB8332 = 0x8, - TEX_A8Y4I2Q2 = 0x9, - TEX_R5G6B5 = 0xa, - TEX_ARGB1555 = 0xb, - TEX_ARGB4444 = 0xc, - TEX_A8I8 = 0xd, - TEX_APAL88 = 0xe -}; - -enum -{ - TEXTUREMODE_NCC_SEL = (1 << 5), - TEXTUREMODE_TCLAMPS = (1 << 6), - TEXTUREMODE_TCLAMPT = (1 << 7), - TEXTUREMODE_TRILINEAR = (1 << 30) -}; - -enum -{ - FBIINIT0_VGA_PASS = 1, - FBIINIT0_GRAPHICS_RESET = (1 << 1) -}; - -enum -{ - FBIINIT1_MULTI_SST = (1 << 2), /*Voodoo Graphics only*/ - FBIINIT1_VIDEO_RESET = (1 << 8), - FBIINIT1_SLI_ENABLE = (1 << 23) -}; - -enum -{ - FBIINIT2_SWAP_ALGORITHM_MASK = (3 << 9) -}; - -enum -{ - FBIINIT2_SWAP_ALGORITHM_DAC_VSYNC = (0 << 9), - FBIINIT2_SWAP_ALGORITHM_DAC_DATA = (1 << 9), - FBIINIT2_SWAP_ALGORITHM_PCI_FIFO_STALL = (2 << 9), - FBIINIT2_SWAP_ALGORITHM_SLI_SYNC = (3 << 9) -}; - -enum -{ - FBIINIT3_REMAP = 1 -}; - -enum -{ - FBIINIT5_MULTI_CVG = (1 << 14) -}; - -enum -{ - FBIINIT7_CMDFIFO_ENABLE = (1 << 8) -}; - -enum -{ - CC_LOCALSELECT_ITER_RGB = 0, - CC_LOCALSELECT_TEX = 1, - CC_LOCALSELECT_COLOR1 = 2, - CC_LOCALSELECT_LFB = 3 -}; - -enum -{ - CCA_LOCALSELECT_ITER_A = 0, - CCA_LOCALSELECT_COLOR0 = 1, - CCA_LOCALSELECT_ITER_Z = 2 -}; - -enum -{ - C_SEL_ITER_RGB = 0, - C_SEL_TEX = 1, - C_SEL_COLOR1 = 2, - C_SEL_LFB = 3 -}; - -enum -{ - A_SEL_ITER_A = 0, - A_SEL_TEX = 1, - A_SEL_COLOR1 = 2, - A_SEL_LFB = 3 -}; - -enum -{ - CC_MSELECT_ZERO = 0, - CC_MSELECT_CLOCAL = 1, - CC_MSELECT_AOTHER = 2, - CC_MSELECT_ALOCAL = 3, - CC_MSELECT_TEX = 4, - CC_MSELECT_TEXRGB = 5 -}; - -enum -{ - CCA_MSELECT_ZERO = 0, - CCA_MSELECT_ALOCAL = 1, - CCA_MSELECT_AOTHER = 2, - CCA_MSELECT_ALOCAL2 = 3, - CCA_MSELECT_TEX = 4 -}; - -enum -{ - TC_MSELECT_ZERO = 0, - TC_MSELECT_CLOCAL = 1, - TC_MSELECT_AOTHER = 2, - TC_MSELECT_ALOCAL = 3, - TC_MSELECT_DETAIL = 4, - TC_MSELECT_LOD_FRAC = 5 -}; - -enum -{ - TCA_MSELECT_ZERO = 0, - TCA_MSELECT_CLOCAL = 1, - TCA_MSELECT_AOTHER = 2, - TCA_MSELECT_ALOCAL = 3, - TCA_MSELECT_DETAIL = 4, - TCA_MSELECT_LOD_FRAC = 5 -}; - -enum -{ - CC_ADD_CLOCAL = 1, - CC_ADD_ALOCAL = 2 -}; - -enum -{ - CCA_ADD_CLOCAL = 1, - CCA_ADD_ALOCAL = 2 -}; - -enum -{ - AFUNC_AZERO = 0x0, - AFUNC_ASRC_ALPHA = 0x1, - AFUNC_A_COLOR = 0x2, - AFUNC_ADST_ALPHA = 0x3, - AFUNC_AONE = 0x4, - AFUNC_AOMSRC_ALPHA = 0x5, - AFUNC_AOM_COLOR = 0x6, - AFUNC_AOMDST_ALPHA = 0x7, - AFUNC_ASATURATE = 0xf -}; - -enum -{ - AFUNC_ACOLORBEFOREFOG = 0xf -}; - -enum -{ - AFUNC_NEVER = 0, - AFUNC_LESSTHAN = 1, - AFUNC_EQUAL = 2, - AFUNC_LESSTHANEQUAL = 3, - AFUNC_GREATERTHAN = 4, - AFUNC_NOTEQUAL = 5, - AFUNC_GREATERTHANEQUAL = 6, - AFUNC_ALWAYS = 7 -}; - -enum -{ - DEPTHOP_NEVER = 0, - DEPTHOP_LESSTHAN = 1, - DEPTHOP_EQUAL = 2, - DEPTHOP_LESSTHANEQUAL = 3, - DEPTHOP_GREATERTHAN = 4, - DEPTHOP_NOTEQUAL = 5, - DEPTHOP_GREATERTHANEQUAL = 6, - DEPTHOP_ALWAYS = 7 -}; - -enum -{ - FOG_ENABLE = 0x01, - FOG_ADD = 0x02, - FOG_MULT = 0x04, - FOG_ALPHA = 0x08, - FOG_Z = 0x10, - FOG_W = 0x18, - FOG_CONSTANT = 0x20 -}; - -enum -{ - LOD_ODD = (1 << 18), - LOD_SPLIT = (1 << 19), - LOD_S_IS_WIDER = (1 << 20), - LOD_TMULTIBASEADDR = (1 << 24), - LOD_TMIRROR_S = (1 << 28), - LOD_TMIRROR_T = (1 << 29) -}; -enum -{ - CMD_INVALID = 0, - CMD_DRAWTRIANGLE, - CMD_FASTFILL, - CMD_SWAPBUF -}; - -enum -{ - FBZCP_TEXTURE_ENABLED = (1 << 27) -}; - -enum -{ - BLTCMD_SRC_TILED = (1 << 14), - BLTCMD_DST_TILED = (1 << 15) -}; - -enum -{ - INITENABLE_SLI_MASTER_SLAVE = (1 << 11) -}; - -#define TEXTUREMODE_MASK 0x3ffff000 -#define TEXTUREMODE_PASSTHROUGH 0 - -#define TEXTUREMODE_LOCAL_MASK 0x00643000 -#define TEXTUREMODE_LOCAL 0x00241000 +int tris = 0; #ifdef ENABLE_VOODOO_LOG int voodoo_do_log = ENABLE_VOODOO_LOG; @@ -1063,58 +69,13 @@ voodoo_log(const char *fmt, ...) #endif -static void voodoo_threshold_check(voodoo_t *voodoo); - -static void voodoo_update_ncc(voodoo_t *voodoo, int tmu) -{ - int tbl; - - for (tbl = 0; tbl < 2; tbl++) - { - int col; - - for (col = 0; col < 256; col++) - { - int y = (col >> 4), i = (col >> 2) & 3, q = col & 3; - int i_r, i_g, i_b; - int q_r, q_g, q_b; - - y = (voodoo->nccTable[tmu][tbl].y[y >> 2] >> ((y & 3) * 8)) & 0xff; - - i_r = (voodoo->nccTable[tmu][tbl].i[i] >> 18) & 0x1ff; - if (i_r & 0x100) - i_r |= 0xfffffe00; - i_g = (voodoo->nccTable[tmu][tbl].i[i] >> 9) & 0x1ff; - if (i_g & 0x100) - i_g |= 0xfffffe00; - i_b = voodoo->nccTable[tmu][tbl].i[i] & 0x1ff; - if (i_b & 0x100) - i_b |= 0xfffffe00; - - q_r = (voodoo->nccTable[tmu][tbl].q[q] >> 18) & 0x1ff; - if (q_r & 0x100) - q_r |= 0xfffffe00; - q_g = (voodoo->nccTable[tmu][tbl].q[q] >> 9) & 0x1ff; - if (q_g & 0x100) - q_g |= 0xfffffe00; - q_b = voodoo->nccTable[tmu][tbl].q[q] & 0x1ff; - if (q_b & 0x100) - q_b |= 0xfffffe00; - - voodoo->ncc_lookup[tmu][tbl][col].rgba.r = CLAMP(y + i_r + q_r); - voodoo->ncc_lookup[tmu][tbl][col].rgba.g = CLAMP(y + i_g + q_g); - voodoo->ncc_lookup[tmu][tbl][col].rgba.b = CLAMP(y + i_b + q_b); - voodoo->ncc_lookup[tmu][tbl][col].rgba.a = 0xff; - } - } -} - -#define SLI_ENABLED (voodoo->fbiInit1 & FBIINIT1_SLI_ENABLE) -#define TRIPLE_BUFFER ((voodoo->fbiInit2 & 0x10) || (voodoo->fbiInit5 & 0x600) == 0x400) -static void voodoo_recalc(voodoo_t *voodoo) +void voodoo_recalc(voodoo_t *voodoo) { uint32_t buffer_offset = ((voodoo->fbiInit2 >> 11) & 511) * 4096; - + + if (voodoo->type >= VOODOO_BANSHEE) + return; + voodoo->params.front_offset = voodoo->disp_buffer*buffer_offset; voodoo->back_offset = voodoo->draw_buffer*buffer_offset; @@ -1156,7 +117,7 @@ static void voodoo_recalc(voodoo_t *voodoo) default: fatal("voodoo_recalc : unknown lfb source\n"); } - + switch (voodoo->params.fbzMode & FBZ_DRAW_MASK) { case FBZ_DRAW_FRONT: @@ -1171,4789 +132,16 @@ static void voodoo_recalc(voodoo_t *voodoo) default: fatal("voodoo_recalc : unknown draw buffer\n"); } - + voodoo->block_width = ((voodoo->fbiInit1 >> 4) & 15) * 2; if (voodoo->fbiInit6 & (1 << 30)) voodoo->block_width += 1; if (voodoo->fbiInit1 & (1 << 24)) voodoo->block_width += 32; voodoo->row_width = voodoo->block_width * 32 * 2; - -/* voodoo_log("voodoo_recalc : front_offset %08X back_offset %08X aux_offset %08X draw_offset %08x\n", voodoo->params.front_offset, voodoo->back_offset, voodoo->params.aux_offset, voodoo->params.draw_offset); - voodoo_log(" fb_read_offset %08X fb_write_offset %08X row_width %i %08x %08x\n", voodoo->fb_read_offset, voodoo->fb_write_offset, voodoo->row_width, voodoo->lfbMode, voodoo->params.fbzMode);*/ + voodoo->aux_row_width = voodoo->row_width; } -static void voodoo_recalc_tex(voodoo_t *voodoo, int tmu) -{ - int aspect = (voodoo->params.tLOD[tmu] >> 21) & 3; - int width = 256, height = 256; - int shift = 8; - int lod; - uint32_t base = voodoo->params.texBaseAddr[tmu]; - uint32_t offset = 0; - int tex_lod = 0; - - if (voodoo->params.tLOD[tmu] & LOD_S_IS_WIDER) - height >>= aspect; - else - { - width >>= aspect; - shift -= aspect; - } - - if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD)) - { - width >>= 1; - height >>= 1; - shift--; - tex_lod++; - if (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR) - base = voodoo->params.texBaseAddr1[tmu]; - } - - for (lod = 0; lod <= LOD_MAX+1; lod++) - { - if (!width) - width = 1; - if (!height) - height = 1; - if (shift < 0) - shift = 0; - voodoo->params.tex_base[tmu][lod] = base + offset; - if (voodoo->params.tformat[tmu] & 8) - voodoo->params.tex_end[tmu][lod] = base + offset + (width * height * 2); - else - voodoo->params.tex_end[tmu][lod] = base + offset + (width * height); - voodoo->params.tex_w_mask[tmu][lod] = width - 1; - voodoo->params.tex_w_nmask[tmu][lod] = ~(width - 1); - voodoo->params.tex_h_mask[tmu][lod] = height - 1; - voodoo->params.tex_shift[tmu][lod] = shift; - voodoo->params.tex_lod[tmu][lod] = tex_lod; - - if (!(voodoo->params.tLOD[tmu] & LOD_SPLIT) || ((lod & 1) && (voodoo->params.tLOD[tmu] & LOD_ODD)) || (!(lod & 1) && !(voodoo->params.tLOD[tmu] & LOD_ODD))) - { - if (!(voodoo->params.tLOD[tmu] & LOD_ODD) || lod != 0) - { - if (voodoo->params.tformat[tmu] & 8) - offset += width * height * 2; - else - offset += width * height; - - if (voodoo->params.tLOD[tmu] & LOD_SPLIT) - { - width >>= 2; - height >>= 2; - shift -= 2; - tex_lod += 2; - } - else - { - width >>= 1; - height >>= 1; - shift--; - tex_lod++; - } - - if (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR) - { - switch (tex_lod) - { - case 0: - base = voodoo->params.texBaseAddr[tmu]; - break; - case 1: - base = voodoo->params.texBaseAddr1[tmu]; - break; - case 2: - base = voodoo->params.texBaseAddr2[tmu]; - break; - default: - base = voodoo->params.texBaseAddr38[tmu]; - break; - } - } - } - } - } - - voodoo->params.tex_width[tmu] = width; -} - -#define makergba(r, g, b, a) ((b) | ((g) << 8) | ((r) << 16) | ((a) << 24)) - -static void use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu) -{ - int c, d; - int lod; - int lod_min, lod_max; - uint32_t addr = 0, addr_end; - uint32_t palette_checksum; - - lod_min = (params->tLOD[tmu] >> 2) & 15; - lod_max = (params->tLOD[tmu] >> 8) & 15; - - if (params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL8 || params->tformat[tmu] == TEX_APAL88) - { - if (voodoo->palette_dirty[tmu]) - { - palette_checksum = 0; - - for (c = 0; c < 256; c++) - palette_checksum ^= voodoo->palette[tmu][c].u; - - voodoo->palette_checksum[tmu] = palette_checksum; - voodoo->palette_dirty[tmu] = 0; - } - else - palette_checksum = voodoo->palette_checksum[tmu]; - } - else - palette_checksum = 0; - - if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD) && (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR)) - addr = params->texBaseAddr1[tmu]; - else - addr = params->texBaseAddr[tmu]; - - /*Try to find texture in cache*/ - for (c = 0; c < TEX_CACHE_MAX; c++) - { - if (voodoo->texture_cache[tmu][c].base == addr && - voodoo->texture_cache[tmu][c].tLOD == (params->tLOD[tmu] & 0xf00fff) && - voodoo->texture_cache[tmu][c].palette_checksum == palette_checksum) - { - params->tex_entry[tmu] = c; - voodoo->texture_cache[tmu][c].refcount++; - return; - } - } - - /*Texture not found, search for unused texture*/ - do - { - for (c = 0; c < TEX_CACHE_MAX; c++) - { - voodoo->texture_last_removed++; - voodoo->texture_last_removed &= (TEX_CACHE_MAX-1); - if (voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[0] && - (voodoo->render_threads == 1 || voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[1])) - break; - } - if (c == TEX_CACHE_MAX) - wait_for_render_thread_idle(voodoo); - } while (c == TEX_CACHE_MAX); - if (c == TEX_CACHE_MAX) - fatal("Texture cache full!\n"); - - c = voodoo->texture_last_removed; - - - if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD) && (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR)) - voodoo->texture_cache[tmu][c].base = params->texBaseAddr1[tmu]; - else - voodoo->texture_cache[tmu][c].base = params->texBaseAddr[tmu]; - voodoo->texture_cache[tmu][c].tLOD = params->tLOD[tmu] & 0xf00fff; - - lod_min = (params->tLOD[tmu] >> 2) & 15; - lod_max = (params->tLOD[tmu] >> 8) & 15; -// voodoo_log(" add new texture to %i tformat=%i %08x LOD=%i-%i tmu=%i\n", c, voodoo->params.tformat[tmu], params->texBaseAddr[tmu], lod_min, lod_max, tmu); - - lod_min = MIN(lod_min, 8); - lod_max = MIN(lod_max, 8); - for (lod = lod_min; lod <= lod_max; lod++) - { - uint32_t *base = &voodoo->texture_cache[tmu][c].data[texture_offset[lod]]; - uint32_t tex_addr = params->tex_base[tmu][lod] & voodoo->texture_mask; - int x, y; - int shift = 8 - params->tex_lod[tmu][lod]; - rgba_u *pal; - - //voodoo_log(" LOD %i : %08x - %08x %i %i,%i\n", lod, params->tex_base[tmu][lod] & voodoo->texture_mask, addr, voodoo->params.tformat[tmu], voodoo->params.tex_w_mask[tmu][lod],voodoo->params.tex_h_mask[tmu][lod]); - - - switch (params->tformat[tmu]) - { - case TEX_RGB332: - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; - - base[x] = makergba(rgb332[dat].r, rgb332[dat].g, rgb332[dat].b, 0xff); - } - tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); - base += (1 << shift); - } - break; - - case TEX_Y4I2Q2: - pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 1 : 0]; - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; - - base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff); - } - tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); - base += (1 << shift); - } - break; - - case TEX_A8: - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; - - base[x] = makergba(dat, dat, dat, dat); - } - tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); - base += (1 << shift); - } - break; - - case TEX_I8: - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; - - base[x] = makergba(dat, dat, dat, 0xff); - } - tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); - base += (1 << shift); - } - break; - - case TEX_AI8: - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; - - base[x] = makergba((dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0xf0) | ((dat >> 4) & 0x0f)); - } - tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); - base += (1 << shift); - } - break; - - case TEX_PAL8: - pal = voodoo->palette[tmu]; - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; - - base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff); - } - tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); - base += (1 << shift); - } - break; - - case TEX_APAL8: - pal = voodoo->palette[tmu]; - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; - - int r = ((pal[dat].rgba.r & 3) << 6) | ((pal[dat].rgba.g & 0xf0) >> 2) | (pal[dat].rgba.r & 3); - int g = ((pal[dat].rgba.g & 0xf) << 4) | ((pal[dat].rgba.b & 0xc0) >> 4) | ((pal[dat].rgba.g & 0xf) >> 2); - int b = ((pal[dat].rgba.b & 0x3f) << 2) | ((pal[dat].rgba.b & 0x30) >> 4); - int a = (pal[dat].rgba.r & 0xfc) | ((pal[dat].rgba.r & 0xc0) >> 6); - - base[x] = makergba(r, g, b, a); - } - tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); - base += (1 << shift); - } - break; - - case TEX_ARGB8332: - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; - - base[x] = makergba(rgb332[dat & 0xff].r, rgb332[dat & 0xff].g, rgb332[dat & 0xff].b, dat >> 8); - } - tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); - base += (1 << shift); - } - break; - - case TEX_A8Y4I2Q2: - pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 1 : 0]; - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; - - base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8); - } - tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); - base += (1 << shift); - } - break; - - case TEX_R5G6B5: - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; - - base[x] = makergba(rgb565[dat].r, rgb565[dat].g, rgb565[dat].b, 0xff); - } - tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); - base += (1 << shift); - } - break; - - case TEX_ARGB1555: - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; - - base[x] = makergba(argb1555[dat].r, argb1555[dat].g, argb1555[dat].b, argb1555[dat].a); - } - tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); - base += (1 << shift); - } - break; - - case TEX_ARGB4444: - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; - - base[x] = makergba(argb4444[dat].r, argb4444[dat].g, argb4444[dat].b, argb4444[dat].a); - } - tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); - base += (1 << shift); - } - break; - - case TEX_A8I8: - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; - - base[x] = makergba(dat & 0xff, dat & 0xff, dat & 0xff, dat >> 8); - } - tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); - base += (1 << shift); - } - break; - - case TEX_APAL88: - pal = voodoo->palette[tmu]; - for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) - { - for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) - { - uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; - - base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8); - } - tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); - base += (1 << shift); - } - break; - - default: - fatal("Unknown texture format %i\n", params->tformat[tmu]); - } - } - - voodoo->texture_cache[tmu][c].is16 = voodoo->params.tformat[tmu] & 8; - - if (params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL8 || params->tformat[tmu] == TEX_APAL88) - voodoo->texture_cache[tmu][c].palette_checksum = palette_checksum; - else - voodoo->texture_cache[tmu][c].palette_checksum = 0; - - if (lod_min == 0) - { - voodoo->texture_cache[tmu][c].addr_start[0] = voodoo->params.tex_base[tmu][0]; - voodoo->texture_cache[tmu][c].addr_end[0] = voodoo->params.tex_end[tmu][0]; - } - else - voodoo->texture_cache[tmu][c].addr_start[0] = voodoo->texture_cache[tmu][c].addr_end[0] = 0; - - if (lod_min <= 1 && lod_max >= 1) - { - voodoo->texture_cache[tmu][c].addr_start[1] = voodoo->params.tex_base[tmu][1]; - voodoo->texture_cache[tmu][c].addr_end[1] = voodoo->params.tex_end[tmu][1]; - } - else - voodoo->texture_cache[tmu][c].addr_start[1] = voodoo->texture_cache[tmu][c].addr_end[1] = 0; - - if (lod_min <= 2 && lod_max >= 2) - { - voodoo->texture_cache[tmu][c].addr_start[2] = voodoo->params.tex_base[tmu][2]; - voodoo->texture_cache[tmu][c].addr_end[2] = voodoo->params.tex_end[tmu][2]; - } - else - voodoo->texture_cache[tmu][c].addr_start[2] = voodoo->texture_cache[tmu][c].addr_end[2] = 0; - - if (lod_max >= 3) - { - voodoo->texture_cache[tmu][c].addr_start[3] = voodoo->params.tex_base[tmu][(lod_min > 3) ? lod_min : 3]; - voodoo->texture_cache[tmu][c].addr_end[3] = voodoo->params.tex_end[tmu][(lod_max < 8) ? lod_max : 8]; - } - else - voodoo->texture_cache[tmu][c].addr_start[3] = voodoo->texture_cache[tmu][c].addr_end[3] = 0; - - - for (d = 0; d < 4; d++) - { - addr = voodoo->texture_cache[tmu][c].addr_start[d]; - addr_end = voodoo->texture_cache[tmu][c].addr_end[d]; - - if (addr_end != 0) - { - for (; addr <= addr_end; addr += (1 << TEX_DIRTY_SHIFT)) - voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1; - } - } - - params->tex_entry[tmu] = c; - voodoo->texture_cache[tmu][c].refcount++; -} - -static void flush_texture_cache(voodoo_t *voodoo, uint32_t dirty_addr, int tmu) -{ - int wait_for_idle = 0; - int c; - - memset(voodoo->texture_present[tmu], 0, sizeof(voodoo->texture_present[0])); -// voodoo_log("Evict %08x %i\n", dirty_addr, sizeof(voodoo->texture_present)); - for (c = 0; c < TEX_CACHE_MAX; c++) - { - if (voodoo->texture_cache[tmu][c].base != -1) - { - int d; - - for (d = 0; d < 4; d++) - { - int addr_start = voodoo->texture_cache[tmu][c].addr_start[d]; - int addr_end = voodoo->texture_cache[tmu][c].addr_end[d]; - - if (addr_end != 0) - { - int addr_start_masked = addr_start & voodoo->texture_mask & ~0x3ff; - int addr_end_masked = ((addr_end & voodoo->texture_mask) + 0x3ff) & ~0x3ff; - - if (addr_end_masked < addr_start_masked) - addr_end_masked = voodoo->texture_mask+1; - if (dirty_addr >= addr_start_masked && dirty_addr < addr_end_masked) - { -// voodoo_log(" Evict texture %i %08x\n", c, voodoo->texture_cache[tmu][c].base); - - if (voodoo->texture_cache[tmu][c].refcount != voodoo->texture_cache[tmu][c].refcount_r[0] || - (voodoo->render_threads == 2 && voodoo->texture_cache[tmu][c].refcount != voodoo->texture_cache[tmu][c].refcount_r[1])) - wait_for_idle = 1; - - voodoo->texture_cache[tmu][c].base = -1; - } - else - { - for (; addr_start <= addr_end; addr_start += (1 << TEX_DIRTY_SHIFT)) - voodoo->texture_present[tmu][(addr_start & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1; - } - } - } - } - } - if (wait_for_idle) - wait_for_render_thread_idle(voodoo); -} - -typedef struct voodoo_state_t -{ - int xstart, xend, xdir; - uint32_t base_r, base_g, base_b, base_a, base_z; - struct - { - int64_t base_s, base_t, base_w; - int lod; - } tmu[2]; - int64_t base_w; - int lod; - int lod_min[2], lod_max[2]; - int dx1, dx2; - int y, yend, ydir; - int32_t dxAB, dxAC, dxBC; - int tex_b[2], tex_g[2], tex_r[2], tex_a[2]; - int tex_s, tex_t; - int clamp_s[2], clamp_t[2]; - - int32_t vertexAx, vertexAy, vertexBx, vertexBy, vertexCx, vertexCy; - - uint32_t *tex[2][LOD_MAX+1]; - int tformat; - - int *tex_w_mask[2]; - int *tex_h_mask[2]; - int *tex_shift[2]; - int *tex_lod[2]; - - uint16_t *fb_mem, *aux_mem; - - int32_t ib, ig, ir, ia; - int32_t z; - - int32_t new_depth; - - int64_t tmu0_s, tmu0_t; - int64_t tmu0_w; - int64_t tmu1_s, tmu1_t; - int64_t tmu1_w; - int64_t w; - - int pixel_count, texel_count; - int x, x2; - - uint32_t w_depth; - - float log_temp; - uint32_t ebp_store; - uint32_t texBaseAddr; - - int lod_frac[2]; -} voodoo_state_t; - -static int voodoo_output = 0; - -static uint8_t logtable[256] = -{ - 0x00,0x01,0x02,0x04,0x05,0x07,0x08,0x09,0x0b,0x0c,0x0e,0x0f,0x10,0x12,0x13,0x15, - 0x16,0x17,0x19,0x1a,0x1b,0x1d,0x1e,0x1f,0x21,0x22,0x23,0x25,0x26,0x27,0x28,0x2a, - 0x2b,0x2c,0x2e,0x2f,0x30,0x31,0x33,0x34,0x35,0x36,0x38,0x39,0x3a,0x3b,0x3d,0x3e, - 0x3f,0x40,0x41,0x43,0x44,0x45,0x46,0x47,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x50,0x51, - 0x52,0x53,0x54,0x55,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x60,0x61,0x62,0x63, - 0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74, - 0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x83,0x84,0x85, - 0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8c,0x8d,0x8e,0x8f,0x90,0x91,0x92,0x93,0x94, - 0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,0xa0,0xa1,0xa2,0xa2,0xa3, - 0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xad,0xae,0xaf,0xb0,0xb1,0xb2, - 0xb3,0xb4,0xb5,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbc,0xbd,0xbe,0xbf,0xc0, - 0xc1,0xc2,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xcd, - 0xce,0xcf,0xd0,0xd1,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd6,0xd7,0xd8,0xd9,0xda,0xda, - 0xdb,0xdc,0xdd,0xde,0xde,0xdf,0xe0,0xe1,0xe1,0xe2,0xe3,0xe4,0xe5,0xe5,0xe6,0xe7, - 0xe8,0xe8,0xe9,0xea,0xeb,0xeb,0xec,0xed,0xee,0xef,0xef,0xf0,0xf1,0xf2,0xf2,0xf3, - 0xf4,0xf5,0xf5,0xf6,0xf7,0xf7,0xf8,0xf9,0xfa,0xfa,0xfb,0xfc,0xfd,0xfd,0xfe,0xff -}; - -static inline int fastlog(uint64_t val) -{ - uint64_t oldval = val; - int exp = 63; - int frac; - - if (!val || val & (1ULL << 63)) - return 0x80000000; - - if (!(val & 0xffffffff00000000)) - { - exp -= 32; - val <<= 32; - } - if (!(val & 0xffff000000000000)) - { - exp -= 16; - val <<= 16; - } - if (!(val & 0xff00000000000000)) - { - exp -= 8; - val <<= 8; - } - if (!(val & 0xf000000000000000)) - { - exp -= 4; - val <<= 4; - } - if (!(val & 0xc000000000000000)) - { - exp -= 2; - val <<= 2; - } - if (!(val & 0x8000000000000000)) - { - exp -= 1; - val <<= 1; - } - - if (exp >= 8) - frac = (oldval >> (exp - 8)) & 0xff; - else - frac = (oldval << (8 - exp)) & 0xff; - - return (exp << 8) | logtable[frac]; -} - -static inline int voodoo_fls(uint16_t val) -{ - int num = 0; - -//voodoo_log("fls(%04x) = ", val); - if (!(val & 0xff00)) - { - num += 8; - val <<= 8; - } - if (!(val & 0xf000)) - { - num += 4; - val <<= 4; - } - if (!(val & 0xc000)) - { - num += 2; - val <<= 2; - } - if (!(val & 0x8000)) - { - num += 1; - val <<= 1; - } -//voodoo_log("%i %04x\n", num, val); - return num; -} - -typedef struct voodoo_texture_state_t -{ - int s, t; - int w_mask, h_mask; - int tex_shift; -} voodoo_texture_state_t; - -static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int tmu) -{ - uint32_t dat; - - if (texture_state->s & ~texture_state->w_mask) - { - if (state->clamp_s[tmu]) - { - if (texture_state->s < 0) - texture_state->s = 0; - if (texture_state->s > texture_state->w_mask) - texture_state->s = texture_state->w_mask; - } - else - texture_state->s &= texture_state->w_mask; - } - if (texture_state->t & ~texture_state->h_mask) - { - if (state->clamp_t[tmu]) - { - if (texture_state->t < 0) - texture_state->t = 0; - if (texture_state->t > texture_state->h_mask) - texture_state->t = texture_state->h_mask; - } - else - texture_state->t &= texture_state->h_mask; - } - - dat = state->tex[tmu][state->lod][texture_state->s + (texture_state->t << texture_state->tex_shift)]; - - state->tex_b[tmu] = dat & 0xff; - state->tex_g[tmu] = (dat >> 8) & 0xff; - state->tex_r[tmu] = (dat >> 16) & 0xff; - state->tex_a[tmu] = (dat >> 24) & 0xff; -} - -#define LOW4(x) ((x & 0x0f) | ((x & 0x0f) << 4)) -#define HIGH4(x) ((x & 0xf0) | ((x & 0xf0) >> 4)) - -static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int s, int t, int *d, int tmu, int x) -{ - rgba_u dat[4]; - - if (((s | (s + 1)) & ~texture_state->w_mask) || ((t | (t + 1)) & ~texture_state->h_mask)) - { - int c; - for (c = 0; c < 4; c++) - { - int _s = s + (c & 1); - int _t = t + ((c & 2) >> 1); - - if (_s & ~texture_state->w_mask) - { - if (state->clamp_s[tmu]) - { - if (_s < 0) - _s = 0; - if (_s > texture_state->w_mask) - _s = texture_state->w_mask; - } - else - _s &= texture_state->w_mask; - } - if (_t & ~texture_state->h_mask) - { - if (state->clamp_t[tmu]) - { - if (_t < 0) - _t = 0; - if (_t > texture_state->h_mask) - _t = texture_state->h_mask; - } - else - _t &= texture_state->h_mask; - } - dat[c].u = state->tex[tmu][state->lod][_s + (_t << texture_state->tex_shift)]; - } - } - else - { - dat[0].u = state->tex[tmu][state->lod][s + (t << texture_state->tex_shift)]; - dat[1].u = state->tex[tmu][state->lod][s + 1 + (t << texture_state->tex_shift)]; - dat[2].u = state->tex[tmu][state->lod][s + ((t + 1) << texture_state->tex_shift)]; - dat[3].u = state->tex[tmu][state->lod][s + 1 + ((t + 1) << texture_state->tex_shift)]; - } - - state->tex_r[tmu] = (dat[0].rgba.r * d[0] + dat[1].rgba.r * d[1] + dat[2].rgba.r * d[2] + dat[3].rgba.r * d[3]) >> 8; - state->tex_g[tmu] = (dat[0].rgba.g * d[0] + dat[1].rgba.g * d[1] + dat[2].rgba.g * d[2] + dat[3].rgba.g * d[3]) >> 8; - state->tex_b[tmu] = (dat[0].rgba.b * d[0] + dat[1].rgba.b * d[1] + dat[2].rgba.b * d[2] + dat[3].rgba.b * d[3]) >> 8; - state->tex_a[tmu] = (dat[0].rgba.a * d[0] + dat[1].rgba.a * d[1] + dat[2].rgba.a * d[2] + dat[3].rgba.a * d[3]) >> 8; -} - -static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) -{ - voodoo_texture_state_t texture_state; - int d[4]; - int s, t; - int tex_lod = state->tex_lod[tmu][state->lod]; - - texture_state.w_mask = state->tex_w_mask[tmu][state->lod]; - texture_state.h_mask = state->tex_h_mask[tmu][state->lod]; - texture_state.tex_shift = 8 - tex_lod; - - if (params->tLOD[tmu] & LOD_TMIRROR_S) - { - if (state->tex_s & 0x1000) - state->tex_s = ~state->tex_s; - } - if (params->tLOD[tmu] & LOD_TMIRROR_T) - { - if (state->tex_t & 0x1000) - state->tex_t = ~state->tex_t; - } - - if (voodoo->bilinear_enabled && params->textureMode[tmu] & 6) - { - int _ds, dt; - - state->tex_s -= 1 << (3+tex_lod); - state->tex_t -= 1 << (3+tex_lod); - - s = state->tex_s >> tex_lod; - t = state->tex_t >> tex_lod; - - _ds = s & 0xf; - dt = t & 0xf; - - s >>= 4; - t >>= 4; -//if (x == 80) -//if (voodoo_output) -// voodoo_log("s=%08x t=%08x _ds=%02x _dt=%02x\n", s, t, _ds, dt); - d[0] = (16 - _ds) * (16 - dt); - d[1] = _ds * (16 - dt); - d[2] = (16 - _ds) * dt; - d[3] = _ds * dt; - -// texture_state.s = s; -// texture_state.t = t; - tex_read_4(state, &texture_state, s, t, d, tmu, x); - - -/* state->tex_r = (tex_samples[0].rgba.r * d[0] + tex_samples[1].rgba.r * d[1] + tex_samples[2].rgba.r * d[2] + tex_samples[3].rgba.r * d[3]) >> 8; - state->tex_g = (tex_samples[0].rgba.g * d[0] + tex_samples[1].rgba.g * d[1] + tex_samples[2].rgba.g * d[2] + tex_samples[3].rgba.g * d[3]) >> 8; - state->tex_b = (tex_samples[0].rgba.b * d[0] + tex_samples[1].rgba.b * d[1] + tex_samples[2].rgba.b * d[2] + tex_samples[3].rgba.b * d[3]) >> 8; - state->tex_a = (tex_samples[0].rgba.a * d[0] + tex_samples[1].rgba.a * d[1] + tex_samples[2].rgba.a * d[2] + tex_samples[3].rgba.a * d[3]) >> 8;*/ -/* state->tex_r = tex_samples[0].r; - state->tex_g = tex_samples[0].g; - state->tex_b = tex_samples[0].b; - state->tex_a = tex_samples[0].a;*/ - } - else - { - // rgba_t tex_samples; - // voodoo_texture_state_t texture_state; -// int s = state->tex_s >> (18+state->lod); -// int t = state->tex_t >> (18+state->lod); - // int s, t; - -// state->tex_s -= 1 << (17+state->lod); -// state->tex_t -= 1 << (17+state->lod); - - s = state->tex_s >> (4+tex_lod); - t = state->tex_t >> (4+tex_lod); - - texture_state.s = s; - texture_state.t = t; - tex_read(state, &texture_state, tmu); - -/* state->tex_r = tex_samples[0].rgba.r; - state->tex_g = tex_samples[0].rgba.g; - state->tex_b = tex_samples[0].rgba.b; - state->tex_a = tex_samples[0].rgba.a;*/ - } -} - -static inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) -{ - if (params->textureMode[tmu] & 1) - { - int64_t _w = 0; - - if (tmu) - { - if (state->tmu1_w) - _w = (int64_t)((1ULL << 48) / state->tmu1_w); - state->tex_s = (int32_t)(((((state->tmu1_s + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); - state->tex_t = (int32_t)(((((state->tmu1_t + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); - } - else - { - if (state->tmu0_w) - _w = (int64_t)((1ULL << 48) / state->tmu0_w); - state->tex_s = (int32_t)(((((state->tmu0_s + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); - state->tex_t = (int32_t)(((((state->tmu0_t + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); - } - - state->lod = state->tmu[tmu].lod + (fastlog(_w) - (19 << 8)); - } - else - { - if (tmu) - { - state->tex_s = (int32_t)(state->tmu1_s >> (14+14)); - state->tex_t = (int32_t)(state->tmu1_t >> (14+14)); - } - else - { - state->tex_s = (int32_t)(state->tmu0_s >> (14+14)); - state->tex_t = (int32_t)(state->tmu0_t >> (14+14)); - } - state->lod = state->tmu[tmu].lod; - } - - if (state->lod < state->lod_min[tmu]) - state->lod = state->lod_min[tmu]; - else if (state->lod > state->lod_max[tmu]) - state->lod = state->lod_max[tmu]; - state->lod_frac[tmu] = state->lod & 0xff; - state->lod >>= 8; - - voodoo_get_texture(voodoo, params, state, tmu, x); -} - -#define DEPTH_TEST(comp_depth) \ - do \ - { \ - switch (depth_op) \ - { \ - case DEPTHOP_NEVER: \ - voodoo->fbiZFuncFail++; \ - goto skip_pixel; \ - case DEPTHOP_LESSTHAN: \ - if (!(comp_depth < old_depth)) \ - { \ - voodoo->fbiZFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case DEPTHOP_EQUAL: \ - if (!(comp_depth == old_depth)) \ - { \ - voodoo->fbiZFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case DEPTHOP_LESSTHANEQUAL: \ - if (!(comp_depth <= old_depth)) \ - { \ - voodoo->fbiZFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case DEPTHOP_GREATERTHAN: \ - if (!(comp_depth > old_depth)) \ - { \ - voodoo->fbiZFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case DEPTHOP_NOTEQUAL: \ - if (!(comp_depth != old_depth)) \ - { \ - voodoo->fbiZFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case DEPTHOP_GREATERTHANEQUAL: \ - if (!(comp_depth >= old_depth)) \ - { \ - voodoo->fbiZFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case DEPTHOP_ALWAYS: \ - break; \ - } \ - } while (0) - -#define APPLY_FOG(src_r, src_g, src_b, z, ia, w) \ - do \ - { \ - if (params->fogMode & FOG_CONSTANT) \ - { \ - src_r += params->fogColor.r; \ - src_g += params->fogColor.g; \ - src_b += params->fogColor.b; \ - } \ - else \ - { \ - int fog_r, fog_g, fog_b, fog_a = 0; \ - int fog_idx; \ - \ - if (!(params->fogMode & FOG_ADD)) \ - { \ - fog_r = params->fogColor.r; \ - fog_g = params->fogColor.g; \ - fog_b = params->fogColor.b; \ - } \ - else \ - fog_r = fog_g = fog_b = 0; \ - \ - if (!(params->fogMode & FOG_MULT)) \ - { \ - fog_r -= src_r; \ - fog_g -= src_g; \ - fog_b -= src_b; \ - } \ - \ - switch (params->fogMode & (FOG_Z|FOG_ALPHA)) \ - { \ - case 0: \ - fog_idx = (w_depth >> 10) & 0x3f; \ - \ - fog_a = params->fogTable[fog_idx].fog; \ - fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10; \ - break; \ - case FOG_Z: \ - fog_a = (z >> 20) & 0xff; \ - break; \ - case FOG_ALPHA: \ - fog_a = CLAMP(ia >> 12); \ - break; \ - case FOG_W: \ - fog_a = CLAMP((w >> 32) & 0xff); \ - break; \ - } \ - fog_a++; \ - \ - fog_r = (fog_r * fog_a) >> 8; \ - fog_g = (fog_g * fog_a) >> 8; \ - fog_b = (fog_b * fog_a) >> 8; \ - \ - if (params->fogMode & FOG_MULT) \ - { \ - src_r = fog_r; \ - src_g = fog_g; \ - src_b = fog_b; \ - } \ - else \ - { \ - src_r += fog_r; \ - src_g += fog_g; \ - src_b += fog_b; \ - } \ - } \ - \ - src_r = CLAMP(src_r); \ - src_g = CLAMP(src_g); \ - src_b = CLAMP(src_b); \ - } while (0) - -#define ALPHA_TEST(src_a) \ - do \ - { \ - switch (alpha_func) \ - { \ - case AFUNC_NEVER: \ - voodoo->fbiAFuncFail++; \ - goto skip_pixel; \ - case AFUNC_LESSTHAN: \ - if (!(src_a < a_ref)) \ - { \ - voodoo->fbiAFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case AFUNC_EQUAL: \ - if (!(src_a == a_ref)) \ - { \ - voodoo->fbiAFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case AFUNC_LESSTHANEQUAL: \ - if (!(src_a <= a_ref)) \ - { \ - voodoo->fbiAFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case AFUNC_GREATERTHAN: \ - if (!(src_a > a_ref)) \ - { \ - voodoo->fbiAFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case AFUNC_NOTEQUAL: \ - if (!(src_a != a_ref)) \ - { \ - voodoo->fbiAFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case AFUNC_GREATERTHANEQUAL: \ - if (!(src_a >= a_ref)) \ - { \ - voodoo->fbiAFuncFail++; \ - goto skip_pixel; \ - } \ - break; \ - case AFUNC_ALWAYS: \ - break; \ - } \ - } while (0) - -#define ALPHA_BLEND(src_r, src_g, src_b, src_a) \ - do \ - { \ - int _a; \ - int newdest_r = 0, newdest_g = 0, newdest_b = 0; \ - \ - switch (dest_afunc) \ - { \ - case AFUNC_AZERO: \ - newdest_r = newdest_g = newdest_b = 0; \ - break; \ - case AFUNC_ASRC_ALPHA: \ - newdest_r = (dest_r * src_a) / 255; \ - newdest_g = (dest_g * src_a) / 255; \ - newdest_b = (dest_b * src_a) / 255; \ - break; \ - case AFUNC_A_COLOR: \ - newdest_r = (dest_r * src_r) / 255; \ - newdest_g = (dest_g * src_g) / 255; \ - newdest_b = (dest_b * src_b) / 255; \ - break; \ - case AFUNC_ADST_ALPHA: \ - newdest_r = (dest_r * dest_a) / 255; \ - newdest_g = (dest_g * dest_a) / 255; \ - newdest_b = (dest_b * dest_a) / 255; \ - break; \ - case AFUNC_AONE: \ - newdest_r = dest_r; \ - newdest_g = dest_g; \ - newdest_b = dest_b; \ - break; \ - case AFUNC_AOMSRC_ALPHA: \ - newdest_r = (dest_r * (255-src_a)) / 255; \ - newdest_g = (dest_g * (255-src_a)) / 255; \ - newdest_b = (dest_b * (255-src_a)) / 255; \ - break; \ - case AFUNC_AOM_COLOR: \ - newdest_r = (dest_r * (255-src_r)) / 255; \ - newdest_g = (dest_g * (255-src_g)) / 255; \ - newdest_b = (dest_b * (255-src_b)) / 255; \ - break; \ - case AFUNC_AOMDST_ALPHA: \ - newdest_r = (dest_r * (255-dest_a)) / 255; \ - newdest_g = (dest_g * (255-dest_a)) / 255; \ - newdest_b = (dest_b * (255-dest_a)) / 255; \ - break; \ - case AFUNC_ASATURATE: \ - _a = MIN(src_a, 1-dest_a); \ - newdest_r = (dest_r * _a) / 255; \ - newdest_g = (dest_g * _a) / 255; \ - newdest_b = (dest_b * _a) / 255; \ - break; \ - } \ - \ - switch (src_afunc) \ - { \ - case AFUNC_AZERO: \ - src_r = src_g = src_b = 0; \ - break; \ - case AFUNC_ASRC_ALPHA: \ - src_r = (src_r * src_a) / 255; \ - src_g = (src_g * src_a) / 255; \ - src_b = (src_b * src_a) / 255; \ - break; \ - case AFUNC_A_COLOR: \ - src_r = (src_r * dest_r) / 255; \ - src_g = (src_g * dest_g) / 255; \ - src_b = (src_b * dest_b) / 255; \ - break; \ - case AFUNC_ADST_ALPHA: \ - src_r = (src_r * dest_a) / 255; \ - src_g = (src_g * dest_a) / 255; \ - src_b = (src_b * dest_a) / 255; \ - break; \ - case AFUNC_AONE: \ - break; \ - case AFUNC_AOMSRC_ALPHA: \ - src_r = (src_r * (255-src_a)) / 255; \ - src_g = (src_g * (255-src_a)) / 255; \ - src_b = (src_b * (255-src_a)) / 255; \ - break; \ - case AFUNC_AOM_COLOR: \ - src_r = (src_r * (255-dest_r)) / 255; \ - src_g = (src_g * (255-dest_g)) / 255; \ - src_b = (src_b * (255-dest_b)) / 255; \ - break; \ - case AFUNC_AOMDST_ALPHA: \ - src_r = (src_r * (255-dest_a)) / 255; \ - src_g = (src_g * (255-dest_a)) / 255; \ - src_b = (src_b * (255-dest_a)) / 255; \ - break; \ - case AFUNC_ACOLORBEFOREFOG: \ - fatal("AFUNC_ACOLORBEFOREFOG\n"); \ - break; \ - } \ - \ - src_r += newdest_r; \ - src_g += newdest_g; \ - src_b += newdest_b; \ - \ - src_r = CLAMP(src_r); \ - src_g = CLAMP(src_g); \ - src_b = CLAMP(src_b); \ - } while(0) - - -#define _rgb_sel ( params->fbzColorPath & 3) -#define a_sel ( (params->fbzColorPath >> 2) & 3) -#define cc_localselect ( params->fbzColorPath & (1 << 4)) -#define cca_localselect ( (params->fbzColorPath >> 5) & 3) -#define cc_localselect_override ( params->fbzColorPath & (1 << 7)) -#define cc_zero_other ( params->fbzColorPath & (1 << 8)) -#define cc_sub_clocal ( params->fbzColorPath & (1 << 9)) -#define cc_mselect ( (params->fbzColorPath >> 10) & 7) -#define cc_reverse_blend ( params->fbzColorPath & (1 << 13)) -#define cc_add ( (params->fbzColorPath >> 14) & 3) -#define cc_add_alocal ( params->fbzColorPath & (1 << 15)) -#define cc_invert_output ( params->fbzColorPath & (1 << 16)) -#define cca_zero_other ( params->fbzColorPath & (1 << 17)) -#define cca_sub_clocal ( params->fbzColorPath & (1 << 18)) -#define cca_mselect ( (params->fbzColorPath >> 19) & 7) -#define cca_reverse_blend ( params->fbzColorPath & (1 << 22)) -#define cca_add ( (params->fbzColorPath >> 23) & 3) -#define cca_invert_output ( params->fbzColorPath & (1 << 25)) -#define tc_zero_other (params->textureMode[0] & (1 << 12)) -#define tc_sub_clocal (params->textureMode[0] & (1 << 13)) -#define tc_mselect ((params->textureMode[0] >> 14) & 7) -#define tc_reverse_blend (params->textureMode[0] & (1 << 17)) -#define tc_add_clocal (params->textureMode[0] & (1 << 18)) -#define tc_add_alocal (params->textureMode[0] & (1 << 19)) -#define tc_invert_output (params->textureMode[0] & (1 << 20)) -#define tca_zero_other (params->textureMode[0] & (1 << 21)) -#define tca_sub_clocal (params->textureMode[0] & (1 << 22)) -#define tca_mselect ((params->textureMode[0] >> 23) & 7) -#define tca_reverse_blend (params->textureMode[0] & (1 << 26)) -#define tca_add_clocal (params->textureMode[0] & (1 << 27)) -#define tca_add_alocal (params->textureMode[0] & (1 << 28)) -#define tca_invert_output (params->textureMode[0] & (1 << 29)) - -#define tc_sub_clocal_1 (params->textureMode[1] & (1 << 13)) -#define tc_mselect_1 ((params->textureMode[1] >> 14) & 7) -#define tc_reverse_blend_1 (params->textureMode[1] & (1 << 17)) -#define tc_add_clocal_1 (params->textureMode[1] & (1 << 18)) -#define tc_add_alocal_1 (params->textureMode[1] & (1 << 19)) -#define tca_sub_clocal_1 (params->textureMode[1] & (1 << 22)) -#define tca_mselect_1 ((params->textureMode[1] >> 23) & 7) -#define tca_reverse_blend_1 (params->textureMode[1] & (1 << 26)) -#define tca_add_clocal_1 (params->textureMode[1] & (1 << 27)) -#define tca_add_alocal_1 (params->textureMode[1] & (1 << 28)) - -#define src_afunc ( (params->alphaMode >> 8) & 0xf) -#define dest_afunc ( (params->alphaMode >> 12) & 0xf) -#define alpha_func ( (params->alphaMode >> 1) & 7) -#define a_ref ( params->alphaMode >> 24) -#define depth_op ( (params->fbzMode >> 5) & 7) -#define dither ( params->fbzMode & FBZ_DITHER) -#define dither2x2 (params->fbzMode & FBZ_DITHER_2x2) - -/*Perform texture fetch and blending for both TMUs*/ -static inline void voodoo_tmu_fetch_and_blend(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int x) -{ - int r,g,b,a; - int c_reverse, a_reverse; -// int c_reverse1, a_reverse1; - int factor_r = 0, factor_g = 0, factor_b = 0, factor_a = 0; - - voodoo_tmu_fetch(voodoo, params, state, 1, x); - - if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && (state->lod & 1)) - { - c_reverse = tc_reverse_blend; - a_reverse = tca_reverse_blend; - } - else - { - c_reverse = !tc_reverse_blend; - a_reverse = !tca_reverse_blend; - } -/* c_reverse1 = c_reverse; - a_reverse1 = a_reverse;*/ - if (tc_sub_clocal_1) - { - switch (tc_mselect_1) - { - case TC_MSELECT_ZERO: - factor_r = factor_g = factor_b = 0; - break; - case TC_MSELECT_CLOCAL: - factor_r = state->tex_r[1]; - factor_g = state->tex_g[1]; - factor_b = state->tex_b[1]; - break; - case TC_MSELECT_AOTHER: - factor_r = factor_g = factor_b = 0; - break; - case TC_MSELECT_ALOCAL: - factor_r = factor_g = factor_b = state->tex_a[1]; - break; - case TC_MSELECT_DETAIL: - factor_r = (params->detail_bias[1] - state->lod) << params->detail_scale[1]; - if (factor_r > params->detail_max[1]) - factor_r = params->detail_max[1]; - factor_g = factor_b = factor_r; - break; - case TC_MSELECT_LOD_FRAC: - factor_r = factor_g = factor_b = state->lod_frac[1]; - break; - } - if (!c_reverse) - { - r = (-state->tex_r[1] * (factor_r + 1)) >> 8; - g = (-state->tex_g[1] * (factor_g + 1)) >> 8; - b = (-state->tex_b[1] * (factor_b + 1)) >> 8; - } - else - { - r = (-state->tex_r[1] * ((factor_r^0xff) + 1)) >> 8; - g = (-state->tex_g[1] * ((factor_g^0xff) + 1)) >> 8; - b = (-state->tex_b[1] * ((factor_b^0xff) + 1)) >> 8; - } - if (tc_add_clocal_1) - { - r += state->tex_r[1]; - g += state->tex_g[1]; - b += state->tex_b[1]; - } - else if (tc_add_alocal_1) - { - r += state->tex_a[1]; - g += state->tex_a[1]; - b += state->tex_a[1]; - } - state->tex_r[1] = CLAMP(r); - state->tex_g[1] = CLAMP(g); - state->tex_b[1] = CLAMP(b); - } - if (tca_sub_clocal_1) - { - switch (tca_mselect_1) - { - case TCA_MSELECT_ZERO: - factor_a = 0; - break; - case TCA_MSELECT_CLOCAL: - factor_a = state->tex_a[1]; - break; - case TCA_MSELECT_AOTHER: - factor_a = 0; - break; - case TCA_MSELECT_ALOCAL: - factor_a = state->tex_a[1]; - break; - case TCA_MSELECT_DETAIL: - factor_a = (params->detail_bias[1] - state->lod) << params->detail_scale[1]; - if (factor_a > params->detail_max[1]) - factor_a = params->detail_max[1]; - break; - case TCA_MSELECT_LOD_FRAC: - factor_a = state->lod_frac[1]; - break; - } - if (!a_reverse) - a = (-state->tex_a[1] * ((factor_a ^ 0xff) + 1)) >> 8; - else - a = (-state->tex_a[1] * (factor_a + 1)) >> 8; - if (tca_add_clocal_1 || tca_add_alocal_1) - a += state->tex_a[1]; - state->tex_a[1] = CLAMP(a); - } - - - voodoo_tmu_fetch(voodoo, params, state, 0, x); - - if ((params->textureMode[0] & TEXTUREMODE_TRILINEAR) && (state->lod & 1)) - { - c_reverse = tc_reverse_blend; - a_reverse = tca_reverse_blend; - } - else - { - c_reverse = !tc_reverse_blend; - a_reverse = !tca_reverse_blend; - } - - if (!tc_zero_other) - { - r = state->tex_r[1]; - g = state->tex_g[1]; - b = state->tex_b[1]; - } - else - r = g = b = 0; - if (tc_sub_clocal) - { - r -= state->tex_r[0]; - g -= state->tex_g[0]; - b -= state->tex_b[0]; - } - switch (tc_mselect) - { - case TC_MSELECT_ZERO: - factor_r = factor_g = factor_b = 0; - break; - case TC_MSELECT_CLOCAL: - factor_r = state->tex_r[0]; - factor_g = state->tex_g[0]; - factor_b = state->tex_b[0]; - break; - case TC_MSELECT_AOTHER: - factor_r = factor_g = factor_b = state->tex_a[1]; - break; - case TC_MSELECT_ALOCAL: - factor_r = factor_g = factor_b = state->tex_a[0]; - break; - case TC_MSELECT_DETAIL: - factor_r = (params->detail_bias[0] - state->lod) << params->detail_scale[0]; - if (factor_r > params->detail_max[0]) - factor_r = params->detail_max[0]; - factor_g = factor_b = factor_r; - break; - case TC_MSELECT_LOD_FRAC: - factor_r = factor_g = factor_b = state->lod_frac[0]; - break; - } - if (!c_reverse) - { - r = (r * (factor_r + 1)) >> 8; - g = (g * (factor_g + 1)) >> 8; - b = (b * (factor_b + 1)) >> 8; - } - else - { - r = (r * ((factor_r^0xff) + 1)) >> 8; - g = (g * ((factor_g^0xff) + 1)) >> 8; - b = (b * ((factor_b^0xff) + 1)) >> 8; - } - if (tc_add_clocal) - { - r += state->tex_r[0]; - g += state->tex_g[0]; - b += state->tex_b[0]; - } - else if (tc_add_alocal) - { - r += state->tex_a[0]; - g += state->tex_a[0]; - b += state->tex_a[0]; - } - - if (!tca_zero_other) - a = state->tex_a[1]; - else - a = 0; - if (tca_sub_clocal) - a -= state->tex_a[0]; - switch (tca_mselect) - { - case TCA_MSELECT_ZERO: - factor_a = 0; - break; - case TCA_MSELECT_CLOCAL: - factor_a = state->tex_a[0]; - break; - case TCA_MSELECT_AOTHER: - factor_a = state->tex_a[1]; - break; - case TCA_MSELECT_ALOCAL: - factor_a = state->tex_a[0]; - break; - case TCA_MSELECT_DETAIL: - factor_a = (params->detail_bias[0] - state->lod) << params->detail_scale[0]; - if (factor_a > params->detail_max[0]) - factor_a = params->detail_max[0]; - break; - case TCA_MSELECT_LOD_FRAC: - factor_a = state->lod_frac[0]; - break; - } - if (a_reverse) - a = (a * ((factor_a ^ 0xff) + 1)) >> 8; - else - a = (a * (factor_a + 1)) >> 8; - if (tca_add_clocal || tca_add_alocal) - a += state->tex_a[0]; - - - state->tex_r[0] = CLAMP(r); - state->tex_g[0] = CLAMP(g); - state->tex_b[0] = CLAMP(b); - state->tex_a[0] = CLAMP(a); - - if (tc_invert_output) - { - state->tex_r[0] ^= 0xff; - state->tex_g[0] ^= 0xff; - state->tex_b[0] ^= 0xff; - } - if (tca_invert_output) - state->tex_a[0] ^= 0xff; -} - -#if ((defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86) && !(defined __amd64__ || defined _M_X64) && (defined USE_DYNAREC)) -#include <86box/vid_voodoo_codegen_x86.h> -#elif ((defined __amd64__ || defined _M_X64) && (defined USE_DYNAREC)) -#include <86box/vid_voodoo_codegen_x86-64.h> -#else -#define NO_CODEGEN -#endif - -static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int ystart, int yend, int odd_even) -{ -/* int rgb_sel = params->fbzColorPath & 3; - int a_sel = (params->fbzColorPath >> 2) & 3; - int cc_localselect = params->fbzColorPath & (1 << 4); - int cca_localselect = (params->fbzColorPath >> 5) & 3; - int cc_localselect_override = params->fbzColorPath & (1 << 7); - int cc_zero_other = params->fbzColorPath & (1 << 8); - int cc_sub_clocal = params->fbzColorPath & (1 << 9); - int cc_mselect = (params->fbzColorPath >> 10) & 7; - int cc_reverse_blend = params->fbzColorPath & (1 << 13); - int cc_add = (params->fbzColorPath >> 14) & 3; - int cc_add_alocal = params->fbzColorPath & (1 << 15); - int cc_invert_output = params->fbzColorPath & (1 << 16); - int cca_zero_other = params->fbzColorPath & (1 << 17); - int cca_sub_clocal = params->fbzColorPath & (1 << 18); - int cca_mselect = (params->fbzColorPath >> 19) & 7; - int cca_reverse_blend = params->fbzColorPath & (1 << 22); - int cca_add = (params->fbzColorPath >> 23) & 3; - int cca_invert_output = params->fbzColorPath & (1 << 25); - int src_afunc = (params->alphaMode >> 8) & 0xf; - int dest_afunc = (params->alphaMode >> 12) & 0xf; - int alpha_func = (params->alphaMode >> 1) & 7; - int a_ref = params->alphaMode >> 24; - int depth_op = (params->fbzMode >> 5) & 7; - int dither = params->fbzMode & FBZ_DITHER;*/ - int texels; - int c; -#ifndef NO_CODEGEN - uint8_t (*voodoo_draw)(voodoo_state_t *state, voodoo_params_t *params, int x, int real_y); -#endif - uint8_t cother_r = 0, cother_g = 0, cother_b = 0; - int y_diff = SLI_ENABLED ? 2 : 1; - - if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH || - (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL) - texels = 1; - else - texels = 2; - - state->clamp_s[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPS; - state->clamp_t[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPT; - state->clamp_s[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPS; - state->clamp_t[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPT; -// int last_x; -// voodoo_log("voodoo_triangle : bottom-half %X %X %X %X %X %i %i %i %i\n", xstart, xend, dx1, dx2, dx2 * 36, xdir, y, yend, ydir); - - for (c = 0; c <= LOD_MAX; c++) - { - state->tex[0][c] = &voodoo->texture_cache[0][params->tex_entry[0]].data[texture_offset[c]]; - state->tex[1][c] = &voodoo->texture_cache[1][params->tex_entry[1]].data[texture_offset[c]]; - } - - state->tformat = params->tformat[0]; - - state->tex_w_mask[0] = params->tex_w_mask[0]; - state->tex_h_mask[0] = params->tex_h_mask[0]; - state->tex_shift[0] = params->tex_shift[0]; - state->tex_lod[0] = params->tex_lod[0]; - state->tex_w_mask[1] = params->tex_w_mask[1]; - state->tex_h_mask[1] = params->tex_h_mask[1]; - state->tex_shift[1] = params->tex_shift[1]; - state->tex_lod[1] = params->tex_lod[1]; - - if ((params->fbzMode & 1) && (ystart < params->clipLowY)) - { - int dy = params->clipLowY - ystart; - - state->base_r += params->dRdY*dy; - state->base_g += params->dGdY*dy; - state->base_b += params->dBdY*dy; - state->base_a += params->dAdY*dy; - state->base_z += params->dZdY*dy; - state->tmu[0].base_s += params->tmu[0].dSdY*dy; - state->tmu[0].base_t += params->tmu[0].dTdY*dy; - state->tmu[0].base_w += params->tmu[0].dWdY*dy; - state->tmu[1].base_s += params->tmu[1].dSdY*dy; - state->tmu[1].base_t += params->tmu[1].dTdY*dy; - state->tmu[1].base_w += params->tmu[1].dWdY*dy; - state->base_w += params->dWdY*dy; - state->xstart += state->dx1*dy; - state->xend += state->dx2*dy; - - ystart = params->clipLowY; - } - - if ((params->fbzMode & 1) && (yend >= params->clipHighY)) - yend = params->clipHighY-1; - - state->y = ystart; -// yend--; - - if (SLI_ENABLED) - { - int test_y; - - if (params->fbzMode & (1 << 17)) - test_y = (voodoo->v_disp-1) - state->y; - else - test_y = state->y; - - if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (test_y & 1)) || - ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(test_y & 1))) - { - state->y++; - - state->base_r += params->dRdY; - state->base_g += params->dGdY; - state->base_b += params->dBdY; - state->base_a += params->dAdY; - state->base_z += params->dZdY; - state->tmu[0].base_s += params->tmu[0].dSdY; - state->tmu[0].base_t += params->tmu[0].dTdY; - state->tmu[0].base_w += params->tmu[0].dWdY; - state->tmu[1].base_s += params->tmu[1].dSdY; - state->tmu[1].base_t += params->tmu[1].dTdY; - state->tmu[1].base_w += params->tmu[1].dWdY; - state->base_w += params->dWdY; - state->xstart += state->dx1; - state->xend += state->dx2; - } - } -#ifndef NO_CODEGEN - if (voodoo->use_recompiler) - voodoo_draw = voodoo_get_block(voodoo, params, state, odd_even); - else - voodoo_draw = NULL; -#endif - - if (voodoo_output) - voodoo_log("dxAB=%08x dxBC=%08x dxAC=%08x\n", state->dxAB, state->dxBC, state->dxAC); -// voodoo_log("Start %i %i\n", ystart, voodoo->fbzMode & (1 << 17)); - - for (; state->y < yend; state->y += y_diff) - { - int x, x2; - int real_y = (state->y << 4) + 8; - int start_x; -#ifdef ENABLE_VOODOO_LOG - int start_x2; -#endif - int dx; - uint16_t *fb_mem, *aux_mem; - - state->ir = state->base_r; - state->ig = state->base_g; - state->ib = state->base_b; - state->ia = state->base_a; - state->z = state->base_z; - state->tmu0_s = state->tmu[0].base_s; - state->tmu0_t = state->tmu[0].base_t; - state->tmu0_w = state->tmu[0].base_w; - state->tmu1_s = state->tmu[1].base_s; - state->tmu1_t = state->tmu[1].base_t; - state->tmu1_w = state->tmu[1].base_w; - state->w = state->base_w; - - x = (state->vertexAx << 12) + ((state->dxAC * (real_y - state->vertexAy)) >> 4); - - if (real_y < state->vertexBy) - x2 = (state->vertexAx << 12) + ((state->dxAB * (real_y - state->vertexAy)) >> 4); - else - x2 = (state->vertexBx << 12) + ((state->dxBC * (real_y - state->vertexBy)) >> 4); - - if (params->fbzMode & (1 << 17)) - real_y = (voodoo->v_disp-1) - (real_y >> 4); - else - real_y >>= 4; - - if (SLI_ENABLED) - { - if (((real_y >> 1) & voodoo->odd_even_mask) != odd_even) - goto next_line; - } - else - { - if ((real_y & voodoo->odd_even_mask) != odd_even) - goto next_line; - } - - start_x = x; - - if (state->xdir > 0) - x2 -= (1 << 16); - else - x -= (1 << 16); - dx = ((x + 0x7000) >> 16) - (((state->vertexAx << 12) + 0x7000) >> 16); -#ifdef ENABLE_VOODOO_LOG - start_x2 = x + 0x7000; -#endif - x = (x + 0x7000) >> 16; - x2 = (x2 + 0x7000) >> 16; - - if (voodoo_output) - voodoo_log("%03i:%03i : Ax=%08x start_x=%08x dSdX=%016llx dx=%08x s=%08x -> ", x, state->y, state->vertexAx << 8, start_x, params->tmu[0].dTdX, dx, state->tmu0_t); - - state->ir += (params->dRdX * dx); - state->ig += (params->dGdX * dx); - state->ib += (params->dBdX * dx); - state->ia += (params->dAdX * dx); - state->z += (params->dZdX * dx); - state->tmu0_s += (params->tmu[0].dSdX * dx); - state->tmu0_t += (params->tmu[0].dTdX * dx); - state->tmu0_w += (params->tmu[0].dWdX * dx); - state->tmu1_s += (params->tmu[1].dSdX * dx); - state->tmu1_t += (params->tmu[1].dTdX * dx); - state->tmu1_w += (params->tmu[1].dWdX * dx); - state->w += (params->dWdX * dx); - - if (voodoo_output) - voodoo_log("%08llx %lli %lli\n", state->tmu0_t, state->tmu0_t >> (18+state->lod), (state->tmu0_t + (1 << (17+state->lod))) >> (18+state->lod)); - - if (params->fbzMode & 1) - { - if (state->xdir > 0) - { - if (x < params->clipLeft) - { - int dx = params->clipLeft - x; - - state->ir += params->dRdX*dx; - state->ig += params->dGdX*dx; - state->ib += params->dBdX*dx; - state->ia += params->dAdX*dx; - state->z += params->dZdX*dx; - state->tmu0_s += params->tmu[0].dSdX*dx; - state->tmu0_t += params->tmu[0].dTdX*dx; - state->tmu0_w += params->tmu[0].dWdX*dx; - state->tmu1_s += params->tmu[1].dSdX*dx; - state->tmu1_t += params->tmu[1].dTdX*dx; - state->tmu1_w += params->tmu[1].dWdX*dx; - state->w += params->dWdX*dx; - - x = params->clipLeft; - } - if (x2 >= params->clipRight) - x2 = params->clipRight-1; - } - else - { - if (x >= params->clipRight) - { - int dx = (params->clipRight-1) - x; - - state->ir += params->dRdX*dx; - state->ig += params->dGdX*dx; - state->ib += params->dBdX*dx; - state->ia += params->dAdX*dx; - state->z += params->dZdX*dx; - state->tmu0_s += params->tmu[0].dSdX*dx; - state->tmu0_t += params->tmu[0].dTdX*dx; - state->tmu0_w += params->tmu[0].dWdX*dx; - state->tmu1_s += params->tmu[1].dSdX*dx; - state->tmu1_t += params->tmu[1].dTdX*dx; - state->tmu1_w += params->tmu[1].dWdX*dx; - state->w += params->dWdX*dx; - - x = params->clipRight-1; - } - if (x2 < params->clipLeft) - x2 = params->clipLeft; - } - } - - if (x2 < x && state->xdir > 0) - goto next_line; - if (x2 > x && state->xdir < 0) - goto next_line; - - if (SLI_ENABLED) - { - state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + ((real_y >> 1) * voodoo->row_width)]; - state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + ((real_y >> 1) * voodoo->row_width)) & voodoo->fb_mask]; - } - else - { - state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y * voodoo->row_width)]; - state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y * voodoo->row_width)) & voodoo->fb_mask]; - } - - if (voodoo_output) - voodoo_log("%03i: x=%08x x2=%08x xstart=%08x xend=%08x dx=%08x start_x2=%08x\n", state->y, x, x2, state->xstart, state->xend, dx, start_x2); - - state->pixel_count = 0; - state->texel_count = 0; - state->x = x; - state->x2 = x2; -#ifndef NO_CODEGEN - if (voodoo->use_recompiler) - { - voodoo_draw(state, params, x, real_y); - } - else -#endif - do - { - start_x = x; - state->x = x; - voodoo->pixel_count[odd_even]++; - voodoo->texel_count[odd_even] += texels; - voodoo->fbiPixelsIn++; - - if (voodoo_output) - voodoo_log(" X=%03i T=%08x\n", x, state->tmu0_t); -// if (voodoo->fbzMode & FBZ_RGB_WMASK) - { - int update = 1; - uint8_t aother; - uint8_t clocal_r, clocal_g, clocal_b, alocal; - int src_r = 0, src_g = 0, src_b = 0, src_a = 0; - int msel_r, msel_g, msel_b, msel_a; - uint8_t dest_r, dest_g, dest_b, dest_a; - uint16_t dat; - int sel; - int32_t new_depth, w_depth; - - if (state->w & 0xffff00000000) - w_depth = 0; - else if (!(state->w & 0xffff0000)) - w_depth = 0xf001; - else - { - int exp = voodoo_fls((uint16_t)((uint32_t)state->w >> 16)); - int mant = ((~(uint32_t)state->w >> (19 - exp))) & 0xfff; - w_depth = (exp << 12) + mant + 1; - if (w_depth > 0xffff) - w_depth = 0xffff; - } - -// w_depth = CLAMP16(w_depth); - - if (params->fbzMode & FBZ_W_BUFFER) - new_depth = w_depth; - else - new_depth = CLAMP16(state->z >> 12); - - if (params->fbzMode & FBZ_DEPTH_BIAS) - new_depth = CLAMP16(new_depth + (int16_t)params->zaColor); - - if (params->fbzMode & FBZ_DEPTH_ENABLE) - { - uint16_t old_depth = aux_mem[x]; - - DEPTH_TEST((params->fbzMode & FBZ_DEPTH_SOURCE) ? (params->zaColor & 0xffff) : new_depth); - } - - dat = fb_mem[x]; - dest_r = (dat >> 8) & 0xf8; - dest_g = (dat >> 3) & 0xfc; - dest_b = (dat << 3) & 0xf8; - dest_r |= (dest_r >> 5); - dest_g |= (dest_g >> 6); - dest_b |= (dest_b >> 5); - dest_a = 0xff; - - if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) - { - if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus) - { - /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/ - voodoo_tmu_fetch(voodoo, params, state, 0, x); - } - else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH) - { - /*TMU0 in pass-through mode, only sample TMU1*/ - voodoo_tmu_fetch(voodoo, params, state, 1, x); - - state->tex_r[0] = state->tex_r[1]; - state->tex_g[0] = state->tex_g[1]; - state->tex_b[0] = state->tex_b[1]; - state->tex_a[0] = state->tex_a[1]; - } - else - { - voodoo_tmu_fetch_and_blend(voodoo, params, state, x); - } - - if ((params->fbzMode & FBZ_CHROMAKEY) && - state->tex_r[0] == params->chromaKey_r && - state->tex_g[0] == params->chromaKey_g && - state->tex_b[0] == params->chromaKey_b) - { - voodoo->fbiChromaFail++; - goto skip_pixel; - } - } - - if (voodoo->trexInit1[0] & (1 << 18)) - { - state->tex_r[0] = state->tex_g[0] = 0; - state->tex_b[0] = voodoo->tmuConfig; - } - - if (cc_localselect_override) - sel = (state->tex_a[0] & 0x80) ? 1 : 0; - else - sel = cc_localselect; - - if (sel) - { - clocal_r = (params->color0 >> 16) & 0xff; - clocal_g = (params->color0 >> 8) & 0xff; - clocal_b = params->color0 & 0xff; - } - else - { - clocal_r = CLAMP(state->ir >> 12); - clocal_g = CLAMP(state->ig >> 12); - clocal_b = CLAMP(state->ib >> 12); - } - - switch (_rgb_sel) - { - case CC_LOCALSELECT_ITER_RGB: /*Iterated RGB*/ - cother_r = CLAMP(state->ir >> 12); - cother_g = CLAMP(state->ig >> 12); - cother_b = CLAMP(state->ib >> 12); - break; - - case CC_LOCALSELECT_TEX: /*TREX Color Output*/ - cother_r = state->tex_r[0]; - cother_g = state->tex_g[0]; - cother_b = state->tex_b[0]; - break; - - case CC_LOCALSELECT_COLOR1: /*Color1 RGB*/ - cother_r = (params->color1 >> 16) & 0xff; - cother_g = (params->color1 >> 8) & 0xff; - cother_b = params->color1 & 0xff; - break; - - case CC_LOCALSELECT_LFB: /*Linear Frame Buffer*/ - cother_r = src_r; - cother_g = src_g; - cother_b = src_b; - break; - } - - switch (cca_localselect) - { - case CCA_LOCALSELECT_ITER_A: - alocal = CLAMP(state->ia >> 12); - break; - - case CCA_LOCALSELECT_COLOR0: - alocal = (params->color0 >> 24) & 0xff; - break; - - case CCA_LOCALSELECT_ITER_Z: - alocal = CLAMP(state->z >> 20); - break; - - default: - fatal("Bad cca_localselect %i\n", cca_localselect); - alocal = 0xff; - break; - } - - switch (a_sel) - { - case A_SEL_ITER_A: - aother = CLAMP(state->ia >> 12); - break; - case A_SEL_TEX: - aother = state->tex_a[0]; - break; - case A_SEL_COLOR1: - aother = (params->color1 >> 24) & 0xff; - break; - default: - fatal("Bad a_sel %i\n", a_sel); - aother = 0; - break; - } - - if (cc_zero_other) - { - src_r = 0; - src_g = 0; - src_b = 0; - } - else - { - src_r = cother_r; - src_g = cother_g; - src_b = cother_b; - } - - if (cca_zero_other) - src_a = 0; - else - src_a = aother; - - if (cc_sub_clocal) - { - src_r -= clocal_r; - src_g -= clocal_g; - src_b -= clocal_b; - } - - if (cca_sub_clocal) - src_a -= alocal; - - switch (cc_mselect) - { - case CC_MSELECT_ZERO: - msel_r = 0; - msel_g = 0; - msel_b = 0; - break; - case CC_MSELECT_CLOCAL: - msel_r = clocal_r; - msel_g = clocal_g; - msel_b = clocal_b; - break; - case CC_MSELECT_AOTHER: - msel_r = aother; - msel_g = aother; - msel_b = aother; - break; - case CC_MSELECT_ALOCAL: - msel_r = alocal; - msel_g = alocal; - msel_b = alocal; - break; - case CC_MSELECT_TEX: - msel_r = state->tex_a[0]; - msel_g = state->tex_a[0]; - msel_b = state->tex_a[0]; - break; - case CC_MSELECT_TEXRGB: - msel_r = state->tex_r[0]; - msel_g = state->tex_g[0]; - msel_b = state->tex_b[0]; - break; - - default: - fatal("Bad cc_mselect %i\n", cc_mselect); - msel_r = 0; - msel_g = 0; - msel_b = 0; - break; - } - - switch (cca_mselect) - { - case CCA_MSELECT_ZERO: - msel_a = 0; - break; - case CCA_MSELECT_ALOCAL: - msel_a = alocal; - break; - case CCA_MSELECT_AOTHER: - msel_a = aother; - break; - case CCA_MSELECT_ALOCAL2: - msel_a = alocal; - break; - case CCA_MSELECT_TEX: - msel_a = state->tex_a[0]; - break; - - default: - fatal("Bad cca_mselect %i\n", cca_mselect); - msel_a = 0; - break; - } - - if (!cc_reverse_blend) - { - msel_r ^= 0xff; - msel_g ^= 0xff; - msel_b ^= 0xff; - } - msel_r++; - msel_g++; - msel_b++; - - if (!cca_reverse_blend) - msel_a ^= 0xff; - msel_a++; - - src_r = (src_r * msel_r) >> 8; - src_g = (src_g * msel_g) >> 8; - src_b = (src_b * msel_b) >> 8; - src_a = (src_a * msel_a) >> 8; - - switch (cc_add) - { - case CC_ADD_CLOCAL: - src_r += clocal_r; - src_g += clocal_g; - src_b += clocal_b; - break; - case CC_ADD_ALOCAL: - src_r += alocal; - src_g += alocal; - src_b += alocal; - break; - case 0: - break; - default: - fatal("Bad cc_add %i\n", cc_add); - } - - if (cca_add) - src_a += alocal; - - src_r = CLAMP(src_r); - src_g = CLAMP(src_g); - src_b = CLAMP(src_b); - src_a = CLAMP(src_a); - - if (cc_invert_output) - { - src_r ^= 0xff; - src_g ^= 0xff; - src_b ^= 0xff; - } - if (cca_invert_output) - src_a ^= 0xff; - - if (params->fogMode & FOG_ENABLE) - APPLY_FOG(src_r, src_g, src_b, state->z, state->ia, state->w); - - if (params->alphaMode & 1) - ALPHA_TEST(src_a); - - if (params->alphaMode & (1 << 4)) - ALPHA_BLEND(src_r, src_g, src_b, src_a); - - if (update) - { - if (dither) - { - if (dither2x2) - { - src_r = dither_rb2x2[src_r][real_y & 1][x & 1]; - src_g = dither_g2x2[src_g][real_y & 1][x & 1]; - src_b = dither_rb2x2[src_b][real_y & 1][x & 1]; - } - else - { - src_r = dither_rb[src_r][real_y & 3][x & 3]; - src_g = dither_g[src_g][real_y & 3][x & 3]; - src_b = dither_rb[src_b][real_y & 3][x & 3]; - } - } - else - { - src_r >>= 3; - src_g >>= 2; - src_b >>= 3; - } - - if (params->fbzMode & FBZ_RGB_WMASK) - fb_mem[x] = src_b | (src_g << 5) | (src_r << 11); - - if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) - aux_mem[x] = new_depth; - } - } - voodoo_output &= ~2; - voodoo->fbiPixelsOut++; -skip_pixel: - if (state->xdir > 0) - { - state->ir += params->dRdX; - state->ig += params->dGdX; - state->ib += params->dBdX; - state->ia += params->dAdX; - state->z += params->dZdX; - state->tmu0_s += params->tmu[0].dSdX; - state->tmu0_t += params->tmu[0].dTdX; - state->tmu0_w += params->tmu[0].dWdX; - state->tmu1_s += params->tmu[1].dSdX; - state->tmu1_t += params->tmu[1].dTdX; - state->tmu1_w += params->tmu[1].dWdX; - state->w += params->dWdX; - } - else - { - state->ir -= params->dRdX; - state->ig -= params->dGdX; - state->ib -= params->dBdX; - state->ia -= params->dAdX; - state->z -= params->dZdX; - state->tmu0_s -= params->tmu[0].dSdX; - state->tmu0_t -= params->tmu[0].dTdX; - state->tmu0_w -= params->tmu[0].dWdX; - state->tmu1_s -= params->tmu[1].dSdX; - state->tmu1_t -= params->tmu[1].dTdX; - state->tmu1_w -= params->tmu[1].dWdX; - state->w -= params->dWdX; - } - - x += state->xdir; - } while (start_x != x2); - - voodoo->pixel_count[odd_even] += state->pixel_count; - voodoo->texel_count[odd_even] += state->texel_count; - voodoo->fbiPixelsIn += state->pixel_count; - - if (voodoo->params.draw_offset == voodoo->params.front_offset) - voodoo->dirty_line[real_y >> 1] = 1; -next_line: - if (SLI_ENABLED) - { - state->base_r += params->dRdY; - state->base_g += params->dGdY; - state->base_b += params->dBdY; - state->base_a += params->dAdY; - state->base_z += params->dZdY; - state->tmu[0].base_s += params->tmu[0].dSdY; - state->tmu[0].base_t += params->tmu[0].dTdY; - state->tmu[0].base_w += params->tmu[0].dWdY; - state->tmu[1].base_s += params->tmu[1].dSdY; - state->tmu[1].base_t += params->tmu[1].dTdY; - state->tmu[1].base_w += params->tmu[1].dWdY; - state->base_w += params->dWdY; - state->xstart += state->dx1; - state->xend += state->dx2; - } - state->base_r += params->dRdY; - state->base_g += params->dGdY; - state->base_b += params->dBdY; - state->base_a += params->dAdY; - state->base_z += params->dZdY; - state->tmu[0].base_s += params->tmu[0].dSdY; - state->tmu[0].base_t += params->tmu[0].dTdY; - state->tmu[0].base_w += params->tmu[0].dWdY; - state->tmu[1].base_s += params->tmu[1].dSdY; - state->tmu[1].base_t += params->tmu[1].dTdY; - state->tmu[1].base_w += params->tmu[1].dWdY; - state->base_w += params->dWdY; - state->xstart += state->dx1; - state->xend += state->dx2; - } - - voodoo->texture_cache[0][params->tex_entry[0]].refcount_r[odd_even]++; - voodoo->texture_cache[1][params->tex_entry[1]].refcount_r[odd_even]++; -} - -static void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_even) -{ - voodoo_state_t state; - int vertexAy_adjusted; - int vertexCy_adjusted; - int dx, dy; - - uint64_t tempdx, tempdy; - uint64_t tempLOD; - int LOD; - int lodbias; - - memset(&state, 0x00, sizeof(voodoo_state_t)); - voodoo->tri_count++; - - dx = 8 - (params->vertexAx & 0xf); - if ((params->vertexAx & 0xf) > 8) - dx += 16; - dy = 8 - (params->vertexAy & 0xf); - if ((params->vertexAy & 0xf) > 8) - dy += 16; - -/* voodoo_log("voodoo_triangle %i %i %i : vA %f, %f vB %f, %f vC %f, %f f %i,%i %08x %08x %08x,%08x tex=%i,%i fogMode=%08x\n", odd_even, voodoo->params_read_idx[odd_even], voodoo->params_read_idx[odd_even] & PARAM_MASK, (float)params->vertexAx / 16.0, (float)params->vertexAy / 16.0, - (float)params->vertexBx / 16.0, (float)params->vertexBy / 16.0, - (float)params->vertexCx / 16.0, (float)params->vertexCy / 16.0, - (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[0] : 0, - (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[1] : 0, params->fbzColorPath, params->alphaMode, params->textureMode[0],params->textureMode[1], params->tex_entry[0],params->tex_entry[1], params->fogMode);*/ - - state.base_r = params->startR; - state.base_g = params->startG; - state.base_b = params->startB; - state.base_a = params->startA; - state.base_z = params->startZ; - state.tmu[0].base_s = params->tmu[0].startS; - state.tmu[0].base_t = params->tmu[0].startT; - state.tmu[0].base_w = params->tmu[0].startW; - state.tmu[1].base_s = params->tmu[1].startS; - state.tmu[1].base_t = params->tmu[1].startT; - state.tmu[1].base_w = params->tmu[1].startW; - state.base_w = params->startW; - - if (params->fbzColorPath & FBZ_PARAM_ADJUST) - { - state.base_r += (dx*params->dRdX + dy*params->dRdY) >> 4; - state.base_g += (dx*params->dGdX + dy*params->dGdY) >> 4; - state.base_b += (dx*params->dBdX + dy*params->dBdY) >> 4; - state.base_a += (dx*params->dAdX + dy*params->dAdY) >> 4; - state.base_z += (dx*params->dZdX + dy*params->dZdY) >> 4; - state.tmu[0].base_s += (dx*params->tmu[0].dSdX + dy*params->tmu[0].dSdY) >> 4; - state.tmu[0].base_t += (dx*params->tmu[0].dTdX + dy*params->tmu[0].dTdY) >> 4; - state.tmu[0].base_w += (dx*params->tmu[0].dWdX + dy*params->tmu[0].dWdY) >> 4; - state.tmu[1].base_s += (dx*params->tmu[1].dSdX + dy*params->tmu[1].dSdY) >> 4; - state.tmu[1].base_t += (dx*params->tmu[1].dTdX + dy*params->tmu[1].dTdY) >> 4; - state.tmu[1].base_w += (dx*params->tmu[1].dWdX + dy*params->tmu[1].dWdY) >> 4; - state.base_w += (dx*params->dWdX + dy*params->dWdY) >> 4; - } - - tris++; - - state.vertexAy = params->vertexAy & ~0xffff0000; - if (state.vertexAy & 0x8000) - state.vertexAy |= 0xffff0000; - state.vertexBy = params->vertexBy & ~0xffff0000; - if (state.vertexBy & 0x8000) - state.vertexBy |= 0xffff0000; - state.vertexCy = params->vertexCy & ~0xffff0000; - if (state.vertexCy & 0x8000) - state.vertexCy |= 0xffff0000; - - state.vertexAx = params->vertexAx & ~0xffff0000; - if (state.vertexAx & 0x8000) - state.vertexAx |= 0xffff0000; - state.vertexBx = params->vertexBx & ~0xffff0000; - if (state.vertexBx & 0x8000) - state.vertexBx |= 0xffff0000; - state.vertexCx = params->vertexCx & ~0xffff0000; - if (state.vertexCx & 0x8000) - state.vertexCx |= 0xffff0000; - - vertexAy_adjusted = (state.vertexAy+7) >> 4; - vertexCy_adjusted = (state.vertexCy+7) >> 4; - - if (state.vertexBy - state.vertexAy) - state.dxAB = (int)((((int64_t)state.vertexBx << 12) - ((int64_t)state.vertexAx << 12)) << 4) / (int)(state.vertexBy - state.vertexAy); - else - state.dxAB = 0; - if (state.vertexCy - state.vertexAy) - state.dxAC = (int)((((int64_t)state.vertexCx << 12) - ((int64_t)state.vertexAx << 12)) << 4) / (int)(state.vertexCy - state.vertexAy); - else - state.dxAC = 0; - if (state.vertexCy - state.vertexBy) - state.dxBC = (int)((((int64_t)state.vertexCx << 12) - ((int64_t)state.vertexBx << 12)) << 4) / (int)(state.vertexCy - state.vertexBy); - else - state.dxBC = 0; - - state.lod_min[0] = (params->tLOD[0] & 0x3f) << 6; - state.lod_max[0] = ((params->tLOD[0] >> 6) & 0x3f) << 6; - if (state.lod_max[0] > 0x800) - state.lod_max[0] = 0x800; - state.lod_min[1] = (params->tLOD[1] & 0x3f) << 6; - state.lod_max[1] = ((params->tLOD[1] >> 6) & 0x3f) << 6; - if (state.lod_max[1] > 0x800) - state.lod_max[1] = 0x800; - - state.xstart = state.xend = state.vertexAx << 8; - state.xdir = params->sign ? -1 : 1; - - state.y = (state.vertexAy + 8) >> 4; - state.ydir = 1; - - - tempdx = (params->tmu[0].dSdX >> 14) * (params->tmu[0].dSdX >> 14) + (params->tmu[0].dTdX >> 14) * (params->tmu[0].dTdX >> 14); - tempdy = (params->tmu[0].dSdY >> 14) * (params->tmu[0].dSdY >> 14) + (params->tmu[0].dTdY >> 14) * (params->tmu[0].dTdY >> 14); - - if (tempdx > tempdy) - tempLOD = tempdx; - else - tempLOD = tempdy; - - LOD = (int)(log2((double)tempLOD / (double)(1ULL << 36)) * 256); - LOD >>= 2; - - lodbias = (params->tLOD[0] >> 12) & 0x3f; - if (lodbias & 0x20) - lodbias |= ~0x3f; - state.tmu[0].lod = LOD + (lodbias << 6); - - - tempdx = (params->tmu[1].dSdX >> 14) * (params->tmu[1].dSdX >> 14) + (params->tmu[1].dTdX >> 14) * (params->tmu[1].dTdX >> 14); - tempdy = (params->tmu[1].dSdY >> 14) * (params->tmu[1].dSdY >> 14) + (params->tmu[1].dTdY >> 14) * (params->tmu[1].dTdY >> 14); - - if (tempdx > tempdy) - tempLOD = tempdx; - else - tempLOD = tempdy; - - LOD = (int)(log2((double)tempLOD / (double)(1ULL << 36)) * 256); - LOD >>= 2; - - lodbias = (params->tLOD[1] >> 12) & 0x3f; - if (lodbias & 0x20) - lodbias |= ~0x3f; - state.tmu[1].lod = LOD + (lodbias << 6); - - - voodoo_half_triangle(voodoo, params, &state, vertexAy_adjusted, vertexCy_adjusted, odd_even); -} - -static inline void wake_render_thread(voodoo_t *voodoo) -{ - thread_set_event(voodoo->wake_render_thread[0]); /*Wake up render thread if moving from idle*/ - if (voodoo->render_threads == 2) - thread_set_event(voodoo->wake_render_thread[1]); /*Wake up render thread if moving from idle*/ -} - -static inline void wait_for_render_thread_idle(voodoo_t *voodoo) -{ - while (!PARAM_EMPTY_1 || (voodoo->render_threads == 2 && !PARAM_EMPTY_2) || voodoo->render_voodoo_busy[0] || (voodoo->render_threads == 2 && voodoo->render_voodoo_busy[1])) - { - wake_render_thread(voodoo); - if (!PARAM_EMPTY_1 || voodoo->render_voodoo_busy[0]) - thread_wait_event(voodoo->render_not_full_event[0], 1); - if (voodoo->render_threads == 2 && (!PARAM_EMPTY_2 || voodoo->render_voodoo_busy[1])) - thread_wait_event(voodoo->render_not_full_event[1], 1); - } -} - -static void render_thread(void *param, int odd_even) -{ - voodoo_t *voodoo = (voodoo_t *)param; - - while (1) - { - thread_set_event(voodoo->render_not_full_event[odd_even]); - thread_wait_event(voodoo->wake_render_thread[odd_even], -1); - thread_reset_event(voodoo->wake_render_thread[odd_even]); - voodoo->render_voodoo_busy[odd_even] = 1; - - while (!(odd_even ? PARAM_EMPTY_2 : PARAM_EMPTY_1)) - { - uint64_t start_time = plat_timer_read(); - uint64_t end_time; - voodoo_params_t *params = &voodoo->params_buffer[voodoo->params_read_idx[odd_even] & PARAM_MASK]; - - voodoo_triangle(voodoo, params, odd_even); - - voodoo->params_read_idx[odd_even]++; - - if ((odd_even ? PARAM_ENTRIES_2 : PARAM_ENTRIES_1) > (PARAM_SIZE - 10)) - thread_set_event(voodoo->render_not_full_event[odd_even]); - - end_time = plat_timer_read(); - voodoo->render_time[odd_even] += end_time - start_time; - } - - voodoo->render_voodoo_busy[odd_even] = 0; - } -} - -static void render_thread_1(void *param) -{ - render_thread(param, 0); -} -static void render_thread_2(void *param) -{ - render_thread(param, 1); -} - -static inline void queue_triangle(voodoo_t *voodoo, voodoo_params_t *params) -{ - voodoo_params_t *params_new = &voodoo->params_buffer[voodoo->params_write_idx & PARAM_MASK]; - - while (PARAM_FULL_1 || (voodoo->render_threads == 2 && PARAM_FULL_2)) - { - thread_reset_event(voodoo->render_not_full_event[0]); - if (voodoo->render_threads == 2) - thread_reset_event(voodoo->render_not_full_event[1]); - if (PARAM_FULL_1) - { - thread_wait_event(voodoo->render_not_full_event[0], -1); /*Wait for room in ringbuffer*/ - } - if (voodoo->render_threads == 2 && PARAM_FULL_2) - { - thread_wait_event(voodoo->render_not_full_event[1], -1); /*Wait for room in ringbuffer*/ - } - } - - use_texture(voodoo, params, 0); - if (voodoo->dual_tmus) - use_texture(voodoo, params, 1); - - memcpy(params_new, params, sizeof(voodoo_params_t)); - - voodoo->params_write_idx++; - - if (PARAM_ENTRIES_1 < 4 || (voodoo->render_threads == 2 && PARAM_ENTRIES_2 < 4)) - wake_render_thread(voodoo); -} - -static void voodoo_fastfill(voodoo_t *voodoo, voodoo_params_t *params) -{ - int y; - int low_y, high_y; - - if (params->fbzMode & (1 << 17)) - { - high_y = voodoo->v_disp - params->clipLowY; - low_y = voodoo->v_disp - params->clipHighY; - } - else - { - low_y = params->clipLowY; - high_y = params->clipHighY; - } - - if (params->fbzMode & FBZ_RGB_WMASK) - { - int r, g, b; - uint16_t col; - - r = ((params->color1 >> 16) >> 3) & 0x1f; - g = ((params->color1 >> 8) >> 2) & 0x3f; - b = (params->color1 >> 3) & 0x1f; - col = b | (g << 5) | (r << 11); - - if (SLI_ENABLED) - { - for (y = low_y; y < high_y; y += 2) - { - uint16_t *cbuf = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + (y >> 1) * voodoo->row_width) & voodoo->fb_mask]; - int x; - - for (x = params->clipLeft; x < params->clipRight; x++) - cbuf[x] = col; - } - } - else - { - for (y = low_y; y < high_y; y++) - { - uint16_t *cbuf = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + y*voodoo->row_width) & voodoo->fb_mask]; - int x; - - for (x = params->clipLeft; x < params->clipRight; x++) - cbuf[x] = col; - } - } - } - if (params->fbzMode & FBZ_DEPTH_WMASK) - { - if (SLI_ENABLED) - { - for (y = low_y; y < high_y; y += 2) - { - uint16_t *abuf = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (y >> 1) * voodoo->row_width) & voodoo->fb_mask]; - int x; - - for (x = params->clipLeft; x < params->clipRight; x++) - abuf[x] = params->zaColor & 0xffff; - } - } - else - { - for (y = low_y; y < high_y; y++) - { - uint16_t *abuf = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + y*voodoo->row_width) & voodoo->fb_mask]; - int x; - - for (x = params->clipLeft; x < params->clipRight; x++) - abuf[x] = params->zaColor & 0xffff; - } - } - } -} - -enum -{ - SETUPMODE_RGB = (1 << 0), - SETUPMODE_ALPHA = (1 << 1), - SETUPMODE_Z = (1 << 2), - SETUPMODE_Wb = (1 << 3), - SETUPMODE_W0 = (1 << 4), - SETUPMODE_S0_T0 = (1 << 5), - SETUPMODE_W1 = (1 << 6), - SETUPMODE_S1_T1 = (1 << 7), - - SETUPMODE_STRIP_MODE = (1 << 16), - SETUPMODE_CULLING_ENABLE = (1 << 17), - SETUPMODE_CULLING_SIGN = (1 << 18), - SETUPMODE_DISABLE_PINGPONG = (1 << 19) -}; - -static void triangle_setup(voodoo_t *voodoo) -{ - float dxAB, dxBC, dyAB, dyBC; - float area; - int va = 0, vb = 1, vc = 2; - int reverse_cull = 0; - - vert_t verts[3]; - - verts[0] = voodoo->verts[0]; - verts[1] = voodoo->verts[1]; - verts[2] = voodoo->verts[2]; - - if (verts[0].sVy < verts[1].sVy) - { - if (verts[1].sVy < verts[2].sVy) - { - /* V1>V0, V2>V1, V2>V1>V0*/ - va = 0; /*OK*/ - vb = 1; - vc = 2; - } - else - { - /* V1>V0, V1>V2*/ - if (verts[0].sVy < verts[2].sVy) - { - /* V1>V0, V1>V2, V2>V0, V1>V2>V0*/ - va = 0; - vb = 2; - vc = 1; - reverse_cull = 1; - } - else - { - /* V1>V0, V1>V2, V0>V2, V1>V0>V2*/ - va = 2; - vb = 0; - vc = 1; - } - } - } - else - { - if (verts[1].sVy < verts[2].sVy) - { - /* V0>V1, V2>V1*/ - if (verts[0].sVy < verts[2].sVy) - { - /* V0>V1, V2>V1, V2>V0, V2>V0>V1*/ - va = 1; - vb = 0; - vc = 2; - reverse_cull = 1; - } - else - { - /* V0>V1, V2>V1, V0>V2, V0>V2>V1*/ - va = 1; - vb = 2; - vc = 0; - } - } - else - { - /*V0>V1>V2*/ - va = 2; - vb = 1; - vc = 0; - reverse_cull = 1; - } - } - - dxAB = verts[va].sVx - verts[vb].sVx; - dxBC = verts[vb].sVx - verts[vc].sVx; - dyAB = verts[va].sVy - verts[vb].sVy; - dyBC = verts[vb].sVy - verts[vc].sVy; - - area = dxAB * dyBC - dxBC * dyAB; - - if (area == 0.0) - { - if ((voodoo->sSetupMode & SETUPMODE_CULLING_ENABLE) && - !(voodoo->sSetupMode & SETUPMODE_DISABLE_PINGPONG)) - voodoo->sSetupMode ^= SETUPMODE_CULLING_SIGN; - - return; - } - - dxAB /= area; - dxBC /= area; - dyAB /= area; - dyBC /= area; - - if (voodoo->sSetupMode & SETUPMODE_CULLING_ENABLE) - { - int cull_sign = voodoo->sSetupMode & SETUPMODE_CULLING_SIGN; - int sign = (area < 0.0); - - if (!(voodoo->sSetupMode & SETUPMODE_DISABLE_PINGPONG)) - voodoo->sSetupMode ^= SETUPMODE_CULLING_SIGN; - - if (reverse_cull) - sign = !sign; - - if (cull_sign && sign) - return; - if (!cull_sign && !sign) - return; - } - - voodoo->params.vertexAx = (int32_t)(int16_t)((int32_t)(verts[va].sVx * 16.0f) & 0xffff); - voodoo->params.vertexAy = (int32_t)(int16_t)((int32_t)(verts[va].sVy * 16.0f) & 0xffff); - voodoo->params.vertexBx = (int32_t)(int16_t)((int32_t)(verts[vb].sVx * 16.0f) & 0xffff); - voodoo->params.vertexBy = (int32_t)(int16_t)((int32_t)(verts[vb].sVy * 16.0f) & 0xffff); - voodoo->params.vertexCx = (int32_t)(int16_t)((int32_t)(verts[vc].sVx * 16.0f) & 0xffff); - voodoo->params.vertexCy = (int32_t)(int16_t)((int32_t)(verts[vc].sVy * 16.0f) & 0xffff); - - if (voodoo->params.vertexAy > voodoo->params.vertexBy || voodoo->params.vertexBy > voodoo->params.vertexCy) - fatal("triangle_setup wrong order %d %d %d\n", voodoo->params.vertexAy, voodoo->params.vertexBy, voodoo->params.vertexCy); - - if (voodoo->sSetupMode & SETUPMODE_RGB) - { - voodoo->params.startR = (int32_t)(verts[va].sRed * 4096.0f); - voodoo->params.dRdX = (int32_t)(((verts[va].sRed - verts[vb].sRed) * dyBC - (verts[vb].sRed - verts[vc].sRed) * dyAB) * 4096.0f); - voodoo->params.dRdY = (int32_t)(((verts[vb].sRed - verts[vc].sRed) * dxAB - (verts[va].sRed - verts[vb].sRed) * dxBC) * 4096.0f); - voodoo->params.startG = (int32_t)(verts[va].sGreen * 4096.0f); - voodoo->params.dGdX = (int32_t)(((verts[va].sGreen - verts[vb].sGreen) * dyBC - (verts[vb].sGreen - verts[vc].sGreen) * dyAB) * 4096.0f); - voodoo->params.dGdY = (int32_t)(((verts[vb].sGreen - verts[vc].sGreen) * dxAB - (verts[va].sGreen - verts[vb].sGreen) * dxBC) * 4096.0f); - voodoo->params.startB = (int32_t)(verts[va].sBlue * 4096.0f); - voodoo->params.dBdX = (int32_t)(((verts[va].sBlue - verts[vb].sBlue) * dyBC - (verts[vb].sBlue - verts[vc].sBlue) * dyAB) * 4096.0f); - voodoo->params.dBdY = (int32_t)(((verts[vb].sBlue - verts[vc].sBlue) * dxAB - (verts[va].sBlue - verts[vb].sBlue) * dxBC) * 4096.0f); - } - if (voodoo->sSetupMode & SETUPMODE_ALPHA) - { - voodoo->params.startA = (int32_t)(verts[va].sAlpha * 4096.0f); - voodoo->params.dAdX = (int32_t)(((verts[va].sAlpha - verts[vb].sAlpha) * dyBC - (verts[vb].sAlpha - verts[vc].sAlpha) * dyAB) * 4096.0f); - voodoo->params.dAdY = (int32_t)(((verts[vb].sAlpha - verts[vc].sAlpha) * dxAB - (verts[va].sAlpha - verts[vb].sAlpha) * dxBC) * 4096.0f); - } - if (voodoo->sSetupMode & SETUPMODE_Z) - { - voodoo->params.startZ = (int32_t)(verts[va].sVz * 4096.0f); - voodoo->params.dZdX = (int32_t)(((verts[va].sVz - verts[vb].sVz) * dyBC - (verts[vb].sVz - verts[vc].sVz) * dyAB) * 4096.0f); - voodoo->params.dZdY = (int32_t)(((verts[vb].sVz - verts[vc].sVz) * dxAB - (verts[va].sVz - verts[vb].sVz) * dxBC) * 4096.0f); - } - if (voodoo->sSetupMode & SETUPMODE_Wb) - { - voodoo->params.startW = (int64_t)(verts[va].sWb * 4294967296.0f); - voodoo->params.dWdX = (int64_t)(((verts[va].sWb - verts[vb].sWb) * dyBC - (verts[vb].sWb - verts[vc].sWb) * dyAB) * 4294967296.0f); - voodoo->params.dWdY = (int64_t)(((verts[vb].sWb - verts[vc].sWb) * dxAB - (verts[va].sWb - verts[vb].sWb) * dxBC) * 4294967296.0f); - voodoo->params.tmu[0].startW = voodoo->params.tmu[1].startW = voodoo->params.startW; - voodoo->params.tmu[0].dWdX = voodoo->params.tmu[1].dWdX = voodoo->params.dWdX; - voodoo->params.tmu[0].dWdY = voodoo->params.tmu[1].dWdY = voodoo->params.dWdY; - } - if (voodoo->sSetupMode & SETUPMODE_W0) - { - voodoo->params.tmu[0].startW = (int64_t)(verts[va].sW0 * 4294967296.0f); - voodoo->params.tmu[0].dWdX = (int64_t)(((verts[va].sW0 - verts[vb].sW0) * dyBC - (verts[vb].sW0 - verts[vc].sW0) * dyAB) * 4294967296.0f); - voodoo->params.tmu[0].dWdY = (int64_t)(((verts[vb].sW0 - verts[vc].sW0) * dxAB - (verts[va].sW0 - verts[vb].sW0) * dxBC) * 4294967296.0f); - voodoo->params.tmu[1].startW = voodoo->params.tmu[0].startW; - voodoo->params.tmu[1].dWdX = voodoo->params.tmu[0].dWdX; - voodoo->params.tmu[1].dWdY = voodoo->params.tmu[0].dWdY; - } - if (voodoo->sSetupMode & SETUPMODE_S0_T0) - { - voodoo->params.tmu[0].startS = (int64_t)(verts[va].sS0 * 4294967296.0f); - voodoo->params.tmu[0].dSdX = (int64_t)(((verts[va].sS0 - verts[vb].sS0) * dyBC - (verts[vb].sS0 - verts[vc].sS0) * dyAB) * 4294967296.0f); - voodoo->params.tmu[0].dSdY = (int64_t)(((verts[vb].sS0 - verts[vc].sS0) * dxAB - (verts[va].sS0 - verts[vb].sS0) * dxBC) * 4294967296.0f); - voodoo->params.tmu[0].startT = (int64_t)(verts[va].sT0 * 4294967296.0f); - voodoo->params.tmu[0].dTdX = (int64_t)(((verts[va].sT0 - verts[vb].sT0) * dyBC - (verts[vb].sT0 - verts[vc].sT0) * dyAB) * 4294967296.0f); - voodoo->params.tmu[0].dTdY = (int64_t)(((verts[vb].sT0 - verts[vc].sT0) * dxAB - (verts[va].sT0 - verts[vb].sT0) * dxBC) * 4294967296.0f); - voodoo->params.tmu[1].startS = voodoo->params.tmu[0].startS; - voodoo->params.tmu[1].dSdX = voodoo->params.tmu[0].dSdX; - voodoo->params.tmu[1].dSdY = voodoo->params.tmu[0].dSdY; - voodoo->params.tmu[1].startT = voodoo->params.tmu[0].startT; - voodoo->params.tmu[1].dTdX = voodoo->params.tmu[0].dTdX; - voodoo->params.tmu[1].dTdY = voodoo->params.tmu[0].dTdY; - } - if (voodoo->sSetupMode & SETUPMODE_W1) - { - voodoo->params.tmu[1].startW = (int64_t)(verts[va].sW1 * 4294967296.0f); - voodoo->params.tmu[1].dWdX = (int64_t)(((verts[va].sW1 - verts[vb].sW1) * dyBC - (verts[vb].sW1 - verts[vc].sW1) * dyAB) * 4294967296.0f); - voodoo->params.tmu[1].dWdY = (int64_t)(((verts[vb].sW1 - verts[vc].sW1) * dxAB - (verts[va].sW1 - verts[vb].sW1) * dxBC) * 4294967296.0f); - } - if (voodoo->sSetupMode & SETUPMODE_S1_T1) - { - voodoo->params.tmu[1].startS = (int64_t)(verts[va].sS1 * 4294967296.0f); - voodoo->params.tmu[1].dSdX = (int64_t)(((verts[va].sS1 - verts[vb].sS1) * dyBC - (verts[vb].sS1 - verts[vc].sS1) * dyAB) * 4294967296.0f); - voodoo->params.tmu[1].dSdY = (int64_t)(((verts[vb].sS1 - verts[vc].sS1) * dxAB - (verts[va].sS1 - verts[vb].sS1) * dxBC) * 4294967296.0f); - voodoo->params.tmu[1].startT = (int64_t)(verts[va].sT1 * 4294967296.0f); - voodoo->params.tmu[1].dTdX = (int64_t)(((verts[va].sT1 - verts[vb].sT1) * dyBC - (verts[vb].sT1 - verts[vc].sT1) * dyAB) * 4294967296.0f); - voodoo->params.tmu[1].dTdY = (int64_t)(((verts[vb].sT1 - verts[vc].sT1) * dxAB - (verts[va].sT1 - verts[vb].sT1) * dxBC) * 4294967296.0f); - } - - voodoo->params.sign = (area < 0.0); - - if (voodoo->ncc_dirty[0]) - voodoo_update_ncc(voodoo, 0); - if (voodoo->ncc_dirty[1]) - voodoo_update_ncc(voodoo, 1); - voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; - - queue_triangle(voodoo, &voodoo->params); -} - -enum -{ - BLIT_COMMAND_SCREEN_TO_SCREEN = 0, - BLIT_COMMAND_CPU_TO_SCREEN = 1, - BLIT_COMMAND_RECT_FILL = 2, - BLIT_COMMAND_SGRAM_FILL = 3 -}; - -enum -{ - BLIT_SRC_1BPP = (0 << 3), - BLIT_SRC_1BPP_BYTE_PACKED = (1 << 3), - BLIT_SRC_16BPP = (2 << 3), - BLIT_SRC_24BPP = (3 << 3), - BLIT_SRC_24BPP_DITHER_2X2 = (4 << 3), - BLIT_SRC_24BPP_DITHER_4X4 = (5 << 3) -}; - -enum -{ - BLIT_SRC_RGB_ARGB = (0 << 6), - BLIT_SRC_RGB_ABGR = (1 << 6), - BLIT_SRC_RGB_RGBA = (2 << 6), - BLIT_SRC_RGB_BGRA = (3 << 6) -}; - -enum -{ - BLIT_COMMAND_MASK = 7, - BLIT_SRC_FORMAT = (7 << 3), - BLIT_SRC_RGB_FORMAT = (3 << 6), - BLIT_SRC_CHROMA = (1 << 10), - BLIT_DST_CHROMA = (1 << 12), - BLIT_CLIPPING_ENABLED = (1 << 16) -}; - -enum -{ - BLIT_ROP_DST_PASS = (1 << 0), - BLIT_ROP_SRC_PASS = (1 << 1) -}; - -#define MIX(src_dat, dst_dat, rop) \ - switch (rop) \ - { \ - case 0x0: dst_dat = 0; break; \ - case 0x1: dst_dat = ~(src_dat | dst_dat); break; \ - case 0x2: dst_dat = ~src_dat & dst_dat; break; \ - case 0x3: dst_dat = ~src_dat; break; \ - case 0x4: dst_dat = src_dat & ~dst_dat; break; \ - case 0x5: dst_dat = ~dst_dat; break; \ - case 0x6: dst_dat = src_dat ^ dst_dat; break; \ - case 0x7: dst_dat = ~(src_dat & dst_dat); break; \ - case 0x8: dst_dat = src_dat & dst_dat; break; \ - case 0x9: dst_dat = ~(src_dat ^ dst_dat); break; \ - case 0xa: dst_dat = dst_dat; break; \ - case 0xb: dst_dat = ~src_dat | dst_dat; break; \ - case 0xc: dst_dat = src_dat; break; \ - case 0xd: dst_dat = src_dat | ~dst_dat; break; \ - case 0xe: dst_dat = src_dat | dst_dat; break; \ - case 0xf: dst_dat = 0xffff; break; \ - } - -static void blit_start(voodoo_t *voodoo) -{ - uint64_t dat64; - int size_x = ABS(voodoo->bltSizeX), size_y = ABS(voodoo->bltSizeY); - int x_dir = (voodoo->bltSizeX > 0) ? 1 : -1; - int y_dir = (voodoo->bltSizeY > 0) ? 1 : -1; - int dst_x; - int src_y = voodoo->bltSrcY & 0x7ff, dst_y = voodoo->bltDstY & 0x7ff; - int src_stride = (voodoo->bltCommand & BLTCMD_SRC_TILED) ? ((voodoo->bltSrcXYStride & 0x3f) * 32*2) : (voodoo->bltSrcXYStride & 0xff8); - int dst_stride = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstXYStride & 0x3f) * 32*2) : (voodoo->bltDstXYStride & 0xff8); - uint32_t src_base_addr = (voodoo->bltCommand & BLTCMD_SRC_TILED) ? ((voodoo->bltSrcBaseAddr & 0x3ff) << 12) : (voodoo->bltSrcBaseAddr & 0x3ffff8); - uint32_t dst_base_addr = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstBaseAddr & 0x3ff) << 12) : (voodoo->bltDstBaseAddr & 0x3ffff8); - int x, y; - -/* voodoo_log("blit_start: command=%08x srcX=%i srcY=%i dstX=%i dstY=%i sizeX=%i sizeY=%i color=%04x,%04x\n", - voodoo->bltCommand, voodoo->bltSrcX, voodoo->bltSrcY, voodoo->bltDstX, voodoo->bltDstY, voodoo->bltSizeX, voodoo->bltSizeY, voodoo->bltColorFg, voodoo->bltColorBg);*/ - - wait_for_render_thread_idle(voodoo); - - switch (voodoo->bltCommand & BLIT_COMMAND_MASK) - { - case BLIT_COMMAND_SCREEN_TO_SCREEN: - for (y = 0; y <= size_y; y++) - { - uint16_t *src = (uint16_t *)&voodoo->fb_mem[src_base_addr + src_y*src_stride]; - uint16_t *dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + dst_y*dst_stride]; - int src_x = voodoo->bltSrcX, dst_x = voodoo->bltDstX; - - for (x = 0; x <= size_x; x++) - { - uint16_t src_dat = src[src_x]; - uint16_t dst_dat = dst[dst_x]; - int rop = 0; - - if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED) - { - if (dst_x < voodoo->bltClipLeft || dst_x >= voodoo->bltClipRight || - dst_y < voodoo->bltClipLowY || dst_y >= voodoo->bltClipHighY) - goto skip_pixel_blit; - } - - if (voodoo->bltCommand & BLIT_SRC_CHROMA) - { - int r = (src_dat >> 11); - int g = (src_dat >> 5) & 0x3f; - int b = src_dat & 0x1f; - - if (r >= voodoo->bltSrcChromaMinR && r <= voodoo->bltSrcChromaMaxR && - g >= voodoo->bltSrcChromaMinG && g <= voodoo->bltSrcChromaMaxG && - b >= voodoo->bltSrcChromaMinB && b <= voodoo->bltSrcChromaMaxB) - rop |= BLIT_ROP_SRC_PASS; - } - if (voodoo->bltCommand & BLIT_DST_CHROMA) - { - int r = (dst_dat >> 11); - int g = (dst_dat >> 5) & 0x3f; - int b = dst_dat & 0x1f; - - if (r >= voodoo->bltDstChromaMinR && r <= voodoo->bltDstChromaMaxR && - g >= voodoo->bltDstChromaMinG && g <= voodoo->bltDstChromaMaxG && - b >= voodoo->bltDstChromaMinB && b <= voodoo->bltDstChromaMaxB) - rop |= BLIT_ROP_DST_PASS; - } - - MIX(src_dat, dst_dat, voodoo->bltRop[rop]); - - dst[dst_x] = dst_dat; -skip_pixel_blit: - src_x += x_dir; - dst_x += x_dir; - } - - src_y += y_dir; - dst_y += y_dir; - } - break; - - case BLIT_COMMAND_CPU_TO_SCREEN: - voodoo->blt.dst_x = voodoo->bltDstX; - voodoo->blt.dst_y = voodoo->bltDstY; - voodoo->blt.cur_x = 0; - voodoo->blt.size_x = size_x; - voodoo->blt.size_y = size_y; - voodoo->blt.x_dir = x_dir; - voodoo->blt.y_dir = y_dir; - voodoo->blt.dst_stride = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstXYStride & 0x3f) * 32*2) : (voodoo->bltDstXYStride & 0xff8); - break; - - case BLIT_COMMAND_RECT_FILL: - for (y = 0; y <= size_y; y++) - { - uint16_t *dst; - int dst_x = voodoo->bltDstX; - - if (SLI_ENABLED) - { - if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (voodoo->blt.dst_y & 1)) || - ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(voodoo->blt.dst_y & 1))) - goto skip_line_fill; - dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + (dst_y >> 1) * dst_stride]; - } - else - dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + dst_y*dst_stride]; - - for (x = 0; x <= size_x; x++) - { - if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED) - { - if (dst_x < voodoo->bltClipLeft || dst_x >= voodoo->bltClipRight || - dst_y < voodoo->bltClipLowY || dst_y >= voodoo->bltClipHighY) - goto skip_pixel_fill; - } - - dst[dst_x] = voodoo->bltColorFg; -skip_pixel_fill: - dst_x += x_dir; - } -skip_line_fill: - dst_y += y_dir; - } - break; - - case BLIT_COMMAND_SGRAM_FILL: - /*32x32 tiles - 2kb*/ - dst_y = voodoo->bltDstY & 0x3ff; - size_x = voodoo->bltSizeX & 0x1ff; //512*8 = 4kb - size_y = voodoo->bltSizeY & 0x3ff; - - dat64 = voodoo->bltColorFg | ((uint64_t)voodoo->bltColorFg << 16) | - ((uint64_t)voodoo->bltColorFg << 32) | ((uint64_t)voodoo->bltColorFg << 48); - - for (y = 0; y <= size_y; y++) - { - uint64_t *dst; - - /*This may be wrong*/ - if (!y) - { - dst_x = voodoo->bltDstX & 0x1ff; - size_x = 511 - dst_x; - } - else if (y < size_y) - { - dst_x = 0; - size_x = 511; - } - else - { - dst_x = 0; - size_x = voodoo->bltSizeX & 0x1ff; - } - - dst = (uint64_t *)&voodoo->fb_mem[(dst_y*512*8 + dst_x*8) & voodoo->fb_mask]; - - for (x = 0; x <= size_x; x++) - dst[x] = dat64; - - dst_y++; - } - break; - - default: - fatal("bad blit command %08x\n", voodoo->bltCommand); - } -} - -static void blit_data(voodoo_t *voodoo, uint32_t data) -{ - int src_bits = 32; - uint32_t base_addr = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstBaseAddr & 0x3ff) << 12) : (voodoo->bltDstBaseAddr & 0x3ffff8); - uint32_t addr; - uint16_t *dst; - - if ((voodoo->bltCommand & BLIT_COMMAND_MASK) != BLIT_COMMAND_CPU_TO_SCREEN) - return; - - if (SLI_ENABLED) - { - addr = base_addr + (voodoo->blt.dst_y >> 1) * voodoo->blt.dst_stride; - dst = (uint16_t *)&voodoo->fb_mem[addr]; - } - else - { - addr = base_addr + voodoo->blt.dst_y*voodoo->blt.dst_stride; - dst = (uint16_t *)&voodoo->fb_mem[addr]; - } - - if (addr >= voodoo->front_offset && voodoo->row_width) - { - int y = (addr - voodoo->front_offset) / voodoo->row_width; - if (y < voodoo->v_disp) - voodoo->dirty_line[y] = 2; - } - - while (src_bits && voodoo->blt.cur_x <= voodoo->blt.size_x) - { - int r = 0, g = 0, b = 0; - uint16_t src_dat = 0, dst_dat; - int x = (voodoo->blt.x_dir > 0) ? (voodoo->blt.dst_x + voodoo->blt.cur_x) : (voodoo->blt.dst_x - voodoo->blt.cur_x); - int rop = 0; - - switch (voodoo->bltCommand & BLIT_SRC_FORMAT) - { - case BLIT_SRC_1BPP: case BLIT_SRC_1BPP_BYTE_PACKED: - src_dat = (data & 1) ? voodoo->bltColorFg : voodoo->bltColorBg; - data >>= 1; - src_bits--; - break; - case BLIT_SRC_16BPP: - switch (voodoo->bltCommand & BLIT_SRC_RGB_FORMAT) - { - case BLIT_SRC_RGB_ARGB: case BLIT_SRC_RGB_RGBA: - src_dat = data & 0xffff; - break; - case BLIT_SRC_RGB_ABGR: case BLIT_SRC_RGB_BGRA: - src_dat = ((data & 0xf800) >> 11) | (data & 0x07c0) | ((data & 0x0038) << 11); - break; - } - data >>= 16; - src_bits -= 16; - break; - case BLIT_SRC_24BPP: case BLIT_SRC_24BPP_DITHER_2X2: case BLIT_SRC_24BPP_DITHER_4X4: - switch (voodoo->bltCommand & BLIT_SRC_RGB_FORMAT) - { - case BLIT_SRC_RGB_ARGB: - r = (data >> 16) & 0xff; - g = (data >> 8) & 0xff; - b = data & 0xff; - break; - case BLIT_SRC_RGB_ABGR: - r = data & 0xff; - g = (data >> 8) & 0xff; - b = (data >> 16) & 0xff; - break; - case BLIT_SRC_RGB_RGBA: - r = (data >> 24) & 0xff; - g = (data >> 16) & 0xff; - b = (data >> 8) & 0xff; - break; - case BLIT_SRC_RGB_BGRA: - r = (data >> 8) & 0xff; - g = (data >> 16) & 0xff; - b = (data >> 24) & 0xff; - break; - } - switch (voodoo->bltCommand & BLIT_SRC_FORMAT) - { - case BLIT_SRC_24BPP: - src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8); - break; - case BLIT_SRC_24BPP_DITHER_2X2: - r = dither_rb2x2[r][voodoo->blt.dst_y & 1][x & 1]; - g = dither_g2x2[g][voodoo->blt.dst_y & 1][x & 1]; - b = dither_rb2x2[b][voodoo->blt.dst_y & 1][x & 1]; - src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8); - break; - case BLIT_SRC_24BPP_DITHER_4X4: - r = dither_rb[r][voodoo->blt.dst_y & 3][x & 3]; - g = dither_g[g][voodoo->blt.dst_y & 3][x & 3]; - b = dither_rb[b][voodoo->blt.dst_y & 3][x & 3]; - src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8); - break; - } - src_bits = 0; - break; - } - - if (SLI_ENABLED) - { - if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (voodoo->blt.dst_y & 1)) || - ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(voodoo->blt.dst_y & 1))) - goto skip_pixel; - } - - if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED) - { - if (x < voodoo->bltClipLeft || x >= voodoo->bltClipRight || - voodoo->blt.dst_y < voodoo->bltClipLowY || voodoo->blt.dst_y >= voodoo->bltClipHighY) - goto skip_pixel; - } - - dst_dat = dst[x]; - - if (voodoo->bltCommand & BLIT_SRC_CHROMA) - { - r = (src_dat >> 11); - g = (src_dat >> 5) & 0x3f; - b = src_dat & 0x1f; - - if (r >= voodoo->bltSrcChromaMinR && r <= voodoo->bltSrcChromaMaxR && - g >= voodoo->bltSrcChromaMinG && g <= voodoo->bltSrcChromaMaxG && - b >= voodoo->bltSrcChromaMinB && b <= voodoo->bltSrcChromaMaxB) - rop |= BLIT_ROP_SRC_PASS; - } - if (voodoo->bltCommand & BLIT_DST_CHROMA) - { - r = (dst_dat >> 11); - g = (dst_dat >> 5) & 0x3f; - b = dst_dat & 0x1f; - - if (r >= voodoo->bltDstChromaMinR && r <= voodoo->bltDstChromaMaxR && - g >= voodoo->bltDstChromaMinG && g <= voodoo->bltDstChromaMaxG && - b >= voodoo->bltDstChromaMinB && b <= voodoo->bltDstChromaMaxB) - rop |= BLIT_ROP_DST_PASS; - } - - MIX(src_dat, dst_dat, voodoo->bltRop[rop]); - - dst[x] = dst_dat; - -skip_pixel: - voodoo->blt.cur_x++; - } - - if (voodoo->blt.cur_x > voodoo->blt.size_x) - { - voodoo->blt.size_y--; - if (voodoo->blt.size_y >= 0) - { - voodoo->blt.cur_x = 0; - voodoo->blt.dst_y += voodoo->blt.y_dir; - } - } -} - -enum -{ - CHIP_FBI = 0x1, - CHIP_TREX0 = 0x2, - CHIP_TREX1 = 0x4, - CHIP_TREX2 = 0x8 -}; - -static void wait_for_swap_complete(voodoo_t *voodoo) -{ - while (voodoo->swap_pending) - { - thread_wait_event(voodoo->wake_fifo_thread, -1); - thread_reset_event(voodoo->wake_fifo_thread); - if ((voodoo->swap_pending && voodoo->flush) || FIFO_FULL) - { - /*Main thread is waiting for FIFO to empty, so skip vsync wait and just swap*/ - memset(voodoo->dirty_line, 1, 1024); - voodoo->front_offset = voodoo->params.front_offset; - if (voodoo->swap_count > 0) - voodoo->swap_count--; - voodoo->swap_pending = 0; - break; - } - } -} - -static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) -{ - voodoo_t *voodoo = (voodoo_t *)p; - union - { - uint32_t i; - float f; - } tempif; - int ad21 = addr & (1 << 21); - int chip = (addr >> 10) & 0xf; - if (!chip) - chip = 0xf; - - tempif.i = val; -//voodoo_log("voodoo_reg_write_l: addr=%08x val=%08x(%f) chip=%x\n", addr, val, tempif.f, chip); - addr &= 0x3fc; - - if ((voodoo->fbiInit3 & FBIINIT3_REMAP) && addr < 0x100 && ad21) - addr |= 0x400; - switch (addr) - { - case SST_swapbufferCMD: -// voodoo_log(" start swap buffer command\n"); - - if (TRIPLE_BUFFER) - { - voodoo->disp_buffer = (voodoo->disp_buffer + 1) % 3; - voodoo->draw_buffer = (voodoo->draw_buffer + 1) % 3; - } - else - { - voodoo->disp_buffer = !voodoo->disp_buffer; - voodoo->draw_buffer = !voodoo->draw_buffer; - } - voodoo_recalc(voodoo); - - voodoo->params.swapbufferCMD = val; - - voodoo_log("Swap buffer %08x %d %p %i\n", val, voodoo->swap_count, &voodoo->swap_count, (voodoo == voodoo->set->voodoos[1]) ? 1 : 0); -// voodoo->front_offset = params->front_offset; - wait_for_render_thread_idle(voodoo); - if (!(val & 1)) - { - memset(voodoo->dirty_line, 1, 1024); - voodoo->front_offset = voodoo->params.front_offset; - if (voodoo->swap_count > 0) - voodoo->swap_count--; - } - else if (TRIPLE_BUFFER) - { - if (voodoo->swap_pending) - wait_for_swap_complete(voodoo); - - voodoo->swap_interval = (val >> 1) & 0xff; - voodoo->swap_offset = voodoo->params.front_offset; - voodoo->swap_pending = 1; - } - else - { - voodoo->swap_interval = (val >> 1) & 0xff; - voodoo->swap_offset = voodoo->params.front_offset; - voodoo->swap_pending = 1; - - wait_for_swap_complete(voodoo); - } - voodoo->cmd_read++; - break; - - case SST_vertexAx: case SST_remap_vertexAx: - voodoo->params.vertexAx = val & 0xffff; - break; - case SST_vertexAy: case SST_remap_vertexAy: - voodoo->params.vertexAy = val & 0xffff; - break; - case SST_vertexBx: case SST_remap_vertexBx: - voodoo->params.vertexBx = val & 0xffff; - break; - case SST_vertexBy: case SST_remap_vertexBy: - voodoo->params.vertexBy = val & 0xffff; - break; - case SST_vertexCx: case SST_remap_vertexCx: - voodoo->params.vertexCx = val & 0xffff; - break; - case SST_vertexCy: case SST_remap_vertexCy: - voodoo->params.vertexCy = val & 0xffff; - break; - - case SST_startR: case SST_remap_startR: - voodoo->params.startR = val & 0xffffff; - break; - case SST_startG: case SST_remap_startG: - voodoo->params.startG = val & 0xffffff; - break; - case SST_startB: case SST_remap_startB: - voodoo->params.startB = val & 0xffffff; - break; - case SST_startZ: case SST_remap_startZ: - voodoo->params.startZ = val; - break; - case SST_startA: case SST_remap_startA: - voodoo->params.startA = val & 0xffffff; - break; - case SST_startS: case SST_remap_startS: - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].startS = ((int64_t)(int32_t)val) << 14; - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].startS = ((int64_t)(int32_t)val) << 14; - break; - case SST_startT: case SST_remap_startT: - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].startT = ((int64_t)(int32_t)val) << 14; - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].startT = ((int64_t)(int32_t)val) << 14; - break; - case SST_startW: case SST_remap_startW: - if (chip & CHIP_FBI) - voodoo->params.startW = (int64_t)(int32_t)val << 2; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].startW = (int64_t)(int32_t)val << 2; - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].startW = (int64_t)(int32_t)val << 2; - break; - - case SST_dRdX: case SST_remap_dRdX: - voodoo->params.dRdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); - break; - case SST_dGdX: case SST_remap_dGdX: - voodoo->params.dGdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); - break; - case SST_dBdX: case SST_remap_dBdX: - voodoo->params.dBdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); - break; - case SST_dZdX: case SST_remap_dZdX: - voodoo->params.dZdX = val; - break; - case SST_dAdX: case SST_remap_dAdX: - voodoo->params.dAdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); - break; - case SST_dSdX: case SST_remap_dSdX: - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dSdX = ((int64_t)(int32_t)val) << 14; - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dSdX = ((int64_t)(int32_t)val) << 14; - break; - case SST_dTdX: case SST_remap_dTdX: - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dTdX = ((int64_t)(int32_t)val) << 14; - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dTdX = ((int64_t)(int32_t)val) << 14; - break; - case SST_dWdX: case SST_remap_dWdX: - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dWdX = (int64_t)(int32_t)val << 2; - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dWdX = (int64_t)(int32_t)val << 2; - if (chip & CHIP_FBI) - voodoo->params.dWdX = (int64_t)(int32_t)val << 2; - break; - - case SST_dRdY: case SST_remap_dRdY: - voodoo->params.dRdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); - break; - case SST_dGdY: case SST_remap_dGdY: - voodoo->params.dGdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); - break; - case SST_dBdY: case SST_remap_dBdY: - voodoo->params.dBdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); - break; - case SST_dZdY: case SST_remap_dZdY: - voodoo->params.dZdY = val; - break; - case SST_dAdY: case SST_remap_dAdY: - voodoo->params.dAdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); - break; - case SST_dSdY: case SST_remap_dSdY: - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dSdY = ((int64_t)(int32_t)val) << 14; - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dSdY = ((int64_t)(int32_t)val) << 14; - break; - case SST_dTdY: case SST_remap_dTdY: - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dTdY = ((int64_t)(int32_t)val) << 14; - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dTdY = ((int64_t)(int32_t)val) << 14; - break; - case SST_dWdY: case SST_remap_dWdY: - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dWdY = (int64_t)(int32_t)val << 2; - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dWdY = (int64_t)(int32_t)val << 2; - if (chip & CHIP_FBI) - voodoo->params.dWdY = (int64_t)(int32_t)val << 2; - break; - - case SST_triangleCMD: case SST_remap_triangleCMD: - voodoo->params.sign = val & (1 << 31); - - if (voodoo->ncc_dirty[0]) - voodoo_update_ncc(voodoo, 0); - if (voodoo->ncc_dirty[1]) - voodoo_update_ncc(voodoo, 1); - voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; - - queue_triangle(voodoo, &voodoo->params); - - voodoo->cmd_read++; - break; - - case SST_fvertexAx: case SST_remap_fvertexAx: - voodoo->fvertexAx.i = val; - voodoo->params.vertexAx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexAx.f * 16.0f) & 0xffff; - break; - case SST_fvertexAy: case SST_remap_fvertexAy: - voodoo->fvertexAy.i = val; - voodoo->params.vertexAy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexAy.f * 16.0f) & 0xffff; - break; - case SST_fvertexBx: case SST_remap_fvertexBx: - voodoo->fvertexBx.i = val; - voodoo->params.vertexBx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexBx.f * 16.0f) & 0xffff; - break; - case SST_fvertexBy: case SST_remap_fvertexBy: - voodoo->fvertexBy.i = val; - voodoo->params.vertexBy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexBy.f * 16.0f) & 0xffff; - break; - case SST_fvertexCx: case SST_remap_fvertexCx: - voodoo->fvertexCx.i = val; - voodoo->params.vertexCx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexCx.f * 16.0f) & 0xffff; - break; - case SST_fvertexCy: case SST_remap_fvertexCy: - voodoo->fvertexCy.i = val; - voodoo->params.vertexCy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexCy.f * 16.0f) & 0xffff; - break; - - case SST_fstartR: case SST_remap_fstartR: - tempif.i = val; - voodoo->params.startR = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fstartG: case SST_remap_fstartG: - tempif.i = val; - voodoo->params.startG = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fstartB: case SST_remap_fstartB: - tempif.i = val; - voodoo->params.startB = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fstartZ: case SST_remap_fstartZ: - tempif.i = val; - voodoo->params.startZ = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fstartA: case SST_remap_fstartA: - tempif.i = val; - voodoo->params.startA = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fstartS: case SST_remap_fstartS: - tempif.i = val; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].startS = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].startS = (int64_t)(tempif.f * 4294967296.0f); - break; - case SST_fstartT: case SST_remap_fstartT: - tempif.i = val; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].startT = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].startT = (int64_t)(tempif.f * 4294967296.0f); - break; - case SST_fstartW: case SST_remap_fstartW: - tempif.i = val; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].startW = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].startW = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_FBI) - voodoo->params.startW = (int64_t)(tempif.f * 4294967296.0f); - break; - - case SST_fdRdX: case SST_remap_fdRdX: - tempif.i = val; - voodoo->params.dRdX = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdGdX: case SST_remap_fdGdX: - tempif.i = val; - voodoo->params.dGdX = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdBdX: case SST_remap_fdBdX: - tempif.i = val; - voodoo->params.dBdX = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdZdX: case SST_remap_fdZdX: - tempif.i = val; - voodoo->params.dZdX = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdAdX: case SST_remap_fdAdX: - tempif.i = val; - voodoo->params.dAdX = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdSdX: case SST_remap_fdSdX: - tempif.i = val; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dSdX = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dSdX = (int64_t)(tempif.f * 4294967296.0f); - break; - case SST_fdTdX: case SST_remap_fdTdX: - tempif.i = val; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dTdX = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dTdX = (int64_t)(tempif.f * 4294967296.0f); - break; - case SST_fdWdX: case SST_remap_fdWdX: - tempif.i = val; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dWdX = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dWdX = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_FBI) - voodoo->params.dWdX = (int64_t)(tempif.f * 4294967296.0f); - break; - - case SST_fdRdY: case SST_remap_fdRdY: - tempif.i = val; - voodoo->params.dRdY = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdGdY: case SST_remap_fdGdY: - tempif.i = val; - voodoo->params.dGdY = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdBdY: case SST_remap_fdBdY: - tempif.i = val; - voodoo->params.dBdY = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdZdY: case SST_remap_fdZdY: - tempif.i = val; - voodoo->params.dZdY = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdAdY: case SST_remap_fdAdY: - tempif.i = val; - voodoo->params.dAdY = (int32_t)(tempif.f * 4096.0f); - break; - case SST_fdSdY: case SST_remap_fdSdY: - tempif.i = val; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dSdY = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dSdY = (int64_t)(tempif.f * 4294967296.0f); - break; - case SST_fdTdY: case SST_remap_fdTdY: - tempif.i = val; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dTdY = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dTdY = (int64_t)(tempif.f * 4294967296.0f); - break; - case SST_fdWdY: case SST_remap_fdWdY: - tempif.i = val; - if (chip & CHIP_TREX0) - voodoo->params.tmu[0].dWdY = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_TREX1) - voodoo->params.tmu[1].dWdY = (int64_t)(tempif.f * 4294967296.0f); - if (chip & CHIP_FBI) - voodoo->params.dWdY = (int64_t)(tempif.f * 4294967296.0f); - break; - - case SST_ftriangleCMD: - voodoo->params.sign = val & (1 << 31); - - if (voodoo->ncc_dirty[0]) - voodoo_update_ncc(voodoo, 0); - if (voodoo->ncc_dirty[1]) - voodoo_update_ncc(voodoo, 1); - voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; - - queue_triangle(voodoo, &voodoo->params); - - voodoo->cmd_read++; - break; - - case SST_fbzColorPath: - voodoo->params.fbzColorPath = val; - voodoo->rgb_sel = val & 3; - break; - - case SST_fogMode: - voodoo->params.fogMode = val; - break; - case SST_alphaMode: - voodoo->params.alphaMode = val; - break; - case SST_fbzMode: - voodoo->params.fbzMode = val; - voodoo_recalc(voodoo); - break; - case SST_lfbMode: - voodoo->lfbMode = val; - voodoo_recalc(voodoo); - break; - - case SST_clipLeftRight: - if (voodoo->type >= VOODOO_2) - { - voodoo->params.clipRight = val & 0xfff; - voodoo->params.clipLeft = (val >> 16) & 0xfff; - } - else - { - voodoo->params.clipRight = val & 0x3ff; - voodoo->params.clipLeft = (val >> 16) & 0x3ff; - } - break; - case SST_clipLowYHighY: - if (voodoo->type >= VOODOO_2) - { - voodoo->params.clipHighY = val & 0xfff; - voodoo->params.clipLowY = (val >> 16) & 0xfff; - } - else - { - voodoo->params.clipHighY = val & 0x3ff; - voodoo->params.clipLowY = (val >> 16) & 0x3ff; - } - break; - - case SST_nopCMD: - voodoo->cmd_read++; - voodoo->fbiPixelsIn = 0; - voodoo->fbiChromaFail = 0; - voodoo->fbiZFuncFail = 0; - voodoo->fbiAFuncFail = 0; - voodoo->fbiPixelsOut = 0; - break; - case SST_fastfillCMD: - wait_for_render_thread_idle(voodoo); - voodoo_fastfill(voodoo, &voodoo->params); - voodoo->cmd_read++; - break; - - case SST_fogColor: - voodoo->params.fogColor.r = (val >> 16) & 0xff; - voodoo->params.fogColor.g = (val >> 8) & 0xff; - voodoo->params.fogColor.b = val & 0xff; - break; - - case SST_zaColor: - voodoo->params.zaColor = val; - break; - case SST_chromaKey: - voodoo->params.chromaKey_r = (val >> 16) & 0xff; - voodoo->params.chromaKey_g = (val >> 8) & 0xff; - voodoo->params.chromaKey_b = val & 0xff; - voodoo->params.chromaKey = val & 0xffffff; - break; - case SST_stipple: - voodoo->params.stipple = val; - break; - case SST_color0: - voodoo->params.color0 = val; - break; - case SST_color1: - voodoo->params.color1 = val; - break; - - case SST_fogTable00: case SST_fogTable01: case SST_fogTable02: case SST_fogTable03: - case SST_fogTable04: case SST_fogTable05: case SST_fogTable06: case SST_fogTable07: - case SST_fogTable08: case SST_fogTable09: case SST_fogTable0a: case SST_fogTable0b: - case SST_fogTable0c: case SST_fogTable0d: case SST_fogTable0e: case SST_fogTable0f: - case SST_fogTable10: case SST_fogTable11: case SST_fogTable12: case SST_fogTable13: - case SST_fogTable14: case SST_fogTable15: case SST_fogTable16: case SST_fogTable17: - case SST_fogTable18: case SST_fogTable19: case SST_fogTable1a: case SST_fogTable1b: - case SST_fogTable1c: case SST_fogTable1d: case SST_fogTable1e: case SST_fogTable1f: - addr = (addr - SST_fogTable00) >> 1; - voodoo->params.fogTable[addr].dfog = val & 0xff; - voodoo->params.fogTable[addr].fog = (val >> 8) & 0xff; - voodoo->params.fogTable[addr+1].dfog = (val >> 16) & 0xff; - voodoo->params.fogTable[addr+1].fog = (val >> 24) & 0xff; - break; - - case SST_clutData: - voodoo->clutData[(val >> 24) & 0x3f].b = val & 0xff; - voodoo->clutData[(val >> 24) & 0x3f].g = (val >> 8) & 0xff; - voodoo->clutData[(val >> 24) & 0x3f].r = (val >> 16) & 0xff; - if (val & 0x20000000) - { - voodoo->clutData[(val >> 24) & 0x3f].b = 255; - voodoo->clutData[(val >> 24) & 0x3f].g = 255; - voodoo->clutData[(val >> 24) & 0x3f].r = 255; - } - voodoo->clutData_dirty = 1; - break; - - case SST_sSetupMode: - voodoo->sSetupMode = val; - break; - case SST_sVx: - tempif.i = val; - voodoo->verts[3].sVx = tempif.f; -// voodoo_log("sVx[%i]=%f\n", voodoo->vertex_num, tempif.f); - break; - case SST_sVy: - tempif.i = val; - voodoo->verts[3].sVy = tempif.f; -// voodoo_log("sVy[%i]=%f\n", voodoo->vertex_num, tempif.f); - break; - case SST_sARGB: - voodoo->verts[3].sBlue = (float)(val & 0xff); - voodoo->verts[3].sGreen = (float)((val >> 8) & 0xff); - voodoo->verts[3].sRed = (float)((val >> 16) & 0xff); - voodoo->verts[3].sAlpha = (float)((val >> 24) & 0xff); - break; - case SST_sRed: - tempif.i = val; - voodoo->verts[3].sRed = tempif.f; - break; - case SST_sGreen: - tempif.i = val; - voodoo->verts[3].sGreen = tempif.f; - break; - case SST_sBlue: - tempif.i = val; - voodoo->verts[3].sBlue = tempif.f; - break; - case SST_sAlpha: - tempif.i = val; - voodoo->verts[3].sAlpha = tempif.f; - break; - case SST_sVz: - tempif.i = val; - voodoo->verts[3].sVz = tempif.f; - break; - case SST_sWb: - tempif.i = val; - voodoo->verts[3].sWb = tempif.f; - break; - case SST_sW0: - tempif.i = val; - voodoo->verts[3].sW0 = tempif.f; - break; - case SST_sS0: - tempif.i = val; - voodoo->verts[3].sS0 = tempif.f; - break; - case SST_sT0: - tempif.i = val; - voodoo->verts[3].sT0 = tempif.f; - break; - case SST_sW1: - tempif.i = val; - voodoo->verts[3].sW1 = tempif.f; - break; - case SST_sS1: - tempif.i = val; - voodoo->verts[3].sS1 = tempif.f; - break; - case SST_sT1: - tempif.i = val; - voodoo->verts[3].sT1 = tempif.f; - break; - - case SST_sBeginTriCMD: -// voodoo_log("sBeginTriCMD %i %f\n", voodoo->vertex_num, voodoo->verts[4].sVx); - voodoo->verts[0] = voodoo->verts[3]; - voodoo->vertex_num = 1; - voodoo->num_verticies = 1; - break; - case SST_sDrawTriCMD: -// voodoo_log("sDrawTriCMD %i %i %i\n", voodoo->num_verticies, voodoo->vertex_num, voodoo->sSetupMode & SETUPMODE_STRIP_MODE); - if (voodoo->vertex_num == 3) - voodoo->vertex_num = (voodoo->sSetupMode & SETUPMODE_STRIP_MODE) ? 1 : 0; - voodoo->verts[voodoo->vertex_num] = voodoo->verts[3]; - - voodoo->num_verticies++; - voodoo->vertex_num++; - if (voodoo->num_verticies == 3) - { -// voodoo_log("triangle_setup\n"); - triangle_setup(voodoo); - - voodoo->num_verticies = 2; - } - if (voodoo->vertex_num == 4) - fatal("sDrawTriCMD overflow\n"); - break; - - case SST_bltSrcBaseAddr: - voodoo->bltSrcBaseAddr = val & 0x3fffff; - break; - case SST_bltDstBaseAddr: -// voodoo_log("Write bltDstBaseAddr %08x\n", val); - voodoo->bltDstBaseAddr = val & 0x3fffff; - break; - case SST_bltXYStrides: - voodoo->bltSrcXYStride = val & 0xfff; - voodoo->bltDstXYStride = (val >> 16) & 0xfff; -// voodoo_log("Write bltXYStrides %08x\n", val); - break; - case SST_bltSrcChromaRange: - voodoo->bltSrcChromaRange = val; - voodoo->bltSrcChromaMinB = val & 0x1f; - voodoo->bltSrcChromaMinG = (val >> 5) & 0x3f; - voodoo->bltSrcChromaMinR = (val >> 11) & 0x1f; - voodoo->bltSrcChromaMaxB = (val >> 16) & 0x1f; - voodoo->bltSrcChromaMaxG = (val >> 21) & 0x3f; - voodoo->bltSrcChromaMaxR = (val >> 27) & 0x1f; - break; - case SST_bltDstChromaRange: - voodoo->bltDstChromaRange = val; - voodoo->bltDstChromaMinB = val & 0x1f; - voodoo->bltDstChromaMinG = (val >> 5) & 0x3f; - voodoo->bltDstChromaMinR = (val >> 11) & 0x1f; - voodoo->bltDstChromaMaxB = (val >> 16) & 0x1f; - voodoo->bltDstChromaMaxG = (val >> 21) & 0x3f; - voodoo->bltDstChromaMaxR = (val >> 27) & 0x1f; - break; - case SST_bltClipX: - voodoo->bltClipRight = val & 0xfff; - voodoo->bltClipLeft = (val >> 16) & 0xfff; - break; - case SST_bltClipY: - voodoo->bltClipHighY = val & 0xfff; - voodoo->bltClipLowY = (val >> 16) & 0xfff; - break; - - case SST_bltSrcXY: - voodoo->bltSrcX = val & 0x7ff; - voodoo->bltSrcY = (val >> 16) & 0x7ff; - break; - case SST_bltDstXY: -// voodoo_log("Write bltDstXY %08x\n", val); - voodoo->bltDstX = val & 0x7ff; - voodoo->bltDstY = (val >> 16) & 0x7ff; - if (val & (1 << 31)) - blit_start(voodoo); - break; - case SST_bltSize: -// voodoo_log("Write bltSize %08x\n", val); - voodoo->bltSizeX = val & 0xfff; - if (voodoo->bltSizeX & 0x800) - voodoo->bltSizeX |= 0xfffff000; - voodoo->bltSizeY = (val >> 16) & 0xfff; - if (voodoo->bltSizeY & 0x800) - voodoo->bltSizeY |= 0xfffff000; - if (val & (1 << 31)) - blit_start(voodoo); - break; - case SST_bltRop: - voodoo->bltRop[0] = val & 0xf; - voodoo->bltRop[1] = (val >> 4) & 0xf; - voodoo->bltRop[2] = (val >> 8) & 0xf; - voodoo->bltRop[3] = (val >> 12) & 0xf; - break; - case SST_bltColor: -// voodoo_log("Write bltColor %08x\n", val); - voodoo->bltColorFg = val & 0xffff; - voodoo->bltColorBg = (val >> 16) & 0xffff; - break; - - case SST_bltCommand: - voodoo->bltCommand = val; -// voodoo_log("Write bltCommand %08x\n", val); - if (val & (1 << 31)) - blit_start(voodoo); - break; - case SST_bltData: - blit_data(voodoo, val); - break; - - case SST_textureMode: - if (chip & CHIP_TREX0) - { - voodoo->params.textureMode[0] = val; - voodoo->params.tformat[0] = (val >> 8) & 0xf; - } - if (chip & CHIP_TREX1) - { - voodoo->params.textureMode[1] = val; - voodoo->params.tformat[1] = (val >> 8) & 0xf; - } - break; - case SST_tLOD: - if (chip & CHIP_TREX0) - { - voodoo->params.tLOD[0] = val; - voodoo_recalc_tex(voodoo, 0); - } - if (chip & CHIP_TREX1) - { - voodoo->params.tLOD[1] = val; - voodoo_recalc_tex(voodoo, 1); - } - break; - case SST_tDetail: - if (chip & CHIP_TREX0) - { - voodoo->params.detail_max[0] = val & 0xff; - voodoo->params.detail_bias[0] = (val >> 8) & 0x3f; - voodoo->params.detail_scale[0] = (val >> 14) & 7; - } - if (chip & CHIP_TREX1) - { - voodoo->params.detail_max[1] = val & 0xff; - voodoo->params.detail_bias[1] = (val >> 8) & 0x3f; - voodoo->params.detail_scale[1] = (val >> 14) & 7; - } - break; - case SST_texBaseAddr: - if (chip & CHIP_TREX0) - { - voodoo->params.texBaseAddr[0] = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo, 0); - } - if (chip & CHIP_TREX1) - { - voodoo->params.texBaseAddr[1] = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo, 1); - } - break; - case SST_texBaseAddr1: - if (chip & CHIP_TREX0) - { - voodoo->params.texBaseAddr1[0] = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo, 0); - } - if (chip & CHIP_TREX1) - { - voodoo->params.texBaseAddr1[1] = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo, 1); - } - break; - case SST_texBaseAddr2: - if (chip & CHIP_TREX0) - { - voodoo->params.texBaseAddr2[0] = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo, 0); - } - if (chip & CHIP_TREX1) - { - voodoo->params.texBaseAddr2[1] = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo, 1); - } - break; - case SST_texBaseAddr38: - if (chip & CHIP_TREX0) - { - voodoo->params.texBaseAddr38[0] = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo, 0); - } - if (chip & CHIP_TREX1) - { - voodoo->params.texBaseAddr38[1] = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo, 1); - } - break; - - case SST_trexInit1: - if (chip & CHIP_TREX0) - voodoo->trexInit1[0] = val; - if (chip & CHIP_TREX1) - voodoo->trexInit1[1] = val; - break; - - case SST_nccTable0_Y0: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].y[0] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].y[0] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable0_Y1: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].y[1] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].y[1] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable0_Y2: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].y[2] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].y[2] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable0_Y3: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].y[3] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].y[3] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - - case SST_nccTable0_I0: - if (!(val & (1 << 31))) - { - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].i[0] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].i[0] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - } - case SST_nccTable0_I2: - if (!(val & (1 << 31))) - { - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].i[2] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].i[2] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - } - case SST_nccTable0_Q0: - if (!(val & (1 << 31))) - { - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].q[0] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].q[0] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - } - case SST_nccTable0_Q2: - if (!(val & (1 << 31))) - { - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].i[2] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].i[2] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - } - if (val & (1 << 31)) - { - int p = (val >> 23) & 0xfe; - if (chip & CHIP_TREX0) - { - voodoo->palette[0][p].u = val | 0xff000000; - voodoo->palette_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->palette[1][p].u = val | 0xff000000; - voodoo->palette_dirty[1] = 1; - } - } - break; - - case SST_nccTable0_I1: - if (!(val & (1 << 31))) - { - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].i[1] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].i[1] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - } - case SST_nccTable0_I3: - if (!(val & (1 << 31))) - { - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].i[3] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].i[3] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - } - case SST_nccTable0_Q1: - if (!(val & (1 << 31))) - { - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].q[1] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].q[1] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - } - case SST_nccTable0_Q3: - if (!(val & (1 << 31))) - { - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][0].q[3] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][0].q[3] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - } - if (val & (1 << 31)) - { - int p = ((val >> 23) & 0xfe) | 0x01; - if (chip & CHIP_TREX0) - { - voodoo->palette[0][p].u = val | 0xff000000; - voodoo->palette_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->palette[1][p].u = val | 0xff000000; - voodoo->palette_dirty[1] = 1; - } - } - break; - - case SST_nccTable1_Y0: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].y[0] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].y[0] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_Y1: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].y[1] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].y[1] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_Y2: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].y[2] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].y[2] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_Y3: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].y[3] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].y[3] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_I0: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].i[0] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].i[0] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_I1: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].i[1] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].i[1] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_I2: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].i[2] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].i[2] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_I3: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].i[3] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].i[3] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_Q0: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].q[0] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].q[0] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_Q1: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].q[1] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].q[1] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_Q2: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].q[2] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].q[2] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - case SST_nccTable1_Q3: - if (chip & CHIP_TREX0) - { - voodoo->nccTable[0][1].q[3] = val; - voodoo->ncc_dirty[0] = 1; - } - if (chip & CHIP_TREX1) - { - voodoo->nccTable[1][1].q[3] = val; - voodoo->ncc_dirty[1] = 1; - } - break; - - case SST_userIntrCMD: - fatal("userIntrCMD write %08x from FIFO\n", val); - break; - } -} - - -static uint16_t voodoo_fb_readw(uint32_t addr, void *p) -{ - voodoo_t *voodoo = (voodoo_t *)p; - int x, y; - uint32_t read_addr; - uint16_t temp; - - x = (addr >> 1) & 0x3ff; - y = (addr >> 11) & 0x3ff; - - if (SLI_ENABLED) - { - voodoo_set_t *set = voodoo->set; - - if (y & 1) - voodoo = set->voodoos[1]; - else - voodoo = set->voodoos[0]; - - y >>= 1; - } - - read_addr = voodoo->fb_read_offset + (x << 1) + (y * voodoo->row_width); - - if (read_addr > voodoo->fb_mask) - return 0xffff; - - temp = *(uint16_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]); - -// voodoo_log("voodoo_fb_readw : %08X %08X %i %i %08X %08X %08x:%08x %i\n", addr, temp, x, y, read_addr, *(uint32_t *)(&voodoo->fb_mem[4]), cs, pc, fb_reads++); - return temp; -} -static uint32_t voodoo_fb_readl(uint32_t addr, void *p) -{ - voodoo_t *voodoo = (voodoo_t *)p; - int x, y; - uint32_t read_addr; - uint32_t temp; - - x = addr & 0x7fe; - y = (addr >> 11) & 0x3ff; - - if (SLI_ENABLED) - { - voodoo_set_t *set = voodoo->set; - - if (y & 1) - voodoo = set->voodoos[1]; - else - voodoo = set->voodoos[0]; - - y >>= 1; - } - - read_addr = voodoo->fb_read_offset + x + (y * voodoo->row_width); - - if (read_addr > voodoo->fb_mask) - return 0xffffffff; - - temp = *(uint32_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]); - -// voodoo_log("voodoo_fb_readl : %08X %08x %08X x=%i y=%i %08X %08X %08x:%08x %i ro=%08x rw=%i\n", addr, read_addr, temp, x, y, read_addr, *(uint32_t *)(&voodoo->fb_mem[4]), cs, pc, fb_reads++, voodoo->fb_read_offset, voodoo->row_width); - return temp; -} - -static inline uint16_t do_dither(voodoo_params_t *params, rgba8_t col, int x, int y) -{ - int r, g, b; - - if (dither) - { - if (dither2x2) - { - r = dither_rb2x2[col.r][y & 1][x & 1]; - g = dither_g2x2[col.g][y & 1][x & 1]; - b = dither_rb2x2[col.b][y & 1][x & 1]; - } - else - { - r = dither_rb[col.r][y & 3][x & 3]; - g = dither_g[col.g][y & 3][x & 3]; - b = dither_rb[col.b][y & 3][x & 3]; - } - } - else - { - r = col.r >> 3; - g = col.g >> 2; - b = col.b >> 3; - } - - return b | (g << 5) | (r << 11); -} - -static void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p) -{ - voodoo_t *voodoo = (voodoo_t *)p; - voodoo_params_t *params = &voodoo->params; - int x, y; - uint32_t write_addr, write_addr_aux; - rgba8_t colour_data; - uint16_t depth_data; - uint8_t alpha_data; - int write_mask = 0; - - colour_data.r = colour_data.g = colour_data.b = colour_data.a = 0; - - depth_data = voodoo->params.zaColor & 0xffff; - alpha_data = voodoo->params.zaColor >> 24; - -// while (!RB_EMPTY) -// thread_reset_event(voodoo->not_full_event); - -// voodoo_log("voodoo_fb_writew : %08X %04X\n", addr, val); - - - switch (voodoo->lfbMode & LFB_FORMAT_MASK) - { - case LFB_FORMAT_RGB565: - colour_data = rgb565[val]; - alpha_data = 0xff; - write_mask = LFB_WRITE_COLOUR; - break; - case LFB_FORMAT_RGB555: - colour_data = argb1555[val]; - alpha_data = 0xff; - write_mask = LFB_WRITE_COLOUR; - break; - case LFB_FORMAT_ARGB1555: - colour_data = argb1555[val]; - alpha_data = colour_data.a; - write_mask = LFB_WRITE_COLOUR; - break; - case LFB_FORMAT_DEPTH: - depth_data = val; - write_mask = LFB_WRITE_DEPTH; - break; - - default: - fatal("voodoo_fb_writew : bad LFB format %08X\n", voodoo->lfbMode); - } - - x = addr & 0x7fe; - y = (addr >> 11) & 0x3ff; - - if (SLI_ENABLED) - { - if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (y & 1)) || - ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(y & 1))) - return; - y >>= 1; - } - - - if (voodoo->fb_write_offset == voodoo->params.front_offset) - voodoo->dirty_line[y] = 1; - - write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width); - write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width); - -// voodoo_log("fb_writew %08x %i %i %i %08x\n", addr, x, y, voodoo->row_width, write_addr); - - if (voodoo->lfbMode & 0x100) - { - { - rgba8_t write_data = colour_data; - uint16_t new_depth = depth_data; - - if (params->fbzMode & FBZ_DEPTH_ENABLE) - { - uint16_t old_depth = *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]); - - DEPTH_TEST(new_depth); - } - - if ((params->fbzMode & FBZ_CHROMAKEY) && - write_data.r == params->chromaKey_r && - write_data.g == params->chromaKey_g && - write_data.b == params->chromaKey_b) - goto skip_pixel; - - if (params->fogMode & FOG_ENABLE) - { - int32_t z = new_depth << 12; - int64_t w_depth = (int64_t)(int32_t)new_depth; - int32_t ia = alpha_data << 12; - - APPLY_FOG(write_data.r, write_data.g, write_data.b, z, ia, w_depth); - } - - if (params->alphaMode & 1) - ALPHA_TEST(alpha_data); - - if (params->alphaMode & (1 << 4)) - { - uint16_t dat = *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]); - int dest_r, dest_g, dest_b, dest_a; - - dest_r = (dat >> 8) & 0xf8; - dest_g = (dat >> 3) & 0xfc; - dest_b = (dat << 3) & 0xf8; - dest_r |= (dest_r >> 5); - dest_g |= (dest_g >> 6); - dest_b |= (dest_b >> 5); - dest_a = 0xff; - - ALPHA_BLEND(write_data.r, write_data.g, write_data.b, alpha_data); - } - - if (params->fbzMode & FBZ_RGB_WMASK) - *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, write_data, x >> 1, y); - if (params->fbzMode & FBZ_DEPTH_WMASK) - *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = new_depth; - -skip_pixel: - (void)x; - } - } - else - { - if (write_mask & LFB_WRITE_COLOUR) - *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, colour_data, x >> 1, y); - if (write_mask & LFB_WRITE_DEPTH) - *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = depth_data; - } -} - - -static void voodoo_fb_writel(uint32_t addr, uint32_t val, void *p) -{ - voodoo_t *voodoo = (voodoo_t *)p; - voodoo_params_t *params = &voodoo->params; - int x, y; - uint32_t write_addr, write_addr_aux; - rgba8_t colour_data[2]; - uint16_t depth_data[2]; - uint8_t alpha_data[2]; - int write_mask = 0, count = 1; - - depth_data[0] = depth_data[1] = voodoo->params.zaColor & 0xffff; - alpha_data[0] = alpha_data[1] = voodoo->params.zaColor >> 24; -// while (!RB_EMPTY) -// thread_reset_event(voodoo->not_full_event); - -// voodoo_log("voodoo_fb_writel : %08X %08X\n", addr, val); - - switch (voodoo->lfbMode & LFB_FORMAT_MASK) - { - case LFB_FORMAT_RGB565: - colour_data[0] = rgb565[val & 0xffff]; - colour_data[1] = rgb565[val >> 16]; - write_mask = LFB_WRITE_COLOUR; - count = 2; - break; - case LFB_FORMAT_RGB555: - colour_data[0] = argb1555[val & 0xffff]; - colour_data[1] = argb1555[val >> 16]; - write_mask = LFB_WRITE_COLOUR; - count = 2; - break; - case LFB_FORMAT_ARGB1555: - colour_data[0] = argb1555[val & 0xffff]; - alpha_data[0] = colour_data[0].a; - colour_data[1] = argb1555[val >> 16]; - alpha_data[1] = colour_data[1].a; - write_mask = LFB_WRITE_COLOUR; - count = 2; - break; - - case LFB_FORMAT_ARGB8888: - colour_data[0].b = val & 0xff; - colour_data[0].g = (val >> 8) & 0xff; - colour_data[0].r = (val >> 16) & 0xff; - colour_data[0].a = alpha_data[0] = (val >> 24) & 0xff; - write_mask = LFB_WRITE_COLOUR; - addr >>= 1; - break; - - case LFB_FORMAT_DEPTH: - depth_data[0] = val; - depth_data[1] = val >> 16; - write_mask = LFB_WRITE_DEPTH; - count = 2; - break; - - default: - fatal("voodoo_fb_writel : bad LFB format %08X\n", voodoo->lfbMode); - } - - x = addr & 0x7fe; - y = (addr >> 11) & 0x3ff; - - if (SLI_ENABLED) - { - if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (y & 1)) || - ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(y & 1))) - return; - y >>= 1; - } - - if (voodoo->fb_write_offset == voodoo->params.front_offset) - voodoo->dirty_line[y] = 1; - - write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width); - write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width); - -// voodoo_log("fb_writel %08x x=%i y=%i rw=%i %08x wo=%08x\n", addr, x, y, voodoo->row_width, write_addr, voodoo->fb_write_offset); - - if (voodoo->lfbMode & 0x100) - { - int c; - - for (c = 0; c < count; c++) - { - rgba8_t write_data = colour_data[c]; - uint16_t new_depth = depth_data[c]; - - if (params->fbzMode & FBZ_DEPTH_ENABLE) - { - uint16_t old_depth = *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]); - - DEPTH_TEST(new_depth); - } - - if ((params->fbzMode & FBZ_CHROMAKEY) && - write_data.r == params->chromaKey_r && - write_data.g == params->chromaKey_g && - write_data.b == params->chromaKey_b) - goto skip_pixel; - - if (params->fogMode & FOG_ENABLE) - { - int32_t z = new_depth << 12; - int64_t w_depth = new_depth; - int32_t ia = alpha_data[c] << 12; - - APPLY_FOG(write_data.r, write_data.g, write_data.b, z, ia, w_depth); - } - - if (params->alphaMode & 1) - ALPHA_TEST(alpha_data[c]); - - if (params->alphaMode & (1 << 4)) - { - uint16_t dat = *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]); - int dest_r, dest_g, dest_b, dest_a; - - dest_r = (dat >> 8) & 0xf8; - dest_g = (dat >> 3) & 0xfc; - dest_b = (dat << 3) & 0xf8; - dest_r |= (dest_r >> 5); - dest_g |= (dest_g >> 6); - dest_b |= (dest_b >> 5); - dest_a = 0xff; - - ALPHA_BLEND(write_data.r, write_data.g, write_data.b, alpha_data[c]); - } - - if (params->fbzMode & FBZ_RGB_WMASK) - *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, write_data, (x >> 1) + c, y); - if (params->fbzMode & FBZ_DEPTH_WMASK) - *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = new_depth; - -skip_pixel: - write_addr += 2; - write_addr_aux += 2; - } - } - else - { - int c; - - for (c = 0; c < count; c++) - { - if (write_mask & LFB_WRITE_COLOUR) - *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, colour_data[c], (x >> 1) + c, y); - if (write_mask & LFB_WRITE_DEPTH) - *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = depth_data[c]; - - write_addr += 2; - write_addr_aux += 2; - } - } -} - -static void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p) -{ - int lod, s, t; - voodoo_t *voodoo = (voodoo_t *)p; - int tmu; - - if (addr & 0x400000) - return; /*TREX != 0*/ - - tmu = (addr & 0x200000) ? 1 : 0; - - if (tmu && !voodoo->dual_tmus) - return; - -// voodoo_log("voodoo_tex_writel : %08X %08X %i\n", addr, val, voodoo->params.tformat); - - lod = (addr >> 17) & 0xf; - t = (addr >> 9) & 0xff; - if (voodoo->params.tformat[tmu] & 8) - s = (addr >> 1) & 0xfe; - else - { - if (voodoo->params.textureMode[tmu] & (1 << 31)) - s = addr & 0xfc; - else - s = (addr >> 1) & 0xfc; - } - - if (lod > LOD_MAX) - return; - -// if (addr >= 0x200000) -// return; - - if (voodoo->params.tformat[tmu] & 8) - addr = voodoo->params.tex_base[tmu][lod] + s*2 + (t << voodoo->params.tex_shift[tmu][lod])*2; - else - addr = voodoo->params.tex_base[tmu][lod] + s + (t << voodoo->params.tex_shift[tmu][lod]); - if (voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) - { -// voodoo_log("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); - flush_texture_cache(voodoo, addr & voodoo->texture_mask, tmu); - } - *(uint32_t *)(&voodoo->tex_mem[tmu][addr & voodoo->texture_mask]) = val; -} - -#define WAKE_DELAY (TIMER_USEC * 100) -static inline void wake_fifo_thread(voodoo_t *voodoo) -{ - if (!timer_is_enabled(&voodoo->wake_timer)) - { - /*Don't wake FIFO thread immediately - if we do that it will probably - process one word and go back to sleep, requiring it to be woken on - almost every write. Instead, wait a short while so that the CPU - emulation writes more data so we have more batched-up work.*/ - timer_set_delay_u64(&voodoo->wake_timer, WAKE_DELAY); - } -} - -static inline void wake_fifo_thread_now(voodoo_t *voodoo) -{ - thread_set_event(voodoo->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ -} - -static void voodoo_wake_timer(void *p) -{ - voodoo_t *voodoo = (voodoo_t *)p; - - thread_set_event(voodoo->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ -} - -static inline void queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val) -{ - fifo_entry_t *fifo = &voodoo->fifo[voodoo->fifo_write_idx & FIFO_MASK]; - - while (FIFO_FULL) - { - thread_reset_event(voodoo->fifo_not_full_event); - if (FIFO_FULL) - { - thread_wait_event(voodoo->fifo_not_full_event, 1); /*Wait for room in ringbuffer*/ - if (FIFO_FULL) - wake_fifo_thread_now(voodoo); - } - } - - fifo->val = val; - fifo->addr_type = addr_type; - - voodoo->fifo_write_idx++; - - if (FIFO_ENTRIES > 0xe000) - wake_fifo_thread(voodoo); -} static uint16_t voodoo_readw(uint32_t addr, void *p) { @@ -5979,10 +167,10 @@ static uint16_t voodoo_readw(uint32_t addr, void *p) voodoo->flush = 1; while (!FIFO_EMPTY) { - wake_fifo_thread_now(voodoo); + voodoo_wake_fifo_thread_now(voodoo); thread_wait_event(voodoo->fifo_not_full_event, 1); } - wait_for_render_thread_idle(voodoo); + voodoo_wait_for_render_thread_idle(voodoo); voodoo->flush = 0; return voodoo_fb_readw(addr, voodoo); @@ -5991,29 +179,11 @@ static uint16_t voodoo_readw(uint32_t addr, void *p) return 0xffff; } -static void voodoo_flush(voodoo_t *voodoo) -{ - voodoo->flush = 1; - while (!FIFO_EMPTY) - { - wake_fifo_thread_now(voodoo); - thread_wait_event(voodoo->fifo_not_full_event, 1); - } - wait_for_render_thread_idle(voodoo); - voodoo->flush = 0; -} - -static void wake_fifo_threads(voodoo_set_t *set, voodoo_t *voodoo) -{ - wake_fifo_thread(voodoo); - if (SLI_ENABLED && voodoo->type != VOODOO_2 && set->voodoos[0] == voodoo) - wake_fifo_thread(set->voodoos[1]); -} static uint32_t voodoo_readl(uint32_t addr, void *p) { voodoo_t *voodoo = (voodoo_t *)p; - uint32_t temp = 0; + uint32_t temp; int fifo_size; voodoo->rd_count++; addr &= 0xffffff; @@ -6039,10 +209,10 @@ static uint32_t voodoo_readl(uint32_t addr, void *p) voodoo->flush = 1; while (!FIFO_EMPTY) { - wake_fifo_thread_now(voodoo); + voodoo_wake_fifo_thread_now(voodoo); thread_wait_event(voodoo->fifo_not_full_event, 1); } - wait_for_render_thread_idle(voodoo); + voodoo_wait_for_render_thread_idle(voodoo); voodoo->flush = 0; temp = voodoo_fb_readl(addr, voodoo); @@ -6069,7 +239,7 @@ static uint32_t voodoo_readl(uint32_t addr, void *p) (voodoo_other->cmdfifo_depth_rd != voodoo_other->cmdfifo_depth_wr)) busy = 1; if (!voodoo_other->voodoo_busy) - wake_fifo_thread(voodoo_other); + voodoo_wake_fifo_thread(voodoo_other); } fifo_size = 0xffff - fifo_entries; @@ -6089,7 +259,7 @@ static uint32_t voodoo_readl(uint32_t addr, void *p) temp |= 0x380; /*Busy*/ if (!voodoo->voodoo_busy) - wake_fifo_thread(voodoo); + voodoo_wake_fifo_thread(voodoo); } break; @@ -6174,20 +344,20 @@ static uint32_t voodoo_readl(uint32_t addr, void *p) temp = voodoo->line & 0x1fff; break; case SST_hvRetrace: - { - uint32_t line_time = (uint32_t)(voodoo->line_time >> 32); - uint32_t diff = (timer_get_ts_int(&voodoo->timer) > (tsc & 0xffffffff)) ? (timer_get_ts_int(&voodoo->timer) - (tsc & 0xffffffff)) : 0; - uint32_t pre_div = diff * voodoo->h_total; - uint32_t post_div = pre_div / line_time; - uint32_t h_pos = (voodoo->h_total - 1) - post_div; + { + uint32_t line_time = (uint32_t)(voodoo->line_time >> 32); + uint32_t diff = (timer_get_ts_int(&voodoo->timer) > (tsc & 0xffffffff)) ? (timer_get_ts_int(&voodoo->timer) - (tsc & 0xffffffff)) : 0; + uint32_t pre_div = diff * voodoo->h_total; + uint32_t post_div = pre_div / line_time; + uint32_t h_pos = (voodoo->h_total - 1) - post_div; - if (h_pos >= voodoo->h_total) - h_pos = 0; + if (h_pos >= voodoo->h_total) + h_pos = 0; - temp = voodoo->line & 0x1fff; - temp |= (h_pos << 16); + temp = voodoo->line & 0x1fff; + temp |= (h_pos << 16); } - break; + break; case SST_fbiInit5: temp = voodoo->fbiInit5 & ~0x1ff; @@ -6230,39 +400,11 @@ static void voodoo_writew(uint32_t addr, uint16_t val, void *p) voodoo_t *voodoo = (voodoo_t *)p; voodoo->wr_count++; addr &= 0xffffff; - - if (addr == voodoo->last_write_addr+4) - sub_cycles(voodoo->burst_time); - else - sub_cycles(voodoo->write_time); - voodoo->last_write_addr = addr; + + sub_cycles(voodoo->write_time); if ((addr & 0xc00000) == 0x400000) /*Framebuffer*/ - queue_command(voodoo, addr | FIFO_WRITEW_FB, val); -} - -static void voodoo_pixelclock_update(voodoo_t *voodoo) -{ - int m = (voodoo->dac_pll_regs[0] & 0x7f) + 2; - int n1 = ((voodoo->dac_pll_regs[0] >> 8) & 0x1f) + 2; - int n2 = ((voodoo->dac_pll_regs[0] >> 13) & 0x07); - float t = (14318184.0 * ((float)m / (float)n1)) / (float)(1 << n2); - double clock_const; - int line_length; - - if ((voodoo->dac_data[6] & 0xf0) == 0x20 || - (voodoo->dac_data[6] & 0xf0) == 0x60 || - (voodoo->dac_data[6] & 0xf0) == 0x70) - t /= 2.0f; - - line_length = (voodoo->hSync & 0xff) + ((voodoo->hSync >> 16) & 0x3ff); - -// voodoo_log("Pixel clock %f MHz hsync %08x line_length %d\n", t, voodoo->hSync, line_length); - - voodoo->pixel_clock = t; - - clock_const = cpuclock / t; - voodoo->line_time = (uint64_t)((double)line_length * clock_const * (double)(1ULL << 32)); + voodoo_queue_command(voodoo, addr | FIFO_WRITEW_FB, val); } static void voodoo_writel(uint32_t addr, uint32_t val, void *p) @@ -6282,11 +424,11 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) if (addr & 0x800000) /*Texture*/ { voodoo->tex_count++; - queue_command(voodoo, addr | FIFO_WRITEL_TEX, val); + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_TEX, val); } else if (addr & 0x400000) /*Framebuffer*/ { - queue_command(voodoo, addr | FIFO_WRITEL_FB, val); + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_FB, val); } else if ((addr & 0x200000) && (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE)) { @@ -6294,7 +436,7 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) *(uint32_t *)&voodoo->fb_mem[(voodoo->cmdfifo_base + (addr & 0x3fffc)) & voodoo->fb_mask] = val; voodoo->cmdfifo_depth_wr++; if ((voodoo->cmdfifo_depth_wr - voodoo->cmdfifo_depth_rd) < 20) - wake_fifo_thread(voodoo); + voodoo_wake_fifo_thread(voodoo); } else switch (addr & 0x3fc) { @@ -6311,41 +453,41 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) voodoo->swap_count++; if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) return; - queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); if (!voodoo->voodoo_busy) - wake_fifo_threads(voodoo->set, voodoo); + voodoo_wake_fifo_threads(voodoo->set, voodoo); break; case SST_triangleCMD: if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) return; voodoo->cmd_written++; - queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); if (!voodoo->voodoo_busy) - wake_fifo_threads(voodoo->set, voodoo); + voodoo_wake_fifo_threads(voodoo->set, voodoo); break; case SST_ftriangleCMD: if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) return; voodoo->cmd_written++; - queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); if (!voodoo->voodoo_busy) - wake_fifo_threads(voodoo->set, voodoo); + voodoo_wake_fifo_threads(voodoo->set, voodoo); break; case SST_fastfillCMD: if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) return; voodoo->cmd_written++; - queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); if (!voodoo->voodoo_busy) - wake_fifo_threads(voodoo->set, voodoo); + voodoo_wake_fifo_threads(voodoo->set, voodoo); break; case SST_nopCMD: if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) return; voodoo->cmd_written++; - queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); if (!voodoo->voodoo_busy) - wake_fifo_threads(voodoo->set, voodoo); + voodoo_wake_fifo_threads(voodoo->set, voodoo); break; case SST_fbiInit4: @@ -6497,7 +639,10 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) break; case SST_fbiInit7: if (voodoo->initEnable & 0x01) + { voodoo->fbiInit7 = val; + voodoo->cmdfifo_enabled = val & 0x100; + } break; case SST_cmdFifoBaseAddr: @@ -6527,7 +672,7 @@ static void voodoo_writel(uint32_t addr, uint32_t val, void *p) } else { - queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); } break; } @@ -6561,274 +706,6 @@ static void voodoo_snoop_writel(uint32_t addr, uint32_t val, void *p) voodoo_writel(addr, val, set->voodoos[1]); } -static uint32_t cmdfifo_get(voodoo_t *voodoo) -{ - uint32_t val; - - while (voodoo->cmdfifo_depth_rd == voodoo->cmdfifo_depth_wr) - { - thread_wait_event(voodoo->wake_fifo_thread, -1); - thread_reset_event(voodoo->wake_fifo_thread); - } - - val = *(uint32_t *)&voodoo->fb_mem[voodoo->cmdfifo_rp & voodoo->fb_mask]; - - voodoo->cmdfifo_depth_rd++; - voodoo->cmdfifo_rp += 4; - -// voodoo_log(" CMDFIFO get %08x\n", val); - return val; -} - -static inline float cmdfifo_get_f(voodoo_t *voodoo) -{ - union - { - uint32_t i; - float f; - } tempif; - - tempif.i = cmdfifo_get(voodoo); - return tempif.f; -} - -enum -{ - CMDFIFO3_PC_MASK_RGB = (1 << 10), - CMDFIFO3_PC_MASK_ALPHA = (1 << 11), - CMDFIFO3_PC_MASK_Z = (1 << 12), - CMDFIFO3_PC_MASK_Wb = (1 << 13), - CMDFIFO3_PC_MASK_W0 = (1 << 14), - CMDFIFO3_PC_MASK_S0_T0 = (1 << 15), - CMDFIFO3_PC_MASK_W1 = (1 << 16), - CMDFIFO3_PC_MASK_S1_T1 = (1 << 17), - - CMDFIFO3_PC = (1 << 28) -}; - -static void fifo_thread(void *param) -{ - voodoo_t *voodoo = (voodoo_t *)param; - - while (1) - { - thread_set_event(voodoo->fifo_not_full_event); - thread_wait_event(voodoo->wake_fifo_thread, -1); - thread_reset_event(voodoo->wake_fifo_thread); - voodoo->voodoo_busy = 1; - while (!FIFO_EMPTY) - { - uint64_t start_time = plat_timer_read(); - uint64_t end_time; - fifo_entry_t *fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; - - switch (fifo->addr_type & FIFO_TYPE) - { - case FIFO_WRITEL_REG: - voodoo_reg_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); - break; - case FIFO_WRITEW_FB: - wait_for_render_thread_idle(voodoo); - voodoo_fb_writew(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); - break; - case FIFO_WRITEL_FB: - wait_for_render_thread_idle(voodoo); - voodoo_fb_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); - break; - case FIFO_WRITEL_TEX: - if (!(fifo->addr_type & 0x400000)) - voodoo_tex_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); - break; - } - voodoo->fifo_read_idx++; - fifo->addr_type = FIFO_INVALID; - - if (FIFO_ENTRIES > 0xe000) - thread_set_event(voodoo->fifo_not_full_event); - - end_time = plat_timer_read(); - voodoo->time += end_time - start_time; - } - - while (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr) - { - uint64_t start_time = plat_timer_read(); - uint64_t end_time; - uint32_t header = cmdfifo_get(voodoo); - uint32_t addr; - uint32_t mask; - int smode; - int num; - int num_verticies; - int v_num; - -// voodoo_log(" CMDFIFO header %08x at %08x\n", header, voodoo->cmdfifo_rp); - - switch (header & 7) - { - case 0: -// voodoo_log("CMDFIFO0\n"); - switch ((header >> 3) & 7) - { - case 0: /*NOP*/ - break; - - case 3: /*JMP local frame buffer*/ - voodoo->cmdfifo_rp = (header >> 4) & 0xfffffc; -// voodoo_log("JMP to %08x %04x\n", voodoo->cmdfifo_rp, header); - break; - - default: - fatal("Bad CMDFIFO0 %08x\n", header); - } - break; - - case 1: - num = header >> 16; - addr = (header & 0x7ff8) >> 1; -// voodoo_log("CMDFIFO1 addr=%08x\n",addr); - while (num--) - { - uint32_t val = cmdfifo_get(voodoo); - if ((addr & 0x3ff) == SST_triangleCMD || (addr & 0x3ff) == SST_ftriangleCMD || - (addr & 0x3ff) == SST_fastfillCMD || (addr & 0x3ff) == SST_nopCMD) - voodoo->cmd_written_fifo++; - - voodoo_reg_writel(addr, val, voodoo); - - if (header & (1 << 15)) - addr += 4; - } - break; - - case 3: - num = (header >> 29) & 7; - mask = header;//(header >> 10) & 0xff; - smode = (header >> 22) & 0xf; - voodoo_reg_writel(SST_sSetupMode, ((header >> 10) & 0xff) | (smode << 16), voodoo); - num_verticies = (header >> 6) & 0xf; - v_num = 0; - if (((header >> 3) & 7) == 2) - v_num = 1; -// voodoo_log("CMDFIFO3: num=%i verts=%i mask=%02x\n", num, num_verticies, (header >> 10) & 0xff); -// voodoo_log("CMDFIFO3 %02x %i\n", (header >> 10), (header >> 3) & 7); - - while (num_verticies--) - { - voodoo->verts[3].sVx = cmdfifo_get_f(voodoo); - voodoo->verts[3].sVy = cmdfifo_get_f(voodoo); - if (mask & CMDFIFO3_PC_MASK_RGB) - { - if (header & CMDFIFO3_PC) - { - uint32_t val = cmdfifo_get(voodoo); - voodoo->verts[3].sBlue = (float)(val & 0xff); - voodoo->verts[3].sGreen = (float)((val >> 8) & 0xff); - voodoo->verts[3].sRed = (float)((val >> 16) & 0xff); - voodoo->verts[3].sAlpha = (float)((val >> 24) & 0xff); - } - else - { - voodoo->verts[3].sRed = cmdfifo_get_f(voodoo); - voodoo->verts[3].sGreen = cmdfifo_get_f(voodoo); - voodoo->verts[3].sBlue = cmdfifo_get_f(voodoo); - } - } - if ((mask & CMDFIFO3_PC_MASK_ALPHA) && !(header & CMDFIFO3_PC)) - voodoo->verts[3].sAlpha = cmdfifo_get_f(voodoo); - if (mask & CMDFIFO3_PC_MASK_Z) - voodoo->verts[3].sVz = cmdfifo_get_f(voodoo); - if (mask & CMDFIFO3_PC_MASK_Wb) - voodoo->verts[3].sWb = cmdfifo_get_f(voodoo); - if (mask & CMDFIFO3_PC_MASK_W0) - voodoo->verts[3].sW0 = cmdfifo_get_f(voodoo); - if (mask & CMDFIFO3_PC_MASK_S0_T0) - { - voodoo->verts[3].sS0 = cmdfifo_get_f(voodoo); - voodoo->verts[3].sT0 = cmdfifo_get_f(voodoo); - } - if (mask & CMDFIFO3_PC_MASK_W1) - voodoo->verts[3].sW1 = cmdfifo_get_f(voodoo); - if (mask & CMDFIFO3_PC_MASK_S1_T1) - { - voodoo->verts[3].sS1 = cmdfifo_get_f(voodoo); - voodoo->verts[3].sT1 = cmdfifo_get_f(voodoo); - } - if (v_num) - voodoo_reg_writel(SST_sDrawTriCMD, 0, voodoo); - else - voodoo_reg_writel(SST_sBeginTriCMD, 0, voodoo); - v_num++; - if (v_num == 3 && ((header >> 3) & 7) == 0) - v_num = 0; - } - break; - - case 4: - num = (header >> 29) & 7; - mask = (header >> 15) & 0x3fff; - addr = (header & 0x7ff8) >> 1; -// voodoo_log("CMDFIFO4 addr=%08x\n",addr); - while (mask) - { - if (mask & 1) - { - uint32_t val = cmdfifo_get(voodoo); - if ((addr & 0x3ff) == SST_triangleCMD || (addr & 0x3ff) == SST_ftriangleCMD || - (addr & 0x3ff) == SST_fastfillCMD || (addr & 0x3ff) == SST_nopCMD) - voodoo->cmd_written_fifo++; - - voodoo_reg_writel(addr, val, voodoo); - } - - addr += 4; - mask >>= 1; - } - while (num--) - cmdfifo_get(voodoo); - break; - - case 5: - if (header & 0x3fc0000) - fatal("CMDFIFO packet 5 has byte disables set %08x\n", header); - num = (header >> 3) & 0x7ffff; - addr = cmdfifo_get(voodoo) & 0xffffff; -// voodoo_log("CMDFIFO5 addr=%08x num=%i\n", addr, num); - switch (header >> 30) - { - case 2: /*Framebuffer*/ - while (num--) - { - uint32_t val = cmdfifo_get(voodoo); - voodoo_fb_writel(addr, val, voodoo); - addr += 4; - } - break; - case 3: /*Texture*/ - while (num--) - { - uint32_t val = cmdfifo_get(voodoo); - voodoo_tex_writel(addr, val, voodoo); - addr += 4; - } - break; - - default: - fatal("CMDFIFO packet 5 bad space %08x %08x\n", header, voodoo->cmdfifo_rp); - } - break; - - default: - voodoo_log("Bad CMDFIFO packet %08x %08x\n", header, voodoo->cmdfifo_rp); - } - - end_time = plat_timer_read(); - voodoo->time += end_time - start_time; - } - voodoo->voodoo_busy = 0; - } -} - static void voodoo_recalcmapping(voodoo_set_t *set) { if (set->nr_cards == 2) @@ -6975,546 +852,6 @@ void voodoo_pci_write(int func, int addr, uint8_t val, void *p) } } -static void voodoo_calc_clutData(voodoo_t *voodoo) -{ - int c; - - for (c = 0; c < 256; c++) - { - voodoo->clutData256[c].r = (voodoo->clutData[c >> 3].r*(8-(c & 7)) + - voodoo->clutData[(c >> 3)+1].r*(c & 7)) >> 3; - voodoo->clutData256[c].g = (voodoo->clutData[c >> 3].g*(8-(c & 7)) + - voodoo->clutData[(c >> 3)+1].g*(c & 7)) >> 3; - voodoo->clutData256[c].b = (voodoo->clutData[c >> 3].b*(8-(c & 7)) + - voodoo->clutData[(c >> 3)+1].b*(c & 7)) >> 3; - } - - for (c = 0; c < 65536; c++) - { - int r = (c >> 8) & 0xf8; - int g = (c >> 3) & 0xfc; - int b = (c << 3) & 0xf8; -// r |= (r >> 5); -// g |= (g >> 6); -// b |= (b >> 5); - - voodoo->video_16to32[c] = (voodoo->clutData256[r].r << 16) | (voodoo->clutData256[g].g << 8) | voodoo->clutData256[b].b; - } -} - - - -#define FILTDIV 256 - -static int FILTCAP, FILTCAPG, FILTCAPB = 0; /* color filter threshold values */ - -static void voodoo_generate_filter_v1(voodoo_t *voodoo) -{ - int g, h; - float difference, diffg, diffb; - float thiscol, thiscolg, thiscolb, lined; - float fcr, fcg, fcb; - - fcr = FILTCAP * 5; - fcg = FILTCAPG * 6; - fcb = FILTCAPB * 5; - - for (g=0;g FILTCAP) - difference = FILTCAP; - if (difference < -FILTCAP) - difference = -FILTCAP; - - if (diffg > FILTCAPG) - diffg = FILTCAPG; - if (diffg < -FILTCAPG) - diffg = -FILTCAPG; - - if (diffb > FILTCAPB) - diffb = FILTCAPB; - if (diffb < -FILTCAPB) - diffb = -FILTCAPB; - - // hack - to make it not bleed onto black - //if (g == 0){ - //difference = diffg = diffb = 0; - //} - - if ((difference < fcr) || (-difference > -fcr)) - thiscol = g + (difference / 2); - if ((diffg < fcg) || (-diffg > -fcg)) - thiscolg = g + (diffg / 2); /* need these divides so we can actually undither! */ - if ((diffb < fcb) || (-diffb > -fcb)) - thiscolb = g + (diffb / 2); - - if (thiscol < 0) - thiscol = 0; - if (thiscol > FILTDIV-1) - thiscol = FILTDIV-1; - - if (thiscolg < 0) - thiscolg = 0; - if (thiscolg > FILTDIV-1) - thiscolg = FILTDIV-1; - - if (thiscolb < 0) - thiscolb = 0; - if (thiscolb > FILTDIV-1) - thiscolb = FILTDIV-1; - - voodoo->thefilter[g][h] = thiscol; - voodoo->thefilterg[g][h] = thiscolg; - voodoo->thefilterb[g][h] = thiscolb; - } - - lined = g + 4; - if (lined > 255) - lined = 255; - voodoo->purpleline[g][0] = lined; - voodoo->purpleline[g][2] = lined; - - lined = g + 0; - if (lined > 255) - lined = 255; - voodoo->purpleline[g][1] = lined; - } -} - -static void voodoo_generate_filter_v2(voodoo_t *voodoo) -{ - int g, h; - float difference; - float thiscol, thiscolg, thiscolb, lined; - float clr, clg, clb = 0; - float fcr, fcg, fcb = 0; - - // pre-clamping - - fcr = FILTCAP; - fcg = FILTCAPG; - fcb = FILTCAPB; - - if (fcr > 32) fcr = 32; - if (fcg > 32) fcg = 32; - if (fcb > 32) fcb = 32; - - for (g=0;g<256;g++) // pixel 1 - our target pixel we want to bleed into - { - for (h=0;h<256;h++) // pixel 2 - our main pixel - { - float avg; - float avgdiff; - - difference = (float)(g - h); - avg = (float)((g + g + g + g + h) / 5); - avgdiff = avg - (float)((g + h + h + h + h) / 5); - if (avgdiff < 0) avgdiff *= -1; - if (difference < 0) difference *= -1; - - thiscol = thiscolg = thiscolb = g; - - // try lighten - if (h > g) - { - clr = clg = clb = avgdiff; - - if (clr>fcr) clr=fcr; - if (clg>fcg) clg=fcg; - if (clb>fcb) clb=fcb; - - - thiscol = g + clr; - thiscolg = g + clg; - thiscolb = g + clb; - - if (thiscol>g+FILTCAP) - thiscol=g+FILTCAP; - if (thiscolg>g+FILTCAPG) - thiscolg=g+FILTCAPG; - if (thiscolb>g+FILTCAPB) - thiscolb=g+FILTCAPB; - - - if (thiscol>g+avgdiff) - thiscol=g+avgdiff; - if (thiscolg>g+avgdiff) - thiscolg=g+avgdiff; - if (thiscolb>g+avgdiff) - thiscolb=g+avgdiff; - - } - - if (difference > FILTCAP) - thiscol = g; - if (difference > FILTCAPG) - thiscolg = g; - if (difference > FILTCAPB) - thiscolb = g; - - // clamp - if (thiscol < 0) thiscol = 0; - if (thiscolg < 0) thiscolg = 0; - if (thiscolb < 0) thiscolb = 0; - - if (thiscol > 255) thiscol = 255; - if (thiscolg > 255) thiscolg = 255; - if (thiscolb > 255) thiscolb = 255; - - // add to the table - voodoo->thefilter[g][h] = (thiscol); - voodoo->thefilterg[g][h] = (thiscolg); - voodoo->thefilterb[g][h] = (thiscolb); - - // debug the ones that don't give us much of a difference - //if (difference < FILTCAP) - //voodoo_log("Voodoofilter: %ix%i - %f difference, %f average difference, R=%f, G=%f, B=%f\n", g, h, difference, avgdiff, thiscol, thiscolg, thiscolb); - } - - lined = g + 3; - if (lined > 255) - lined = 255; - voodoo->purpleline[g][0] = lined; - voodoo->purpleline[g][1] = 0; - voodoo->purpleline[g][2] = lined; - } -} - -static void voodoo_threshold_check(voodoo_t *voodoo) -{ - int r, g, b; - - if (!voodoo->scrfilterEnabled) - return; /* considered disabled; don't check and generate */ - - /* Check for changes, to generate anew table */ - if (voodoo->scrfilterThreshold != voodoo->scrfilterThresholdOld) - { - r = (voodoo->scrfilterThreshold >> 16) & 0xFF; - g = (voodoo->scrfilterThreshold >> 8 ) & 0xFF; - b = voodoo->scrfilterThreshold & 0xFF; - - FILTCAP = r; - FILTCAPG = g; - FILTCAPB = b; - - voodoo_log("Voodoo Filter Threshold Check: %06x - RED %i GREEN %i BLUE %i\n", voodoo->scrfilterThreshold, r, g, b); - - voodoo->scrfilterThresholdOld = voodoo->scrfilterThreshold; - - if (voodoo->type == VOODOO_2) - voodoo_generate_filter_v2(voodoo); - else - voodoo_generate_filter_v1(voodoo); - } -} - -static void voodoo_filterline_v1(voodoo_t *voodoo, uint8_t *fil, int column, uint16_t *src, int line) -{ - int x; - - // Scratchpad for avoiding feedback streaks - uint8_t *fil3 = malloc((voodoo->h_disp) * 3); - if (fil3 == NULL) fatal("fil3 = NULL"); - - /* 16 to 32-bit */ - for (x=0; x> 5) & 63) << 2); - fil[x*3+2] = (((src[x] >> 11) & 31) << 3); - - // Copy to our scratchpads - fil3[x*3+0] = fil[x*3+0]; - fil3[x*3+1] = fil[x*3+1]; - fil3[x*3+2] = fil[x*3+2]; - } - - - /* lines */ - - if (line & 1) - { - for (x=0; xpurpleline[fil[x*3]][0]; - fil[x*3+1] = voodoo->purpleline[fil[x*3+1]][1]; - fil[x*3+2] = voodoo->purpleline[fil[x*3+2]][2]; - } - } - - - /* filtering time */ - - for (x=1; xthefilterb[fil[x*3]][fil[ (x-1) *3]]; - fil3[(x)*3+1] = voodoo->thefilterg[fil[x*3+1]][fil[ (x-1) *3+1]]; - fil3[(x)*3+2] = voodoo->thefilter[fil[x*3+2]][fil[ (x-1) *3+2]]; - } - - for (x=1; xthefilterb[fil3[x*3]][fil3[ (x-1) *3]]; - fil[(x)*3+1] = voodoo->thefilterg[fil3[x*3+1]][fil3[ (x-1) *3+1]]; - fil[(x)*3+2] = voodoo->thefilter[fil3[x*3+2]][fil3[ (x-1) *3+2]]; - } - - for (x=1; xthefilterb[fil[x*3]][fil[ (x-1) *3]]; - fil3[(x)*3+1] = voodoo->thefilterg[fil[x*3+1]][fil[ (x-1) *3+1]]; - fil3[(x)*3+2] = voodoo->thefilter[fil[x*3+2]][fil[ (x-1) *3+2]]; - } - - for (x=0; xthefilterb[fil3[x*3]][fil3[ (x+1) *3]]; - fil[(x)*3+1] = voodoo->thefilterg[fil3[x*3+1]][fil3[ (x+1) *3+1]]; - fil[(x)*3+2] = voodoo->thefilter[fil3[x*3+2]][fil3[ (x+1) *3+2]]; - } - - free(fil3); -} - - -static void voodoo_filterline_v2(voodoo_t *voodoo, uint8_t *fil, int column, uint16_t *src, int line) -{ - int x; - - // Scratchpad for blending filter - uint8_t *fil3 = malloc((voodoo->h_disp) * 3); - if (fil3 == NULL) fatal("fil3 = NULL"); - - /* 16 to 32-bit */ - for (x=0; x> 5) & 63) << 2); - fil3[x*3+2] = fil[x*3+2] = (((src[x] >> 11) & 31) << 3); - } - - /* filtering time */ - - for (x=1; xthefilterb [((src[x+3] & 31) << 3)] [((src[x] & 31) << 3)]; - fil3[(x+3)*3+1] = voodoo->thefilterg [(((src[x+3] >> 5) & 63) << 2)] [(((src[x] >> 5) & 63) << 2)]; - fil3[(x+3)*3+2] = voodoo->thefilter [(((src[x+3] >> 11) & 31) << 3)] [(((src[x] >> 11) & 31) << 3)]; - - fil[(x+2)*3] = voodoo->thefilterb [fil3[(x+2)*3]][((src[x] & 31) << 3)]; - fil[(x+2)*3+1] = voodoo->thefilterg [fil3[(x+2)*3+1]][(((src[x] >> 5) & 63) << 2)]; - fil[(x+2)*3+2] = voodoo->thefilter [fil3[(x+2)*3+2]][(((src[x] >> 11) & 31) << 3)]; - - fil3[(x+1)*3] = voodoo->thefilterb [fil[(x+1)*3]][((src[x] & 31) << 3)]; - fil3[(x+1)*3+1] = voodoo->thefilterg [fil[(x+1)*3+1]][(((src[x] >> 5) & 63) << 2)]; - fil3[(x+1)*3+2] = voodoo->thefilter [fil[(x+1)*3+2]][(((src[x] >> 11) & 31) << 3)]; - - fil[(x-1)*3] = voodoo->thefilterb [fil3[(x-1)*3]][((src[x] & 31) << 3)]; - fil[(x-1)*3+1] = voodoo->thefilterg [fil3[(x-1)*3+1]][(((src[x] >> 5) & 63) << 2)]; - fil[(x-1)*3+2] = voodoo->thefilter [fil3[(x-1)*3+2]][(((src[x] >> 11) & 31) << 3)]; - } - - // unroll for edge cases - - fil3[(column-3)*3] = voodoo->thefilterb [((src[column-3] & 31) << 3)] [((src[column] & 31) << 3)]; - fil3[(column-3)*3+1] = voodoo->thefilterg [(((src[column-3] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; - fil3[(column-3)*3+2] = voodoo->thefilter [(((src[column-3] >> 11) & 31) << 3)] [(((src[column] >> 11) & 31) << 3)]; - - fil3[(column-2)*3] = voodoo->thefilterb [((src[column-2] & 31) << 3)] [((src[column] & 31) << 3)]; - fil3[(column-2)*3+1] = voodoo->thefilterg [(((src[column-2] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; - fil3[(column-2)*3+2] = voodoo->thefilter [(((src[column-2] >> 11) & 31) << 3)] [(((src[column] >> 11) & 31) << 3)]; - - fil3[(column-1)*3] = voodoo->thefilterb [((src[column-1] & 31) << 3)] [((src[column] & 31) << 3)]; - fil3[(column-1)*3+1] = voodoo->thefilterg [(((src[column-1] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; - fil3[(column-1)*3+2] = voodoo->thefilter [(((src[column-1] >> 11) & 31) << 3)] [(((src[column] >> 11) & 31) << 3)]; - - fil[(column-2)*3] = voodoo->thefilterb [fil3[(column-2)*3]][((src[column] & 31) << 3)]; - fil[(column-2)*3+1] = voodoo->thefilterg [fil3[(column-2)*3+1]][(((src[column] >> 5) & 63) << 2)]; - fil[(column-2)*3+2] = voodoo->thefilter [fil3[(column-2)*3+2]][(((src[column] >> 11) & 31) << 3)]; - - fil[(column-1)*3] = voodoo->thefilterb [fil3[(column-1)*3]][((src[column] & 31) << 3)]; - fil[(column-1)*3+1] = voodoo->thefilterg [fil3[(column-1)*3+1]][(((src[column] >> 5) & 63) << 2)]; - fil[(column-1)*3+2] = voodoo->thefilter [fil3[(column-1)*3+2]][(((src[column] >> 11) & 31) << 3)]; - - fil3[(column-1)*3] = voodoo->thefilterb [fil[(column-1)*3]][((src[column] & 31) << 3)]; - fil3[(column-1)*3+1] = voodoo->thefilterg [fil[(column-1)*3+1]][(((src[column] >> 5) & 63) << 2)]; - fil3[(column-1)*3+2] = voodoo->thefilter [fil[(column-1)*3+2]][(((src[column] >> 11) & 31) << 3)]; - - free(fil3); -} - -void voodoo_callback(void *p) -{ - voodoo_t *voodoo = (voodoo_t *)p; - - if (voodoo->fbiInit0 & FBIINIT0_VGA_PASS) - { - if (voodoo->line < voodoo->v_disp) - { - voodoo_t *draw_voodoo; - int draw_line; - - if (SLI_ENABLED) - { - if (voodoo == voodoo->set->voodoos[1]) - goto skip_draw; - - if (((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) ? 1 : 0) == (voodoo->line & 1)) - draw_voodoo = voodoo; - else - draw_voodoo = voodoo->set->voodoos[1]; - draw_line = voodoo->line >> 1; - } - else - { - if (!(voodoo->fbiInit0 & 1)) - goto skip_draw; - draw_voodoo = voodoo; - draw_line = voodoo->line; - } - - if (draw_voodoo->dirty_line[draw_line]) - { - uint32_t *p = &buffer32->line[voodoo->line + 8][8]; - uint16_t *src = (uint16_t *)&draw_voodoo->fb_mem[draw_voodoo->front_offset + draw_line*draw_voodoo->row_width]; - int x; - - draw_voodoo->dirty_line[draw_line] = 0; - - if (voodoo->line < voodoo->dirty_line_low) - { - voodoo->dirty_line_low = voodoo->line; - video_wait_for_buffer(); - } - if (voodoo->line > voodoo->dirty_line_high) - voodoo->dirty_line_high = voodoo->line; - - /* Draw left overscan. */ - for (x = 0; x < 8; x++) - buffer32->line[voodoo->line + 8][x] = 0x00000000; - - if (voodoo->scrfilter && voodoo->scrfilterEnabled) - { - uint8_t *fil = malloc((voodoo->h_disp) * 3); /* interleaved 24-bit RGB */ - if (fil == NULL) fatal("fil = NULL"); - - if (voodoo->type == VOODOO_2) - voodoo_filterline_v2(voodoo, fil, voodoo->h_disp, src, voodoo->line); - else - voodoo_filterline_v1(voodoo, fil, voodoo->h_disp, src, voodoo->line); - - for (x = 0; x < voodoo->h_disp; x++) - { - p[x] = (voodoo->clutData256[fil[x*3]].b << 0 | voodoo->clutData256[fil[x*3+1]].g << 8 | voodoo->clutData256[fil[x*3+2]].r << 16); - } - - free(fil); - } - else - { - for (x = 0; x < voodoo->h_disp; x++) - { - p[x] = draw_voodoo->video_16to32[src[x]]; - } - } - - /* Draw right overscan. */ - for (x = 0; x < 8; x++) - buffer32->line[voodoo->line + 8][voodoo->h_disp + x + 8] = 0x00000000; - } - } - } -skip_draw: - if (voodoo->line == voodoo->v_disp) - { -// voodoo_log("retrace %i %i %08x %i\n", voodoo->retrace_count, voodoo->swap_interval, voodoo->swap_offset, voodoo->swap_pending); - voodoo->retrace_count++; - if (SLI_ENABLED && (voodoo->fbiInit2 & FBIINIT2_SWAP_ALGORITHM_MASK) == FBIINIT2_SWAP_ALGORITHM_SLI_SYNC) - { - if (voodoo == voodoo->set->voodoos[0]) - { - voodoo_t *voodoo_1 = voodoo->set->voodoos[1]; - - /*Only swap if both Voodoos are waiting for buffer swap*/ - if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval) && - voodoo_1->swap_pending && (voodoo_1->retrace_count > voodoo_1->swap_interval)) - { - memset(voodoo->dirty_line, 1, 1024); - voodoo->retrace_count = 0; - voodoo->front_offset = voodoo->swap_offset; - if (voodoo->swap_count > 0) - voodoo->swap_count--; - voodoo->swap_pending = 0; - - memset(voodoo_1->dirty_line, 1, 1024); - voodoo_1->retrace_count = 0; - voodoo_1->front_offset = voodoo_1->swap_offset; - if (voodoo_1->swap_count > 0) - voodoo_1->swap_count--; - voodoo_1->swap_pending = 0; - - thread_set_event(voodoo->wake_fifo_thread); - thread_set_event(voodoo_1->wake_fifo_thread); - - voodoo->frame_count++; - voodoo_1->frame_count++; - } - } - } - else - { - if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval)) - { - memset(voodoo->dirty_line, 1, 1024); - voodoo->retrace_count = 0; - voodoo->front_offset = voodoo->swap_offset; - if (voodoo->swap_count > 0) - voodoo->swap_count--; - voodoo->swap_pending = 0; - thread_set_event(voodoo->wake_fifo_thread); - voodoo->frame_count++; - } - } - voodoo->v_retrace = 1; - } - voodoo->line++; - - if (voodoo->fbiInit0 & FBIINIT0_VGA_PASS) - { - if (voodoo->line == voodoo->v_disp) - { - if (voodoo->dirty_line_high > voodoo->dirty_line_low) - svga_doblit(0, voodoo->v_disp, voodoo->h_disp, voodoo->v_disp-1, voodoo->svga); - if (voodoo->clutData_dirty) - { - voodoo->clutData_dirty = 0; - voodoo_calc_clutData(voodoo); - } - voodoo->dirty_line_high = -1; - voodoo->dirty_line_low = 2000; - } - } - - if (voodoo->line >= voodoo->v_total) - { - voodoo->line = 0; - voodoo->v_retrace = 0; - } - if (voodoo->line_time) - timer_advance_u64(&voodoo->timer, voodoo->line_time); - else - timer_advance_u64(&voodoo->timer, TIMER_USEC * 32); -} static void voodoo_speed_changed(void *p) { @@ -7552,8 +889,7 @@ void *voodoo_card_init() voodoo->use_recompiler = device_get_config_int("recompiler"); #endif voodoo->type = device_get_config_int("type"); - switch (voodoo->type) - { + switch (voodoo->type) { case VOODOO_1: voodoo->dual_tmus = 0; break; @@ -7581,8 +917,7 @@ void *voodoo_card_init() voodoo->tex_mem_w[0] = (uint16_t *)voodoo->tex_mem[0]; voodoo->tex_mem_w[1] = (uint16_t *)voodoo->tex_mem[1]; - for (c = 0; c < TEX_CACHE_MAX; c++) - { + for (c = 0; c < TEX_CACHE_MAX; c++) { voodoo->texture_cache[0][c].data = malloc((256*256 + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) * 4); voodoo->texture_cache[0][c].base = -1; /*invalid*/ voodoo->texture_cache[0][c].refcount = 0; @@ -7602,19 +937,26 @@ void *voodoo_card_init() voodoo->wake_fifo_thread = thread_create_event(); voodoo->wake_render_thread[0] = thread_create_event(); voodoo->wake_render_thread[1] = thread_create_event(); + voodoo->wake_render_thread[2] = thread_create_event(); + voodoo->wake_render_thread[3] = thread_create_event(); voodoo->wake_main_thread = thread_create_event(); voodoo->fifo_not_full_event = thread_create_event(); voodoo->render_not_full_event[0] = thread_create_event(); voodoo->render_not_full_event[1] = thread_create_event(); - voodoo->fifo_thread = thread_create(fifo_thread, voodoo); - voodoo->render_thread[0] = thread_create(render_thread_1, voodoo); - if (voodoo->render_threads == 2) - voodoo->render_thread[1] = thread_create(render_thread_2, voodoo); - + voodoo->render_not_full_event[2] = thread_create_event(); + voodoo->render_not_full_event[3] = thread_create_event(); + voodoo->fifo_thread = thread_create(voodoo_fifo_thread, voodoo); + voodoo->render_thread[0] = thread_create(voodoo_render_thread_1, voodoo); + if (voodoo->render_threads >= 2) + voodoo->render_thread[1] = thread_create(voodoo_render_thread_2, voodoo); + if (voodoo->render_threads == 4) { + voodoo->render_thread[2] = thread_create(voodoo_render_thread_3, voodoo); + voodoo->render_thread[3] = thread_create(voodoo_render_thread_4, voodoo); + } + voodoo->swap_mutex = thread_create_mutex(); timer_add(&voodoo->wake_timer, voodoo_wake_timer, (void *)voodoo, 0); - for (c = 0; c < 0x100; c++) - { + for (c = 0; c < 0x100; c++) { rgb332[c].r = c & 0xe0; rgb332[c].g = (c << 3) & 0xe0; rgb332[c].b = (c << 6) & 0xc0; @@ -7629,8 +971,7 @@ void *voodoo_card_init() ai44[c].g = ai44[c].b = ai44[c].r; } - for (c = 0; c < 0x10000; c++) - { + for (c = 0; c < 0x10000; c++) { rgb565[c].r = (c >> 8) & 0xf8; rgb565[c].g = (c >> 3) & 0xfc; rgb565[c].b = (c << 3) & 0xf8; @@ -7671,6 +1012,119 @@ void *voodoo_card_init() return voodoo; } +void *voodoo_2d3d_card_init(int type) +{ + int c; + voodoo_t *voodoo = malloc(sizeof(voodoo_t)); + memset(voodoo, 0, sizeof(voodoo_t)); + + voodoo->bilinear_enabled = device_get_config_int("bilinear"); + voodoo->scrfilter = device_get_config_int("dacfilter"); + voodoo->render_threads = device_get_config_int("render_threads"); + voodoo->odd_even_mask = voodoo->render_threads - 1; +#ifndef NO_CODEGEN + voodoo->use_recompiler = device_get_config_int("recompiler"); +#endif + voodoo->type = type; + voodoo->dual_tmus = (type == VOODOO_3) ? 1 : 0; + + /*generate filter lookup tables*/ + voodoo_generate_filter_v2(voodoo); + + for (c = 0; c < TEX_CACHE_MAX; c++) { + voodoo->texture_cache[0][c].data = malloc((256*256 + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) * 4); + voodoo->texture_cache[0][c].base = -1; /*invalid*/ + voodoo->texture_cache[0][c].refcount = 0; + if (voodoo->dual_tmus) + { + voodoo->texture_cache[1][c].data = malloc((256*256 + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) * 4); + voodoo->texture_cache[1][c].base = -1; /*invalid*/ + voodoo->texture_cache[1][c].refcount = 0; + } + } + + timer_add(&voodoo->timer, voodoo_callback, voodoo, 1); + + voodoo->fbiInit0 = 0; + + voodoo->wake_fifo_thread = thread_create_event(); + voodoo->wake_render_thread[0] = thread_create_event(); + voodoo->wake_render_thread[1] = thread_create_event(); + voodoo->wake_render_thread[2] = thread_create_event(); + voodoo->wake_render_thread[3] = thread_create_event(); + voodoo->wake_main_thread = thread_create_event(); + voodoo->fifo_not_full_event = thread_create_event(); + voodoo->render_not_full_event[0] = thread_create_event(); + voodoo->render_not_full_event[1] = thread_create_event(); + voodoo->render_not_full_event[2] = thread_create_event(); + voodoo->render_not_full_event[3] = thread_create_event(); + voodoo->fifo_thread = thread_create(voodoo_fifo_thread, voodoo); + voodoo->render_thread[0] = thread_create(voodoo_render_thread_1, voodoo); + if (voodoo->render_threads >= 2) + voodoo->render_thread[1] = thread_create(voodoo_render_thread_2, voodoo); + if (voodoo->render_threads == 4) { + voodoo->render_thread[2] = thread_create(voodoo_render_thread_3, voodoo); + voodoo->render_thread[3] = thread_create(voodoo_render_thread_4, voodoo); + } + voodoo->swap_mutex = thread_create_mutex(); + timer_add(&voodoo->wake_timer, voodoo_wake_timer, (void *)voodoo, 0); + + for (c = 0; c < 0x100; c++) { + rgb332[c].r = c & 0xe0; + rgb332[c].g = (c << 3) & 0xe0; + rgb332[c].b = (c << 6) & 0xc0; + rgb332[c].r = rgb332[c].r | (rgb332[c].r >> 3) | (rgb332[c].r >> 6); + rgb332[c].g = rgb332[c].g | (rgb332[c].g >> 3) | (rgb332[c].g >> 6); + rgb332[c].b = rgb332[c].b | (rgb332[c].b >> 2); + rgb332[c].b = rgb332[c].b | (rgb332[c].b >> 4); + rgb332[c].a = 0xff; + + ai44[c].a = (c & 0xf0) | ((c & 0xf0) >> 4); + ai44[c].r = (c & 0x0f) | ((c & 0x0f) << 4); + ai44[c].g = ai44[c].b = ai44[c].r; + } + + for (c = 0; c < 0x10000; c++) { + rgb565[c].r = (c >> 8) & 0xf8; + rgb565[c].g = (c >> 3) & 0xfc; + rgb565[c].b = (c << 3) & 0xf8; + rgb565[c].r |= (rgb565[c].r >> 5); + rgb565[c].g |= (rgb565[c].g >> 6); + rgb565[c].b |= (rgb565[c].b >> 5); + rgb565[c].a = 0xff; + + argb1555[c].r = (c >> 7) & 0xf8; + argb1555[c].g = (c >> 2) & 0xf8; + argb1555[c].b = (c << 3) & 0xf8; + argb1555[c].r |= (argb1555[c].r >> 5); + argb1555[c].g |= (argb1555[c].g >> 5); + argb1555[c].b |= (argb1555[c].b >> 5); + argb1555[c].a = (c & 0x8000) ? 0xff : 0; + + argb4444[c].a = (c >> 8) & 0xf0; + argb4444[c].r = (c >> 4) & 0xf0; + argb4444[c].g = c & 0xf0; + argb4444[c].b = (c << 4) & 0xf0; + argb4444[c].a |= (argb4444[c].a >> 4); + argb4444[c].r |= (argb4444[c].r >> 4); + argb4444[c].g |= (argb4444[c].g >> 4); + argb4444[c].b |= (argb4444[c].b >> 4); + + ai88[c].a = (c >> 8); + ai88[c].r = c & 0xff; + ai88[c].g = c & 0xff; + ai88[c].b = c & 0xff; + } +#ifndef NO_CODEGEN + voodoo_codegen_init(voodoo); +#endif + + voodoo->disp_buffer = 0; + voodoo->draw_buffer = 1; + + return voodoo; +} + void *voodoo_init() { voodoo_set_t *voodoo_set = malloc(sizeof(voodoo_set_t)); @@ -7748,10 +1202,15 @@ void voodoo_card_close(voodoo_t *voodoo) } #endif */ + thread_kill(voodoo->fifo_thread); thread_kill(voodoo->render_thread[0]); - if (voodoo->render_threads == 2) + if (voodoo->render_threads >= 2) thread_kill(voodoo->render_thread[1]); + if (voodoo->render_threads == 4) { + thread_kill(voodoo->render_thread[2]); + thread_kill(voodoo->render_thread[3]); + } thread_destroy_event(voodoo->fifo_not_full_event); thread_destroy_event(voodoo->wake_main_thread); thread_destroy_event(voodoo->wake_fifo_thread); @@ -7769,10 +1228,13 @@ void voodoo_card_close(voodoo_t *voodoo) #ifndef NO_CODEGEN voodoo_codegen_close(voodoo); #endif - free(voodoo->fb_mem); - if (voodoo->dual_tmus) - free(voodoo->tex_mem[1]); - free(voodoo->tex_mem[0]); + if (voodoo->type < VOODOO_BANSHEE && voodoo->fb_mem) + { + free(voodoo->fb_mem); + if (voodoo->dual_tmus) + free(voodoo->tex_mem[1]); + free(voodoo->tex_mem[0]); + } free(voodoo); } @@ -7879,6 +1341,10 @@ static const device_config_t voodoo_config[] = .description = "2", .value = 2 }, + { + .description = "4", + .value = 4 + }, { .description = "" } diff --git a/src/video/vid_voodoo_banshee.c b/src/video/vid_voodoo_banshee.c new file mode 100644 index 000000000..4399e4fe9 --- /dev/null +++ b/src/video/vid_voodoo_banshee.c @@ -0,0 +1,2735 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * Voodoo Banshee and 3 specific emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/io.h> +#include <86box/mem.h> +#include <86box/pci.h> +#include <86box/rom.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_ddc.h> +#include <86box/vid_svga.h> +#include <86box/vid_svga_render.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_display.h> +#include <86box/vid_voodoo_fifo.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> + + +#ifdef CLAMP +#undef CLAMP +#endif + +static uint8_t vb_filter_v1_rb[256][256]; +static uint8_t vb_filter_v1_g [256][256]; + +static uint8_t vb_filter_bx_rb[256][256]; +static uint8_t vb_filter_bx_g [256][256]; + +enum +{ + TYPE_BANSHEE = 0, + TYPE_V3_2000, + TYPE_V3_3000 +}; + +typedef struct banshee_t +{ + svga_t svga; + + rom_t bios_rom; + + uint8_t pci_regs[256]; + + uint32_t memBaseAddr0; + uint32_t memBaseAddr1; + uint32_t ioBaseAddr; + + uint32_t agpInit0; + uint32_t dramInit0, dramInit1; + uint32_t lfbMemoryConfig; + uint32_t miscInit0, miscInit1; + uint32_t pciInit0; + uint32_t vgaInit0, vgaInit1; + + uint32_t command_2d; + uint32_t srcBaseAddr_2d; + + uint32_t pllCtrl0, pllCtrl1, pllCtrl2; + + uint32_t dacMode; + int dacAddr; + + uint32_t vidDesktopOverlayStride; + uint32_t vidDesktopStartAddr; + uint32_t vidProcCfg; + uint32_t vidScreenSize; + uint32_t vidSerialParallelPort; + + int overlay_pix_fmt; + + uint32_t hwCurPatAddr, hwCurLoc, hwCurC0, hwCurC1; + + uint32_t intrCtrl; + + uint32_t overlay_buffer[2][4096]; + + mem_mapping_t linear_mapping; + + mem_mapping_t reg_mapping_low; /*0000000-07fffff*/ + mem_mapping_t reg_mapping_high; /*0c00000-1ffffff - Windows 2000 puts the BIOS ROM in between these two areas*/ + + voodoo_t *voodoo; + + uint32_t desktop_addr; + int desktop_y; + uint32_t desktop_stride_tiled; + + int type; +} banshee_t; + +enum +{ + Init_status = 0x00, + Init_pciInit0 = 0x04, + Init_lfbMemoryConfig = 0x0c, + Init_miscInit0 = 0x10, + Init_miscInit1 = 0x14, + Init_dramInit0 = 0x18, + Init_dramInit1 = 0x1c, + Init_agpInit0 = 0x20, + Init_vgaInit0 = 0x28, + Init_vgaInit1 = 0x2c, + Init_2dCommand = 0x30, + Init_2dSrcBaseAddr = 0x34, + Init_strapInfo = 0x38, + + PLL_pllCtrl0 = 0x40, + PLL_pllCtrl1 = 0x44, + PLL_pllCtrl2 = 0x48, + + DAC_dacMode = 0x4c, + DAC_dacAddr = 0x50, + DAC_dacData = 0x54, + + Video_vidProcCfg = 0x5c, + Video_maxRgbDelta = 0x58, + Video_hwCurPatAddr = 0x60, + Video_hwCurLoc = 0x64, + Video_hwCurC0 = 0x68, + Video_hwCurC1 = 0x6c, + Video_vidSerialParallelPort = 0x78, + Video_vidScreenSize = 0x98, + Video_vidOverlayStartCoords = 0x9c, + Video_vidOverlayEndScreenCoords = 0xa0, + Video_vidOverlayDudx = 0xa4, + Video_vidOverlayDudxOffsetSrcWidth = 0xa8, + Video_vidOverlayDvdy = 0xac, + Video_vidOverlayDvdyOffset = 0xe0, + Video_vidDesktopStartAddr = 0xe4, + Video_vidDesktopOverlayStride = 0xe8 +}; + +enum +{ + cmdBaseAddr0 = 0x20, + cmdBaseSize0 = 0x24, + cmdBump0 = 0x28, + cmdRdPtrL0 = 0x2c, + cmdRdPtrH0 = 0x30, + cmdAMin0 = 0x34, + cmdAMax0 = 0x3c, + cmdFifoDepth0 = 0x44, + cmdHoleCnt0 = 0x48 +}; + +#define VGAINIT0_EXTENDED_SHIFT_OUT (1 << 12) + +#define VIDPROCCFG_CURSOR_MODE (1 << 1) +#define VIDPROCCFG_HALF_MODE (1 << 4) +#define VIDPROCCFG_OVERLAY_ENABLE (1 << 8) +#define VIDPROCCFG_H_SCALE_ENABLE (1 << 14) +#define VIDPROCCFG_V_SCALE_ENABLE (1 << 15) +#define VIDPROCCFG_FILTER_MODE_MASK (3 << 16) +#define VIDPROCCFG_FILTER_MODE_POINT (0 << 16) +#define VIDPROCCFG_FILTER_MODE_DITHER_2X2 (1 << 16) +#define VIDPROCCFG_FILTER_MODE_DITHER_4X4 (2 << 16) +#define VIDPROCCFG_FILTER_MODE_BILINEAR (3 << 16) +#define VIDPROCCFG_DESKTOP_PIX_FORMAT ((banshee->vidProcCfg >> 18) & 7) +#define VIDPROCCFG_OVERLAY_PIX_FORMAT ((banshee->vidProcCfg >> 21) & 7) +#define VIDPROCCFG_OVERLAY_PIX_FORMAT_SHIFT (21) +#define VIDPROCCFG_OVERLAY_PIX_FORMAT_MASK (7 << VIDPROCCFG_OVERLAY_PIX_FORMAT_SHIFT) +#define VIDPROCCFG_DESKTOP_TILE (1 << 24) +#define VIDPROCCFG_OVERLAY_TILE (1 << 25) +#define VIDPROCCFG_2X_MODE (1 << 26) +#define VIDPROCCFG_HWCURSOR_ENA (1 << 27) + +#define OVERLAY_FMT_565 (1) +#define OVERLAY_FMT_YUYV422 (5) +#define OVERLAY_FMT_UYVY422 (6) +#define OVERLAY_FMT_565_DITHER (7) + +#define OVERLAY_START_X_MASK (0xfff) +#define OVERLAY_START_Y_SHIFT (12) +#define OVERLAY_START_Y_MASK (0xfff << OVERLAY_START_Y_SHIFT) + +#define OVERLAY_END_X_MASK (0xfff) +#define OVERLAY_END_Y_SHIFT (12) +#define OVERLAY_END_Y_MASK (0xfff << OVERLAY_END_Y_SHIFT) + +#define OVERLAY_SRC_WIDTH_SHIFT (19) +#define OVERLAY_SRC_WIDTH_MASK (0x1fff << OVERLAY_SRC_WIDTH_SHIFT) + +#define VID_STRIDE_OVERLAY_SHIFT (16) +#define VID_STRIDE_OVERLAY_MASK (0x7fff << VID_STRIDE_OVERLAY_SHIFT) + +#define VID_DUDX_MASK (0xffffff) +#define VID_DVDY_MASK (0xffffff) + +#define PIX_FORMAT_8 0 +#define PIX_FORMAT_RGB565 1 +#define PIX_FORMAT_RGB24 2 +#define PIX_FORMAT_RGB32 3 + +#define VIDSERIAL_DDC_DCK_W (1 << 19) +#define VIDSERIAL_DDC_DDA_W (1 << 20) +#define VIDSERIAL_DDC_DCK_R (1 << 21) +#define VIDSERIAL_DDC_DDA_R (1 << 22) +#define VIDSERIAL_I2C_SCK_W (1 << 24) +#define VIDSERIAL_I2C_SDA_W (1 << 25) +#define VIDSERIAL_I2C_SCK_R (1 << 26) +#define VIDSERIAL_I2C_SDA_R (1 << 27) + +#ifdef ENABLE_BANSHEE_LOG +int banshee_do_log = ENABLE_BANSHEE_LOG; + + +static void +banshee_log(const char *fmt, ...) +{ + va_list ap; + + if (banshee_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define banshee_log(fmt, ...) +#endif + +static uint32_t banshee_status(banshee_t *banshee); + +static void banshee_out(uint16_t addr, uint8_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + svga_t *svga = &banshee->svga; + uint8_t old; + +// /*if (addr != 0x3c9) */banshee_log("banshee_out : %04X %02X %04X:%04X\n", addr, val, CS,cpu_state.pc); + + if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1)) + addr ^= 0x60; + + switch (addr) + { + case 0x3D4: + svga->crtcreg = val & 0x3f; + return; + case 0x3D5: + if ((svga->crtcreg < 7) && (svga->crtc[0x11] & 0x80)) + return; + if ((svga->crtcreg == 7) && (svga->crtc[0x11] & 0x80)) + val = (svga->crtc[7] & ~0x10) | (val & 0x10); + old = svga->crtc[svga->crtcreg]; + svga->crtc[svga->crtcreg] = val; + if (old != val) + { + if (svga->crtcreg < 0xe || svga->crtcreg > 0x10) + { + svga->fullchange = changeframecount; + svga_recalctimings(svga); + } + } + break; + } + svga_out(addr, val, svga); +} + +static uint8_t banshee_in(uint16_t addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + svga_t *svga = &banshee->svga; + uint8_t temp; + +// if (addr != 0x3da) banshee_log("banshee_in : %04X ", addr); + + if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1)) + addr ^= 0x60; + + switch (addr) + { + case 0x3c2: + if ((svga->vgapal[0].r + svga->vgapal[0].g + svga->vgapal[0].b) >= 0x40) + temp = 0; + else + temp = 0x10; + break; + case 0x3D4: + temp = svga->crtcreg; + break; + case 0x3D5: + temp = svga->crtc[svga->crtcreg]; + break; + default: + temp = svga_in(addr, svga); + break; + } +// if (addr != 0x3da) banshee_log("%02X %04X:%04X %i\n", temp, CS,cpu_state.pc, ins); + return temp; +} + +static void banshee_updatemapping(banshee_t *banshee) +{ + svga_t *svga = &banshee->svga; + + if (!(banshee->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM)) + { +// banshee_log("Update mapping - PCI disabled\n"); + mem_mapping_disable(&svga->mapping); + mem_mapping_disable(&banshee->linear_mapping); + mem_mapping_disable(&banshee->reg_mapping_low); + mem_mapping_disable(&banshee->reg_mapping_high); + return; + } + + banshee_log("Update mapping - bank %02X ", svga->gdcreg[6] & 0xc); + switch (svga->gdcreg[6] & 0xc) /*Banked framebuffer*/ + { + case 0x0: /*128k at A0000*/ + mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x20000); + svga->banked_mask = 0xffff; + break; + case 0x4: /*64k at A0000*/ + mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000); + svga->banked_mask = 0xffff; + break; + case 0x8: /*32k at B0000*/ + mem_mapping_set_addr(&svga->mapping, 0xb0000, 0x08000); + svga->banked_mask = 0x7fff; + break; + case 0xC: /*32k at B8000*/ + mem_mapping_set_addr(&svga->mapping, 0xb8000, 0x08000); + svga->banked_mask = 0x7fff; + break; + } + + banshee_log("Linear framebuffer %08X ", banshee->memBaseAddr1); + mem_mapping_set_addr(&banshee->linear_mapping, banshee->memBaseAddr1, 32 << 20); + banshee_log("registers %08X\n", banshee->memBaseAddr0); + mem_mapping_set_addr(&banshee->reg_mapping_low, banshee->memBaseAddr0, 8 << 20); + mem_mapping_set_addr(&banshee->reg_mapping_high, banshee->memBaseAddr0 + 0xc00000, 20 << 20); +} + +static void banshee_render_16bpp_tiled(svga_t *svga) +{ + banshee_t *banshee = (banshee_t *)svga->p; + int x; + uint32_t *p = &((uint32_t *)buffer32->line[svga->displine + svga->y_add])[svga->x_add]; + uint32_t addr; + int drawn = 0; + + if ((svga->displine + svga->y_add) < 0) + return; + + if (banshee->vidProcCfg & VIDPROCCFG_HALF_MODE) + addr = banshee->desktop_addr + ((banshee->desktop_y >> 1) & 31) * 128 + ((banshee->desktop_y >> 6) * banshee->desktop_stride_tiled); + else + addr = banshee->desktop_addr + (banshee->desktop_y & 31) * 128 + ((banshee->desktop_y >> 5) * banshee->desktop_stride_tiled); + + for (x = 0; x <= svga->hdisp; x += 64) + { + if (svga->hwcursor_on || svga->overlay_on) + svga->changedvram[addr >> 12] = 2; + if (svga->changedvram[addr >> 12] || svga->fullchange) + { + uint16_t *vram_p = (uint16_t *)&svga->vram[addr & svga->vram_display_mask]; + int xx; + + for (xx = 0; xx < 64; xx++) + *p++ = video_16to32[*vram_p++]; + + drawn = 1; + } + else + p += 64; + addr += 128*32; + } + + if (drawn) + { + if (svga->firstline_draw == 2000) + svga->firstline_draw = svga->displine; + svga->lastline_draw = svga->displine; + } + + banshee->desktop_y++; +} + +static void banshee_recalctimings(svga_t *svga) +{ + banshee_t *banshee = (banshee_t *)svga->p; + voodoo_t *voodoo = banshee->voodoo; + +/*7 R/W Horizontal Retrace End bit 5. - + 6 R/W Horizontal Retrace Start bit 8 0x4 + 5 R/W Horizontal Blank End bit 6. - + 4 R/W Horizontal Blank Start bit 8. 0x3 + 3 R/W Reserved. - + 2 R/W Horizontal Display Enable End bit 8. 0x1 + 1 R/W Reserved. - + 0 R/W Horizontal Total bit 8. 0x0*/ + if (svga->crtc[0x1a] & 0x01) svga->htotal += 0x100; + if (svga->crtc[0x1a] & 0x04) svga->hdisp += 0x100; +/*6 R/W Vertical Retrace Start bit 10 0x10 + 5 R/W Reserved. - + 4 R/W Vertical Blank Start bit 10. 0x15 + 3 R/W Reserved. - + 2 R/W Vertical Display Enable End bit 10 0x12 + 1 R/W Reserved. - + 0 R/W Vertical Total bit 10. 0x6*/ + if (svga->crtc[0x1b] & 0x01) svga->vtotal += 0x400; + if (svga->crtc[0x1b] & 0x04) svga->dispend += 0x400; + if (svga->crtc[0x1b] & 0x10) svga->vblankstart += 0x400; + if (svga->crtc[0x1b] & 0x40) svga->vsyncstart += 0x400; +// banshee_log("svga->hdisp=%i\n", svga->hdisp); + + if (banshee->vgaInit0 & VGAINIT0_EXTENDED_SHIFT_OUT) + { + switch (VIDPROCCFG_DESKTOP_PIX_FORMAT) + { + case PIX_FORMAT_8: + svga->render = svga_render_8bpp_highres; + svga->bpp = 8; + break; + case PIX_FORMAT_RGB565: + svga->render = (banshee->vidProcCfg & VIDPROCCFG_DESKTOP_TILE) ? banshee_render_16bpp_tiled : svga_render_16bpp_highres; + svga->bpp = 16; + break; + case PIX_FORMAT_RGB24: + svga->render = svga_render_24bpp_highres; + svga->bpp = 24; + break; + case PIX_FORMAT_RGB32: + svga->render = svga_render_32bpp_highres; + svga->bpp = 32; + break; + default: + fatal("Unknown pixel format %08x\n", banshee->vgaInit0); + } + if (!(banshee->vidProcCfg & VIDPROCCFG_DESKTOP_TILE) && (banshee->vidProcCfg & VIDPROCCFG_HALF_MODE)) + svga->rowcount = 1; + else + svga->rowcount = 0; + if (banshee->vidProcCfg & VIDPROCCFG_DESKTOP_TILE) + svga->rowoffset = ((banshee->vidDesktopOverlayStride & 0x3fff) * 128) >> 3; + else + svga->rowoffset = (banshee->vidDesktopOverlayStride & 0x3fff) >> 3; + svga->ma_latch = banshee->vidDesktopStartAddr >> 2; + banshee->desktop_stride_tiled = (banshee->vidDesktopOverlayStride & 0x3fff) * 128 * 32; +// banshee_log("Extended shift out %i rowoffset=%i %02x\n", VIDPROCCFG_DESKTOP_PIX_FORMAT, svga->rowoffset, svga->crtc[1]); + + svga->char_width = 8; + svga->split = 99999; + + if (banshee->vidProcCfg & VIDPROCCFG_2X_MODE) + { + svga->hdisp *= 2; + svga->htotal *= 2; + } + + svga->overlay.ena = banshee->vidProcCfg & VIDPROCCFG_OVERLAY_ENABLE; + + svga->overlay.x = voodoo->overlay.start_x; + svga->overlay.y = voodoo->overlay.start_y; + svga->overlay.xsize = voodoo->overlay.size_x; + svga->overlay.ysize = voodoo->overlay.size_y; + svga->overlay.pitch = (banshee->vidDesktopOverlayStride & VID_STRIDE_OVERLAY_MASK) >> VID_STRIDE_OVERLAY_SHIFT; + if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) + svga->overlay.pitch *= 128*32; + if (svga->overlay.xsize <= 0 || svga->overlay.ysize <= 0) + svga->overlay.ena = 0; + if (svga->overlay.ena) + { +/* banshee_log("Overlay enabled : start=%i,%i end=%i,%i size=%i,%i pitch=%x\n", + voodoo->overlay.start_x, voodoo->overlay.start_y, + voodoo->overlay.end_x, voodoo->overlay.end_y, + voodoo->overlay.size_x, voodoo->overlay.size_y, + svga->overlay.pitch);*/ + if (!voodoo->overlay.start_x && !voodoo->overlay.start_y && + svga->hdisp == voodoo->overlay.size_x && svga->dispend == voodoo->overlay.size_y) + { + /*Overlay is full screen, so don't bother rendering the desktop + behind it*/ + svga->render = svga_render_null; + svga->bpp = 0; + } + } + } + else + { +// banshee_log("Normal shift out\n"); + svga->bpp = 8; + } + + if (((svga->miscout >> 2) & 3) == 3) + { + int k = banshee->pllCtrl0 & 3; + int m = (banshee->pllCtrl0 >> 2) & 0x3f; + int n = (banshee->pllCtrl0 >> 8) & 0xff; + double freq = (((double)n + 2) / (((double)m + 2) * (double)(1 << k))) * 14318184.0; + + svga->clock = (cpuclock * (float)(1ull << 32)) / freq; +// svga->clock = cpuclock / freq; + +// banshee_log("svga->clock = %g %g m=%i k=%i n=%i\n", freq, freq / 1000000.0, m, k, n); + } +} + +static void banshee_ext_out(uint16_t addr, uint8_t val, void *p) +{ +// banshee_t *banshee = (banshee_t *)p; +// svga_t *svga = &banshee->svga; + +// banshee_log("banshee_ext_out: addr=%04x val=%02x\n", addr, val); + + switch (addr & 0xff) + { + case 0xb0: case 0xb1: case 0xb2: case 0xb3: + case 0xb4: case 0xb5: case 0xb6: case 0xb7: + case 0xb8: case 0xb9: case 0xba: case 0xbb: + case 0xbc: case 0xbd: case 0xbe: case 0xbf: + case 0xc0: case 0xc1: case 0xc2: case 0xc3: + case 0xc4: case 0xc5: case 0xc6: case 0xc7: + case 0xc8: case 0xc9: case 0xca: case 0xcb: + case 0xcc: case 0xcd: case 0xce: case 0xcf: + case 0xd0: case 0xd1: case 0xd2: case 0xd3: + case 0xd4: case 0xd5: case 0xd6: case 0xd7: + case 0xd8: case 0xd9: case 0xda: case 0xdb: + case 0xdc: case 0xdd: case 0xde: case 0xdf: + banshee_out((addr & 0xff)+0x300, val, p); + break; + + default: + banshee_log("bad banshee_ext_out: addr=%04x val=%02x\n", addr, val); + } +} +static void banshee_ext_outl(uint16_t addr, uint32_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + +// banshee_log("banshee_ext_outl: addr=%04x val=%08x %04x(%08x):%08x\n", addr, val, CS,cs,cpu_state.pc); + + switch (addr & 0xff) + { + case Init_pciInit0: + banshee->pciInit0 = val; + voodoo->read_time = pci_nonburst_time + pci_burst_time * ((val & 0x100) ? 2 : 1); + voodoo->burst_time = pci_burst_time * ((val & 0x200) ? 1 : 0); + voodoo->write_time = pci_nonburst_time + voodoo->burst_time; + break; + + case Init_lfbMemoryConfig: + banshee->lfbMemoryConfig = val; +// banshee_log("lfbMemoryConfig=%08x\n", val); + voodoo->tile_base = (val & 0x1fff) << 12; + voodoo->tile_stride = 1024 << ((val >> 13) & 7); + voodoo->tile_stride_shift = 10 + ((val >> 13) & 7); + voodoo->tile_x = ((val >> 16) & 0x7f) * 128; + voodoo->tile_x_real = ((val >> 16) & 0x7f) * 128*32; + break; + + case Init_miscInit0: + banshee->miscInit0 = val; + break; + case Init_miscInit1: + banshee->miscInit1 = val; + break; + case Init_dramInit0: + banshee->dramInit0 = val; + break; + case Init_dramInit1: + banshee->dramInit1 = val; + break; + case Init_agpInit0: + banshee->agpInit0 = val; + break; + + case Init_2dCommand: + banshee->command_2d = val; + break; + case Init_2dSrcBaseAddr: + banshee->srcBaseAddr_2d = val; + break; + case Init_vgaInit0: + banshee->vgaInit0 = val; + break; + case Init_vgaInit1: + banshee->vgaInit1 = val; + svga->write_bank = (val & 0x3ff) << 15; + svga->read_bank = ((val >> 10) & 0x3ff) << 15; + break; + + case PLL_pllCtrl0: + banshee->pllCtrl0 = val; + break; + case PLL_pllCtrl1: + banshee->pllCtrl1 = val; + break; + case PLL_pllCtrl2: + banshee->pllCtrl2 = val; + break; + + case DAC_dacMode: + banshee->dacMode = val; + break; + case DAC_dacAddr: + banshee->dacAddr = val & 0x1ff; + break; + case DAC_dacData: + svga->pallook[banshee->dacAddr] = val & 0xffffff; + svga->fullchange = changeframecount; + break; + + case Video_vidProcCfg: + banshee->vidProcCfg = val; +// banshee_log("vidProcCfg=%08x\n", val); + banshee->overlay_pix_fmt = (val & VIDPROCCFG_OVERLAY_PIX_FORMAT_MASK) >> VIDPROCCFG_OVERLAY_PIX_FORMAT_SHIFT; + svga->hwcursor.ena = val & VIDPROCCFG_HWCURSOR_ENA; + svga->fullchange = changeframecount; + svga_recalctimings(svga); + break; + + case Video_maxRgbDelta: + banshee->voodoo->scrfilterThreshold = val; + if (val > 0x00) + banshee->voodoo->scrfilterEnabled = 1; + else + banshee->voodoo->scrfilterEnabled = 0; + voodoo_threshold_check(banshee->voodoo); + banshee_log("Banshee Filter: %06x\n", val); + break; + + case Video_hwCurPatAddr: + banshee->hwCurPatAddr = val; + svga->hwcursor.addr = val & 0xfffff0; + break; + case Video_hwCurLoc: + banshee->hwCurLoc = val; + svga->hwcursor.x = (val & 0x7ff) - 32; + svga->hwcursor.y = ((val >> 16) & 0x7ff) - 64; + if (svga->hwcursor.y < 0) + { + svga->hwcursor.yoff = -svga->hwcursor.y; + svga->hwcursor.y = 0; + } + else + svga->hwcursor.yoff = 0; + svga->hwcursor.xsize = 64; + svga->hwcursor.ysize = 64; +// banshee_log("hwCurLoc %08x %i\n", val, svga->hwcursor.y); + break; + case Video_hwCurC0: + banshee->hwCurC0 = val; + break; + case Video_hwCurC1: + banshee->hwCurC1 = val; + break; + + case Video_vidSerialParallelPort: + banshee->vidSerialParallelPort = val; +// banshee_log("vidSerialParallelPort: write %08x %08x %04x(%08x):%08x\n", val, val & (VIDSERIAL_DDC_DCK_W | VIDSERIAL_DDC_DDA_W), CS,cs,cpu_state.pc); + ddc_i2c_change((val & VIDSERIAL_DDC_DCK_W) ? 1 : 0, (val & VIDSERIAL_DDC_DDA_W) ? 1 : 0); + break; + + case Video_vidScreenSize: + banshee->vidScreenSize = val; + voodoo->h_disp = (val & 0xfff) + 1; + voodoo->v_disp = (val >> 12) & 0xfff; + break; + case Video_vidOverlayStartCoords: + voodoo->overlay.vidOverlayStartCoords = val; + voodoo->overlay.start_x = val & OVERLAY_START_X_MASK; + voodoo->overlay.start_y = (val & OVERLAY_START_Y_MASK) >> OVERLAY_START_Y_SHIFT; + voodoo->overlay.size_x = voodoo->overlay.end_x - voodoo->overlay.start_x; + voodoo->overlay.size_y = voodoo->overlay.end_y - voodoo->overlay.start_y; + svga_recalctimings(svga); + break; + case Video_vidOverlayEndScreenCoords: + voodoo->overlay.vidOverlayEndScreenCoords = val; + voodoo->overlay.end_x = val & OVERLAY_END_X_MASK; + voodoo->overlay.end_y = (val & OVERLAY_END_Y_MASK) >> OVERLAY_END_Y_SHIFT; + voodoo->overlay.size_x = (voodoo->overlay.end_x - voodoo->overlay.start_x) + 1; + voodoo->overlay.size_y = (voodoo->overlay.end_y - voodoo->overlay.start_y) + 1; + svga_recalctimings(svga); + break; + case Video_vidOverlayDudx: + voodoo->overlay.vidOverlayDudx = val & VID_DUDX_MASK; +// banshee_log("vidOverlayDudx=%08x\n", val); + break; + case Video_vidOverlayDudxOffsetSrcWidth: + voodoo->overlay.vidOverlayDudxOffsetSrcWidth = val; + voodoo->overlay.overlay_bytes = (val & OVERLAY_SRC_WIDTH_MASK) >> OVERLAY_SRC_WIDTH_SHIFT; +// banshee_log("vidOverlayDudxOffsetSrcWidth=%08x\n", val); + break; + case Video_vidOverlayDvdy: + voodoo->overlay.vidOverlayDvdy = val & VID_DVDY_MASK; +// banshee_log("vidOverlayDvdy=%08x\n", val); + break; + case Video_vidOverlayDvdyOffset: + voodoo->overlay.vidOverlayDvdyOffset = val; + break; + + + case Video_vidDesktopStartAddr: + banshee->vidDesktopStartAddr = val & 0xffffff; +// banshee_log("vidDesktopStartAddr=%08x\n", val); + svga->fullchange = changeframecount; + svga_recalctimings(svga); + break; + case Video_vidDesktopOverlayStride: + banshee->vidDesktopOverlayStride = val; +// banshee_log("vidDesktopOverlayStride=%08x\n", val); + svga->fullchange = changeframecount; + svga_recalctimings(svga); + break; +// default: +// fatal("bad banshee_ext_outl: addr=%04x val=%08x\n", addr, val); + } +} + +static uint8_t banshee_ext_in(uint16_t addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; +// svga_t *svga = &banshee->svga; + uint8_t ret = 0xff; + + switch (addr & 0xff) + { + case Init_status: case Init_status+1: case Init_status+2: case Init_status+3: + ret = (banshee_status(banshee) >> ((addr & 3) * 8)) & 0xff; +// banshee_log("Read status reg! %04x(%08x):%08x\n", CS, cs, cpu_state.pc); + break; + + case 0xb0: case 0xb1: case 0xb2: case 0xb3: + case 0xb4: case 0xb5: case 0xb6: case 0xb7: + case 0xb8: case 0xb9: case 0xba: case 0xbb: + case 0xbc: case 0xbd: case 0xbe: case 0xbf: + case 0xc0: case 0xc1: case 0xc2: case 0xc3: + case 0xc4: case 0xc5: case 0xc6: case 0xc7: + case 0xc8: case 0xc9: case 0xca: case 0xcb: + case 0xcc: case 0xcd: case 0xce: case 0xcf: + case 0xd0: case 0xd1: case 0xd2: case 0xd3: + case 0xd4: case 0xd5: case 0xd6: case 0xd7: + case 0xd8: case 0xd9: case 0xda: case 0xdb: + case 0xdc: case 0xdd: case 0xde: case 0xdf: + ret = banshee_in((addr & 0xff)+0x300, p); + break; + + default: + banshee_log("bad banshee_ext_in: addr=%04x\n", addr); + break; + } + +// banshee_log("banshee_ext_in: addr=%04x val=%02x\n", addr, ret); + + return ret; +} + +static uint32_t banshee_status(banshee_t *banshee) +{ + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + int fifo_entries = FIFO_ENTRIES; + int fifo_size = 0xffff - fifo_entries; + int swap_count = voodoo->swap_count; + int written = voodoo->cmd_written + voodoo->cmd_written_fifo; + int busy = (written - voodoo->cmd_read) || (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr) || + voodoo->render_voodoo_busy[0] || voodoo->render_voodoo_busy[1] || + voodoo->render_voodoo_busy[2] || voodoo->render_voodoo_busy[3] || + voodoo->voodoo_busy; + uint32_t ret; + + ret = 0; + if (fifo_size < 0x20) + ret |= fifo_size; + else + ret |= 0x1f; + if (fifo_size) + ret |= 0x20; + if (swap_count < 7) + ret |= (swap_count << 28); + else + ret |= (7 << 28); + if (!(svga->cgastat & 8)) + ret |= 0x40; + + if (busy) + ret |= 0x780; /*Busy*/ + + if (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr) + ret |= (1 << 11); + + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_thread(voodoo); + +// banshee_log("banshee_status: busy %i %i (%i %i) %i %i %i %04x(%08x):%08x %08x\n", busy, written, voodoo->cmd_written, voodoo->cmd_written_fifo, voodoo->cmd_read, voodoo->cmdfifo_depth_rd, voodoo->cmdfifo_depth_wr, CS,cs,cpu_state.pc, ret); + + return ret; +} + +static uint32_t banshee_ext_inl(uint16_t addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + uint32_t ret = 0xffffffff; + + sub_cycles(voodoo->read_time); + + switch (addr & 0xff) + { + case Init_status: + ret = banshee_status(banshee); +// banshee_log("Read status reg! %04x(%08x):%08x\n", CS, cs, cpu_state.pc); + break; + case Init_pciInit0: + ret = banshee->pciInit0; + break; + case Init_lfbMemoryConfig: + ret = banshee->lfbMemoryConfig; + break; + + case Init_miscInit0: + ret = banshee->miscInit0; + break; + case Init_miscInit1: + ret = banshee->miscInit1; + break; + case Init_dramInit0: + ret = banshee->dramInit0; + break; + case Init_dramInit1: + ret = banshee->dramInit1; + break; + case Init_agpInit0: + ret = banshee->agpInit0; + break; + + case Init_vgaInit0: + ret = banshee->vgaInit0; + break; + case Init_vgaInit1: + ret = banshee->vgaInit1; + break; + + case Init_2dCommand: + ret = banshee->command_2d; + break; + case Init_2dSrcBaseAddr: + ret = banshee->srcBaseAddr_2d; + break; + case Init_strapInfo: + ret = 0x00000040; /*8 MB SGRAM, PCI, IRQ enabled, 32kB BIOS*/ + break; + + case PLL_pllCtrl0: + ret = banshee->pllCtrl0; + break; + case PLL_pllCtrl1: + ret = banshee->pllCtrl1; + break; + case PLL_pllCtrl2: + ret = banshee->pllCtrl2; + break; + + case DAC_dacMode: + ret = banshee->dacMode; + break; + case DAC_dacAddr: + ret = banshee->dacAddr; + break; + case DAC_dacData: + ret = svga->pallook[banshee->dacAddr]; + break; + + case Video_vidProcCfg: + ret = banshee->vidProcCfg; + break; + + case Video_hwCurPatAddr: + ret = banshee->hwCurPatAddr; + break; + case Video_hwCurLoc: + ret = banshee->hwCurLoc; + break; + case Video_hwCurC0: + ret = banshee->hwCurC0; + break; + case Video_hwCurC1: + ret = banshee->hwCurC1; + break; + + case Video_vidSerialParallelPort: + ret = banshee->vidSerialParallelPort & ~(VIDSERIAL_DDC_DCK_R | VIDSERIAL_DDC_DDA_R); + if ((banshee->vidSerialParallelPort & VIDSERIAL_DDC_DCK_W) && ddc_read_clock()) + ret |= VIDSERIAL_DDC_DCK_R; + if ((banshee->vidSerialParallelPort & VIDSERIAL_DDC_DDA_W) && ddc_read_data()) + ret |= VIDSERIAL_DDC_DDA_R; + ret = ret & ~(VIDSERIAL_I2C_SCK_R | VIDSERIAL_I2C_SDA_R); + if (banshee->vidSerialParallelPort & VIDSERIAL_I2C_SCK_W) + ret |= VIDSERIAL_I2C_SCK_R; + if (banshee->vidSerialParallelPort & VIDSERIAL_I2C_SDA_W) + ret |= VIDSERIAL_I2C_SDA_R; +// banshee_log("vidSerialParallelPort: read %08x %08x %04x(%08x):%08x\n", ret, ret & (VIDSERIAL_DDC_DCK_R | VIDSERIAL_DDC_DDA_R), CS,cs,cpu_state.pc); + break; + + case Video_vidScreenSize: + ret = banshee->vidScreenSize; + break; + case Video_vidOverlayStartCoords: + ret = voodoo->overlay.vidOverlayStartCoords; + break; + case Video_vidOverlayEndScreenCoords: + ret = voodoo->overlay.vidOverlayEndScreenCoords; + break; + case Video_vidOverlayDudx: + ret = voodoo->overlay.vidOverlayDudx; + break; + case Video_vidOverlayDudxOffsetSrcWidth: + ret = voodoo->overlay.vidOverlayDudxOffsetSrcWidth; + break; + case Video_vidOverlayDvdy: + ret = voodoo->overlay.vidOverlayDvdy; + break; + case Video_vidOverlayDvdyOffset: + ret = voodoo->overlay.vidOverlayDvdyOffset; + break; + + case Video_vidDesktopStartAddr: + ret = banshee->vidDesktopStartAddr; + break; + case Video_vidDesktopOverlayStride: + ret = banshee->vidDesktopOverlayStride; + break; + + default: +// fatal("bad banshee_ext_inl: addr=%04x\n", addr); + break; + } + +// /*if (addr) */banshee_log("banshee_ext_inl: addr=%04x val=%08x\n", addr, ret); + + return ret; +} + + +static uint32_t banshee_reg_readl(uint32_t addr, void *p); + +static uint8_t banshee_reg_read(uint32_t addr, void *p) +{ +// banshee_log("banshee_reg_read: addr=%08x\n", addr); + return banshee_reg_readl(addr & ~3, p) >> (8*(addr & 3)); +} + +static uint16_t banshee_reg_readw(uint32_t addr, void *p) +{ +// banshee_log("banshee_reg_readw: addr=%08x\n", addr); + return banshee_reg_readl(addr & ~3, p) >> (8*(addr & 2)); +} + +static uint32_t banshee_cmd_read(banshee_t *banshee, uint32_t addr) +{ + voodoo_t *voodoo = banshee->voodoo; + uint32_t ret = 0xffffffff; + + switch (addr & 0x1fc) + { + case cmdBaseAddr0: + ret = voodoo->cmdfifo_base >> 12; +// banshee_log("Read cmdfifo_base %08x\n", ret); + break; + + case cmdRdPtrL0: + ret = voodoo->cmdfifo_rp; +// banshee_log("Read cmdfifo_rp %08x\n", ret); + break; + + case cmdFifoDepth0: + ret = voodoo->cmdfifo_depth_wr - voodoo->cmdfifo_depth_rd; +// banshee_log("Read cmdfifo_depth %08x\n", ret); + break; + + case 0x108: + break; + + default: + fatal("Unknown banshee_cmd_read %08x\n", addr); + } + + return ret; +} + +static uint32_t banshee_reg_readl(uint32_t addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + uint32_t ret = 0xffffffff; + + sub_cycles(voodoo->read_time); + + switch (addr & 0x1f00000) + { + case 0x0000000: /*IO remap*/ + if (!(addr & 0x80000)) + ret = banshee_ext_inl(addr & 0xff, banshee); + else + ret = banshee_cmd_read(banshee, addr); + break; + + case 0x0100000: /*2D registers*/ + voodoo_flush(voodoo); + switch (addr & 0x1fc) + { + case 0x08: + ret = voodoo->banshee_blt.clip0Min; + break; + case 0x0c: + ret = voodoo->banshee_blt.clip0Max; + break; + case 0x10: + ret = voodoo->banshee_blt.dstBaseAddr; + break; + case 0x14: + ret = voodoo->banshee_blt.dstFormat; + break; + case 0x34: + ret = voodoo->banshee_blt.srcBaseAddr; + break; + case 0x38: + ret = voodoo->banshee_blt.commandExtra; + break; + case 0x5c: + ret = voodoo->banshee_blt.srcXY; + break; + case 0x60: + ret = voodoo->banshee_blt.colorBack; + break; + case 0x64: + ret = voodoo->banshee_blt.colorFore; + break; + case 0x68: + ret = voodoo->banshee_blt.dstSize; + break; + case 0x6c: + ret = voodoo->banshee_blt.dstXY; + break; + case 0x70: + ret = voodoo->banshee_blt.command; + break; + default: + banshee_log("banshee_reg_readl: addr=%08x\n", addr); + } + break; + + case 0x0200000: case 0x0300000: case 0x0400000: case 0x0500000: /*3D registers*/ + switch (addr & 0x3fc) + { + case SST_status: + ret = banshee_status(banshee); + break; + + case SST_intrCtrl: + ret = banshee->intrCtrl & 0x0030003f; + break; + + case SST_fbzColorPath: + voodoo_flush(voodoo); + ret = voodoo->params.fbzColorPath; + break; + case SST_fogMode: + voodoo_flush(voodoo); + ret = voodoo->params.fogMode; + break; + case SST_alphaMode: + voodoo_flush(voodoo); + ret = voodoo->params.alphaMode; + break; + case SST_fbzMode: + voodoo_flush(voodoo); + ret = voodoo->params.fbzMode; + break; + case SST_lfbMode: + voodoo_flush(voodoo); + ret = voodoo->lfbMode; + break; + case SST_clipLeftRight: + ret = voodoo->params.clipRight | (voodoo->params.clipLeft << 16); + break; + case SST_clipLowYHighY: + ret = voodoo->params.clipHighY | (voodoo->params.clipLowY << 16); + break; + + case SST_clipLeftRight1: + ret = voodoo->params.clipRight1 | (voodoo->params.clipLeft1 << 16); + break; + case SST_clipTopBottom1: + ret = voodoo->params.clipHighY1 | (voodoo->params.clipLowY1 << 16); + break; + + case SST_stipple: + voodoo_flush(voodoo); + ret = voodoo->params.stipple; + break; + case SST_color0: + voodoo_flush(voodoo); + ret = voodoo->params.color0; + break; + case SST_color1: + voodoo_flush(voodoo); + ret = voodoo->params.color1; + break; + + case SST_fbiPixelsIn: + ret = voodoo->fbiPixelsIn & 0xffffff; + break; + case SST_fbiChromaFail: + ret = voodoo->fbiChromaFail & 0xffffff; + break; + case SST_fbiZFuncFail: + ret = voodoo->fbiZFuncFail & 0xffffff; + break; + case SST_fbiAFuncFail: + ret = voodoo->fbiAFuncFail & 0xffffff; + break; + case SST_fbiPixelsOut: + ret = voodoo->fbiPixelsOut & 0xffffff; + break; + + default: + fatal("banshee_reg_readl: 3D addr=%08x\n", addr); + break; + } + break; + } + +// /*if (addr != 0xe0000000) */banshee_log("banshee_reg_readl: addr=%08x ret=%08x %04x(%08x):%08x\n", addr, ret, CS,cs,cpu_state.pc); + + return ret; +} + +static void banshee_reg_write(uint32_t addr, uint8_t val, void *p) +{ +// banshee_log("banshee_reg_writeb: addr=%08x val=%02x\n", addr, val); +} + +static void banshee_reg_writew(uint32_t addr, uint16_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + + sub_cycles(voodoo->write_time); + +// banshee_log("banshee_reg_writew: addr=%08x val=%04x\n", addr, val); + switch (addr & 0x1f00000) + { + case 0x1000000: case 0x1100000: case 0x1200000: case 0x1300000: /*3D LFB*/ + case 0x1400000: case 0x1500000: case 0x1600000: case 0x1700000: + case 0x1800000: case 0x1900000: case 0x1a00000: case 0x1b00000: + case 0x1c00000: case 0x1d00000: case 0x1e00000: case 0x1f00000: + voodoo_queue_command(voodoo, (addr & 0xffffff) | FIFO_WRITEW_FB, val); + break; + } +} + +static void banshee_cmd_write(banshee_t *banshee, uint32_t addr, uint32_t val) +{ + voodoo_t *voodoo = banshee->voodoo; +// banshee_log("banshee_cmd_write: addr=%03x val=%08x\n", addr & 0x1fc, val); + switch (addr & 0x1fc) + { + case cmdBaseAddr0: + voodoo->cmdfifo_base = (val & 0xfff) << 12; + voodoo->cmdfifo_end = voodoo->cmdfifo_base + (((voodoo->cmdfifo_size & 0xff) + 1) << 12); +// banshee_log("cmdfifo_base=%08x cmdfifo_end=%08x %08x\n", voodoo->cmdfifo_base, voodoo->cmdfifo_end, val); + break; + + case cmdBaseSize0: + voodoo->cmdfifo_size = val; + voodoo->cmdfifo_end = voodoo->cmdfifo_base + (((voodoo->cmdfifo_size & 0xff) + 1) << 12); + voodoo->cmdfifo_enabled = val & 0x100; +// banshee_log("cmdfifo_base=%08x cmdfifo_end=%08x\n", voodoo->cmdfifo_base, voodoo->cmdfifo_end); + break; + +// voodoo->cmdfifo_end = ((val >> 16) & 0x3ff) << 12; +// banshee_log("CMDFIFO base=%08x end=%08x\n", voodoo->cmdfifo_base, voodoo->cmdfifo_end); +// break; + + case cmdRdPtrL0: + voodoo->cmdfifo_rp = val; + break; + case cmdAMin0: + voodoo->cmdfifo_amin = val; + break; + case cmdAMax0: + voodoo->cmdfifo_amax = val; + break; + case cmdFifoDepth0: + voodoo->cmdfifo_depth_rd = 0; + voodoo->cmdfifo_depth_wr = val & 0xffff; + break; + + default: + banshee_log("Unknown banshee_cmd_write: addr=%08x val=%08x\n", addr, val); + break; + } + +/* cmdBaseSize0 = 0x24, + cmdBump0 = 0x28, + cmdRdPtrL0 = 0x2c, + cmdRdPtrH0 = 0x30, + cmdAMin0 = 0x34, + cmdAMax0 = 0x3c, + cmdFifoDepth0 = 0x44, + cmdHoleCnt0 = 0x48 + }*/ +} + +static void banshee_reg_writel(uint32_t addr, uint32_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + + if (addr == voodoo->last_write_addr+4) + sub_cycles(voodoo->burst_time); + else + sub_cycles(voodoo->write_time); + voodoo->last_write_addr = addr; + +// banshee_log("banshee_reg_writel: addr=%08x val=%08x\n", addr, val); + + switch (addr & 0x1f00000) + { + case 0x0000000: /*IO remap*/ + if (!(addr & 0x80000)) + banshee_ext_outl(addr & 0xff, val, banshee); + else + banshee_cmd_write(banshee, addr, val); +// banshee_log("CMD!!! write %08x %08x\n", addr, val); + break; + + case 0x0100000: /*2D registers*/ + voodoo_queue_command(voodoo, (addr & 0x1fc) | FIFO_WRITEL_2DREG, val); + break; + + case 0x0200000: case 0x0300000: case 0x0400000: case 0x0500000: /*3D registers*/ + switch (addr & 0x3fc) + { + case SST_intrCtrl: + banshee->intrCtrl = val & 0x0030003f; +// banshee_log("intrCtrl=%08x\n", val); + break; + + case SST_userIntrCMD: + fatal("userIntrCMD write %08x\n", val); + break; + + case SST_swapbufferCMD: + voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); +// banshee_log("SST_swapbufferCMD write: %i %i\n", voodoo->cmd_written, voodoo->cmd_written_fifo); + break; + case SST_triangleCMD: + voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_ftriangleCMD: + voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_fastfillCMD: + voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_nopCMD: + voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + + case SST_swapPending: + voodoo->swap_count++; +// voodoo->cmd_written++; + break; + + default: + voodoo_queue_command(voodoo, (addr & 0x3ffffc) | FIFO_WRITEL_REG, val); + break; + } + break; + + case 0x0600000: case 0x0700000: /*Texture download*/ + voodoo->tex_count++; + voodoo_queue_command(voodoo, (addr & 0x1ffffc) | FIFO_WRITEL_TEX, val); + break; + + case 0x1000000: case 0x1100000: case 0x1200000: case 0x1300000: /*3D LFB*/ + case 0x1400000: case 0x1500000: case 0x1600000: case 0x1700000: + case 0x1800000: case 0x1900000: case 0x1a00000: case 0x1b00000: + case 0x1c00000: case 0x1d00000: case 0x1e00000: case 0x1f00000: + voodoo_queue_command(voodoo, (addr & 0xfffffc) | FIFO_WRITEL_FB, val); + break; + } +} + +static uint8_t banshee_read_linear(uint32_t addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + + sub_cycles(voodoo->read_time); + + addr &= svga->decode_mask; + if (addr >= voodoo->tile_base) + { + int x, y; + + addr -= voodoo->tile_base; + x = addr & (voodoo->tile_stride-1); + y = addr >> voodoo->tile_stride_shift; + + addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real; +// banshee_log(" Tile rb %08x->%08x %i %i\n", old_addr, addr, x, y); + } + if (addr >= svga->vram_max) + return 0xff; + + egareads++; + sub_cycles(video_timing_read_b); + +// banshee_log("read_linear: addr=%08x val=%02x\n", addr, svga->vram[addr & svga->vram_mask]); + + return svga->vram[addr & svga->vram_mask]; +} + +static uint16_t banshee_read_linear_w(uint32_t addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + + sub_cycles(voodoo->read_time); + + addr &= svga->decode_mask; + if (addr >= voodoo->tile_base) + { + int x, y; + + addr -= voodoo->tile_base; + x = addr & (voodoo->tile_stride-1); + y = addr >> voodoo->tile_stride_shift; + + addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real; +// banshee_log(" Tile rb %08x->%08x %i %i\n", old_addr, addr, x, y); + } + if (addr >= svga->vram_max) + return 0xff; + + egareads++; + sub_cycles(video_timing_read_w); + +// banshee_log("read_linear: addr=%08x val=%02x\n", addr, svga->vram[addr & svga->vram_mask]); + + return *(uint16_t *)&svga->vram[addr & svga->vram_mask]; +} + +static uint32_t banshee_read_linear_l(uint32_t addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + + sub_cycles(voodoo->read_time); + + addr &= svga->decode_mask; + if (addr >= voodoo->tile_base) + { + int x, y; + + addr -= voodoo->tile_base; + x = addr & (voodoo->tile_stride-1); + y = addr >> voodoo->tile_stride_shift; + + addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real; +// banshee_log(" Tile rb %08x->%08x %i %i\n", old_addr, addr, x, y); + } + if (addr >= svga->vram_max) + return 0xff; + + egareads++; + sub_cycles(video_timing_read_l); + +// banshee_log("read_linear: addr=%08x val=%02x\n", addr, svga->vram[addr & svga->vram_mask]); + + return *(uint32_t *)&svga->vram[addr & svga->vram_mask]; +} + +static void banshee_write_linear(uint32_t addr, uint8_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + + sub_cycles(voodoo->write_time); + +// banshee_log("write_linear: addr=%08x val=%02x\n", addr, val); + addr &= svga->decode_mask; + if (addr >= voodoo->tile_base) + { + int x, y; + + addr -= voodoo->tile_base; + x = addr & (voodoo->tile_stride-1); + y = addr >> voodoo->tile_stride_shift; + + addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real; +// banshee_log(" Tile b %08x->%08x %i %i\n", old_addr, addr, x, y); + } + if (addr >= svga->vram_max) + return; + + egawrites++; + + sub_cycles(video_timing_write_b); + + svga->changedvram[addr >> 12] = changeframecount; + svga->vram[addr & svga->vram_mask] = val; +} + +static void banshee_write_linear_w(uint32_t addr, uint16_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + + sub_cycles(voodoo->write_time); + +// banshee_log("write_linear: addr=%08x val=%02x\n", addr, val); + addr &= svga->decode_mask; + if (addr >= voodoo->tile_base) + { + int x, y; + + addr -= voodoo->tile_base; + x = addr & (voodoo->tile_stride-1); + y = addr >> voodoo->tile_stride_shift; + + addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real; +// banshee_log(" Tile b %08x->%08x %i %i\n", old_addr, addr, x, y); + } + if (addr >= svga->vram_max) + return; + + egawrites++; + + sub_cycles(video_timing_write_w); + + svga->changedvram[addr >> 12] = changeframecount; + *(uint16_t *)&svga->vram[addr & svga->vram_mask] = val; +} + +static void banshee_write_linear_l(uint32_t addr, uint32_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + int timing; + + if (addr == voodoo->last_write_addr+4) + timing = voodoo->burst_time; + else + timing = voodoo->write_time; + sub_cycles(timing); + voodoo->last_write_addr = addr; + +// /*if (val) */banshee_log("write_linear_l: addr=%08x val=%08x %08x\n", addr, val, voodoo->tile_base); + addr &= svga->decode_mask; + if (addr >= voodoo->tile_base) + { + int x, y; + + addr -= voodoo->tile_base; + x = addr & (voodoo->tile_stride-1); + y = addr >> voodoo->tile_stride_shift; + + addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real; +// banshee_log(" Tile %08x->%08x->%08x->%08x %i %i tile_x=%i\n", old_addr, addr_off, addr2, addr, x, y, voodoo->tile_x_real); + } + + if (addr >= svga->vram_max) + return; + + egawrites += 4; + + sub_cycles(video_timing_write_l); + + svga->changedvram[addr >> 12] = changeframecount; + *(uint32_t *)&svga->vram[addr & svga->vram_mask] = val; + if (voodoo->cmdfifo_enabled && addr >= voodoo->cmdfifo_base && addr < voodoo->cmdfifo_end) + { +// banshee_log("CMDFIFO write %08x %08x old amin=%08x amax=%08x hlcnt=%i depth_wr=%i rp=%08x\n", addr, val, voodoo->cmdfifo_amin, voodoo->cmdfifo_amax, voodoo->cmdfifo_holecount, voodoo->cmdfifo_depth_wr, voodoo->cmdfifo_rp); + if (addr == voodoo->cmdfifo_base && !voodoo->cmdfifo_holecount) + { +// if (voodoo->cmdfifo_holecount) +// fatal("CMDFIFO reset pointers while outstanding holes\n"); + /*Reset pointers*/ + voodoo->cmdfifo_amin = voodoo->cmdfifo_base; + voodoo->cmdfifo_amax = voodoo->cmdfifo_base; + voodoo->cmdfifo_depth_wr++; + voodoo_wake_fifo_thread(voodoo); + } + else if (voodoo->cmdfifo_holecount) + { +// if ((addr <= voodoo->cmdfifo_amin && voodoo->cmdfifo_amin != -4) || addr >= voodoo->cmdfifo_amax) +// fatal("CMDFIFO holecount write outside of amin/amax - amin=%08x amax=%08x holecount=%i\n", voodoo->cmdfifo_amin, voodoo->cmdfifo_amax, voodoo->cmdfifo_holecount); +// banshee_log("holecount %i\n", voodoo->cmdfifo_holecount); + voodoo->cmdfifo_holecount--; + if (!voodoo->cmdfifo_holecount) + { + /*Filled in holes, resume normal operation*/ + voodoo->cmdfifo_depth_wr += ((voodoo->cmdfifo_amax - voodoo->cmdfifo_amin) >> 2); + voodoo->cmdfifo_amin = voodoo->cmdfifo_amax; + voodoo_wake_fifo_thread(voodoo); +// banshee_log("hole filled! amin=%08x amax=%08x added %i words\n", voodoo->cmdfifo_amin, voodoo->cmdfifo_amax, words_to_add); + } + } + else if (addr == voodoo->cmdfifo_amax+4) + { + /*In-order write*/ + voodoo->cmdfifo_amin = addr; + voodoo->cmdfifo_amax = addr; + voodoo->cmdfifo_depth_wr++; + voodoo_wake_fifo_thread(voodoo); + } + else + { + /*Out-of-order write*/ + if (addr < voodoo->cmdfifo_amin) + { + /*Reset back to start. Note that write is still out of order!*/ + voodoo->cmdfifo_amin = voodoo->cmdfifo_base-4; + + } +// else if (addr < voodoo->cmdfifo_amax) +// fatal("Out-of-order write really out of order\n"); + voodoo->cmdfifo_amax = addr; + voodoo->cmdfifo_holecount = ((voodoo->cmdfifo_amax - voodoo->cmdfifo_amin) >> 2) - 1; +// banshee_log("CMDFIFO out of order: amin=%08x amax=%08x holecount=%i\n", voodoo->cmdfifo_amin, voodoo->cmdfifo_amax, voodoo->cmdfifo_holecount); + } + } +} + +void banshee_hwcursor_draw(svga_t *svga, int displine) +{ + banshee_t *banshee = (banshee_t *)svga->p; + int x, c; + int x_off; + int xx; + uint32_t col0 = banshee->hwCurC0; + uint32_t col1 = banshee->hwCurC1; + uint8_t plane0[8], plane1[8]; + + for (c = 0; c < 8; c++) + plane0[c] = svga->vram[svga->hwcursor_latch.addr + c]; + for (c = 0; c < 8; c++) + plane1[c] = svga->vram[svga->hwcursor_latch.addr + c + 8]; + svga->hwcursor_latch.addr += 16; + + x_off = svga->hwcursor_latch.x; + + if (banshee->vidProcCfg & VIDPROCCFG_CURSOR_MODE) + { + /*X11 mode*/ + for (x = 0; x < 64; x += 8) + { + if (x_off > (32-8)) + { + for (xx = 0; xx < 8; xx++) + { + if (plane0[x >> 3] & (1 << 7)) + ((uint32_t *)buffer32->line[displine])[x_off + xx + svga->x_add] = (plane1[x >> 3] & (1 << 7)) ? col1 : col0; + + plane0[x >> 3] <<= 1; + plane1[x >> 3] <<= 1; + } + } + + x_off += 8; + } + } + else + { + /*Windows mode*/ + for (x = 0; x < 64; x += 8) + { + if (x_off > (32-8)) + { + for (xx = 0; xx < 8; xx++) + { + if (!(plane0[x >> 3] & (1 << 7))) + ((uint32_t *)buffer32->line[displine])[x_off + xx + svga->x_add] = (plane1[x >> 3] & (1 << 7)) ? col1 : col0; + else if (plane1[x >> 3] & (1 << 7)) + ((uint32_t *)buffer32->line[displine])[x_off + xx + svga->x_add] ^= 0xffffff; + + plane0[x >> 3] <<= 1; + plane1[x >> 3] <<= 1; + } + } + + x_off += 8; + } + } +} + +#define CLAMP(x) do \ + { \ + if ((x) & ~0xff) \ + x = ((x) < 0) ? 0 : 0xff; \ + } \ + while (0) + +#define DECODE_RGB565(buf) \ + do \ + { \ + int c; \ + int wp = 0; \ + \ + for (c = 0; c < voodoo->overlay.overlay_bytes; c += 2) \ + { \ + uint16_t data = *(uint16_t *)src; \ + int r = data & 0x1f; \ + int g = (data >> 5) & 0x3f; \ + int b = data >> 11; \ + \ + buf[wp++] = (r << 3) | (g << 10) | (b << 19); \ + src += 2; \ + } \ + } while (0) + +#define DECODE_RGB565_TILED(buf) \ + do \ + { \ + int c; \ + int wp = 0; \ + \ + for (c = 0; c < voodoo->overlay.overlay_bytes; c += 2) \ + { \ + uint16_t data = *(uint16_t *)&src[(c & 127) + (c >> 7)*128*32]; \ + int r = data & 0x1f; \ + int g = (data >> 5) & 0x3f; \ + int b = data >> 11; \ + \ + buf[wp++] = (r << 3) | (g << 10) | (b << 19); \ + } \ + } while (0) + +#define DECODE_YUYV422(buf) \ + do \ + { \ + int c; \ + int wp = 0; \ + \ + for (c = 0; c < voodoo->overlay.overlay_bytes; c += 4) \ + { \ + uint8_t y1, y2; \ + int8_t Cr, Cb; \ + int dR, dG, dB; \ + int r, g, b; \ + \ + y1 = src[0]; \ + Cr = src[1] - 0x80; \ + y2 = src[2]; \ + Cb = src[3] - 0x80; \ + src += 4; \ + \ + dR = (359*Cr) >> 8; \ + dG = (88*Cb + 183*Cr) >> 8; \ + dB = (453*Cb) >> 8; \ + \ + r = y1 + dR; \ + CLAMP(r); \ + g = y1 - dG; \ + CLAMP(g); \ + b = y1 + dB; \ + CLAMP(b); \ + buf[wp++] = r | (g << 8) | (b << 16); \ + \ + r = y2 + dR; \ + CLAMP(r); \ + g = y2 - dG; \ + CLAMP(g); \ + b = y2 + dB; \ + CLAMP(b); \ + buf[wp++] = r | (g << 8) | (b << 16); \ + } \ + } while (0) + +#define DECODE_UYUV422(buf) \ + do \ + { \ + int c; \ + int wp = 0; \ + \ + for (c = 0; c < voodoo->overlay.overlay_bytes; c += 4) \ + { \ + uint8_t y1, y2; \ + int8_t Cr, Cb; \ + int dR, dG, dB; \ + int r, g, b; \ + \ + Cr = src[0] - 0x80; \ + y1 = src[1]; \ + Cb = src[2] - 0x80; \ + y2 = src[3]; \ + src += 4; \ + \ + dR = (359*Cr) >> 8; \ + dG = (88*Cb + 183*Cr) >> 8; \ + dB = (453*Cb) >> 8; \ + \ + r = y1 + dR; \ + CLAMP(r); \ + g = y1 - dG; \ + CLAMP(g); \ + b = y1 + dB; \ + CLAMP(b); \ + buf[wp++] = r | (g << 8) | (b << 16); \ + \ + r = y2 + dR; \ + CLAMP(r); \ + g = y2 - dG; \ + CLAMP(g); \ + b = y2 + dB; \ + CLAMP(b); \ + buf[wp++] = r | (g << 8) | (b << 16); \ + } \ + } while (0) + + +#define OVERLAY_SAMPLE(buf) \ + do \ + { \ + switch (banshee->overlay_pix_fmt) \ + { \ + case 0: \ + break; \ + \ + case OVERLAY_FMT_YUYV422: \ + DECODE_YUYV422(buf); \ + break; \ + \ + case OVERLAY_FMT_UYVY422: \ + DECODE_UYUV422(buf); \ + break; \ + \ + case OVERLAY_FMT_565: \ + case OVERLAY_FMT_565_DITHER: \ + if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) \ + DECODE_RGB565_TILED(buf); \ + else \ + DECODE_RGB565(buf); \ + break; \ + \ + default: \ + fatal("Unknown overlay pix fmt %i\n", banshee->overlay_pix_fmt); \ + } \ + } while (0) + +/* generate both filters for the static table here */ +void voodoo_generate_vb_filters(voodoo_t *voodoo, int fcr, int fcg) +{ + int g, h; + float difference, diffg; + float thiscol, thiscolg; + float clr, clg = 0; + float hack = 1.0f; + // pre-clamping + + fcr *= hack; + fcg *= hack; + + + /* box prefilter */ + for (g=0;g<256;g++) // pixel 1 - our target pixel we want to bleed into + { + for (h=0;h<256;h++) // pixel 2 - our main pixel + { + float avg; + float avgdiff; + + difference = (float)(g - h); + avg = g; + avgdiff = avg - h; + + avgdiff = avgdiff * 0.75f; + if (avgdiff < 0) avgdiff *= -1; + if (difference < 0) difference *= -1; + + thiscol = thiscolg = g; + + if (h > g) + { + clr = clg = avgdiff; + + if (clr>fcr) clr=fcr; + if (clg>fcg) clg=fcg; + + thiscol = g; + thiscolg = g; + + if (thiscol>g+fcr) + thiscol=g+fcr; + if (thiscolg>g+fcg) + thiscolg=g+fcg; + + if (thiscol>g+difference) + thiscol=g+difference; + if (thiscolg>g+difference) + thiscolg=g+difference; + + // hmm this might not be working out.. + int ugh = g - h; + if (ugh < fcr) + thiscol = h; + if (ugh < fcg) + thiscolg = h; + } + + if (difference > fcr) + thiscol = g; + if (difference > fcg) + thiscolg = g; + + // clamp + if (thiscol < 0) thiscol = 0; + if (thiscolg < 0) thiscolg = 0; + + if (thiscol > 255) thiscol = 255; + if (thiscolg > 255) thiscolg = 255; + + vb_filter_bx_rb[g][h] = (thiscol); + vb_filter_bx_g [g][h] = (thiscolg); + + } + float lined = g + 4; + if (lined > 255) + lined = 255; + voodoo->purpleline[g][0] = lined; + voodoo->purpleline[g][2] = lined; + + lined = g + 0; + if (lined > 255) + lined = 255; + voodoo->purpleline[g][1] = lined; + } + + /* 4x1 and 2x2 filter */ + //fcr *= 5; + //fcg *= 6; + + for (g=0;g<256;g++) // pixel 1 + { + for (h=0;h<256;h++) // pixel 2 + { + difference = (float)(h - g); + diffg = difference; + + thiscol = thiscolg = g; + + if (difference > fcr) + difference = fcr; + if (difference < -fcr) + difference = -fcr; + + if (diffg > fcg) + diffg = fcg; + if (diffg < -fcg) + diffg = -fcg; + + if ((difference < fcr) || (-difference > -fcr)) + thiscol = g + (difference / 2); + if ((diffg < fcg) || (-diffg > -fcg)) + thiscolg = g + (diffg / 2); + + if (thiscol < 0) + thiscol = 0; + if (thiscol > 255) + thiscol = 255; + + if (thiscolg < 0) + thiscolg = 0; + if (thiscolg > 255) + thiscolg = 255; + + vb_filter_v1_rb[g][h] = thiscol; + vb_filter_v1_g [g][h] = thiscolg; + + } + } + +} + + +static void banshee_overlay_draw(svga_t *svga, int displine) +{ + banshee_t *banshee = (banshee_t *)svga->p; + voodoo_t *voodoo = banshee->voodoo; + uint32_t *p; + int x; + int y = voodoo->overlay.src_y >> 20; + uint32_t src_addr = svga->overlay_latch.addr + ((banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) ? + ((y & 31) * 128 + (y >> 5) * svga->overlay_latch.pitch) : + y * svga->overlay_latch.pitch); + uint32_t src_addr2 = svga->overlay_latch.addr + ((banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) ? + (((y + 1) & 31) * 128 + ((y + 1) >> 5) * svga->overlay_latch.pitch) : + (y + 1) * svga->overlay_latch.pitch); + uint8_t *src = &svga->vram[src_addr & svga->vram_mask]; + uint32_t src_x = 0; + unsigned int y_coeff = (voodoo->overlay.src_y & 0xfffff) >> 4; + int skip_filtering; + + if (svga->render == svga_render_null && + !svga->changedvram[src_addr >> 12] && !svga->changedvram[src_addr2 >> 12] && + !svga->fullchange && + ((voodoo->overlay.src_y >> 20) < 2048 && !voodoo->dirty_line[voodoo->overlay.src_y >> 20]) && + !(banshee->vidProcCfg & VIDPROCCFG_V_SCALE_ENABLE)) + { + voodoo->overlay.src_y += (1 << 20); + return; + } + + if ((voodoo->overlay.src_y >> 20) < 2048) + voodoo->dirty_line[voodoo->overlay.src_y >> 20] = 0; +// pclog("displine=%i addr=%08x %08x %08x %08x\n", displine, svga->overlay_latch.addr, src_addr, voodoo->overlay.vidOverlayDvdy, *(uint32_t *)src); +// if (src_addr >= 0x800000) +// fatal("overlay out of range!\n"); + p = &((uint32_t *)buffer32->line[displine])[svga->overlay_latch.x + svga->x_add]; + + if (banshee->voodoo->scrfilter && banshee->voodoo->scrfilterEnabled) + skip_filtering = ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) != VIDPROCCFG_FILTER_MODE_BILINEAR && + !(banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) && !(banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_DITHER_4X4) && + !(banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_DITHER_2X2)); + else + skip_filtering = ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) != VIDPROCCFG_FILTER_MODE_BILINEAR); + + if (skip_filtering) + { + /*No scaling or filtering required, just write straight to output buffer*/ + OVERLAY_SAMPLE(p); + } + else + { + OVERLAY_SAMPLE(banshee->overlay_buffer[0]); + + switch (banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) + { + case VIDPROCCFG_FILTER_MODE_BILINEAR: + src = &svga->vram[src_addr2 & svga->vram_mask]; + OVERLAY_SAMPLE(banshee->overlay_buffer[1]); + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + unsigned int x_coeff = (src_x & 0xfffff) >> 4; + unsigned int coeffs[4] = { + ((0x10000 - x_coeff) * (0x10000 - y_coeff)) >> 16, + ( x_coeff * (0x10000 - y_coeff)) >> 16, + ((0x10000 - x_coeff) * y_coeff) >> 16, + ( x_coeff * y_coeff) >> 16 + }; + uint32_t samp0 = banshee->overlay_buffer[0][src_x >> 20]; + uint32_t samp1 = banshee->overlay_buffer[0][(src_x >> 20) + 1]; + uint32_t samp2 = banshee->overlay_buffer[1][src_x >> 20]; + uint32_t samp3 = banshee->overlay_buffer[1][(src_x >> 20) + 1]; + int r = (((samp0 >> 16) & 0xff) * coeffs[0] + + ((samp1 >> 16) & 0xff) * coeffs[1] + + ((samp2 >> 16) & 0xff) * coeffs[2] + + ((samp3 >> 16) & 0xff) * coeffs[3]) >> 16; + int g = (((samp0 >> 8) & 0xff) * coeffs[0] + + ((samp1 >> 8) & 0xff) * coeffs[1] + + ((samp2 >> 8) & 0xff) * coeffs[2] + + ((samp3 >> 8) & 0xff) * coeffs[3]) >> 16; + int b = ((samp0 & 0xff) * coeffs[0] + + (samp1 & 0xff) * coeffs[1] + + (samp2 & 0xff) * coeffs[2] + + (samp3 & 0xff) * coeffs[3]) >> 16; + p[x] = (r << 16) | (g << 8) | b; + + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + uint32_t samp0 = banshee->overlay_buffer[0][src_x >> 20]; + uint32_t samp1 = banshee->overlay_buffer[1][src_x >> 20]; + int r = (((samp0 >> 16) & 0xff) * (0x10000 - y_coeff) + + ((samp1 >> 16) & 0xff) * y_coeff) >> 16; + int g = (((samp0 >> 8) & 0xff) * (0x10000 - y_coeff) + + ((samp1 >> 8) & 0xff) * y_coeff) >> 16; + int b = ((samp0 & 0xff) * (0x10000 - y_coeff) + + (samp1 & 0xff) * y_coeff) >> 16; + p[x] = (r << 16) | (g << 8) | b; + } + } + break; + + case VIDPROCCFG_FILTER_MODE_DITHER_4X4: + if (banshee->voodoo->scrfilter && banshee->voodoo->scrfilterEnabled) + { + uint8_t fil[(svga->overlay_latch.xsize) * 3]; + uint8_t fil3[(svga->overlay_latch.xsize) * 3]; + + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) /* leilei HACK - don't know of real 4x1 hscaled behavior yet, double for now */ + { + for (x=0; xoverlay_latch.xsize;x++) + { + fil[x*3] = ((banshee->overlay_buffer[0][src_x >> 20])); + fil[x*3+1] = ((banshee->overlay_buffer[0][src_x >> 20] >> 8)); + fil[x*3+2] = ((banshee->overlay_buffer[0][src_x >> 20] >> 16)); + fil3[x*3+0] = fil[x*3+0]; + fil3[x*3+1] = fil[x*3+1]; + fil3[x*3+2] = fil[x*3+2]; + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x=0; xoverlay_latch.xsize;x++) + { + fil[x*3] = ((banshee->overlay_buffer[0][x])); + fil[x*3+1] = ((banshee->overlay_buffer[0][x] >> 8)); + fil[x*3+2] = ((banshee->overlay_buffer[0][x] >> 16)); + fil3[x*3+0] = fil[x*3+0]; + fil3[x*3+1] = fil[x*3+1]; + fil3[x*3+2] = fil[x*3+2]; + } + } + if (y % 2 == 0) + { + for (x=0; xoverlay_latch.xsize;x++) + { + fil[x*3] = banshee->voodoo->purpleline[fil[x*3+0]][0]; + fil[x*3+1] = banshee->voodoo->purpleline[fil[x*3+1]][1]; + fil[x*3+2] = banshee->voodoo->purpleline[fil[x*3+2]][2]; + } + } + + for (x=1; xoverlay_latch.xsize;x++) + { + fil3[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil[(x-1) *3]]; + fil3[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil[(x-1) *3+1]]; + fil3[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil[(x-1) *3+2]]; + } + for (x=1; xoverlay_latch.xsize;x++) + { + fil[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil3[(x-1) *3]]; + fil[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil3[(x-1) *3+1]]; + fil[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil3[(x-1) *3+2]]; + } + for (x=1; xoverlay_latch.xsize;x++) + { + fil3[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil[(x-1) *3]]; + fil3[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil[(x-1) *3+1]]; + fil3[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil[(x-1) *3+2]]; + } + for (x=0; xoverlay_latch.xsize;x++) + { + fil[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil3[(x+1) *3]]; + fil[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil3[(x+1) *3+1]]; + fil[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil3[(x+1) *3+2]]; + p[x] = (fil[x*3+2] << 16) | (fil[x*3+1] << 8) | fil[x*3]; + } + } + else /* filter disabled by emulator option */ + { + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + p[x] = banshee->overlay_buffer[0][src_x >> 20]; + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + p[x] = banshee->overlay_buffer[0][x]; + } + } + break; + + case VIDPROCCFG_FILTER_MODE_DITHER_2X2: + if (banshee->voodoo->scrfilter && banshee->voodoo->scrfilterEnabled) + { + uint8_t fil[(svga->overlay_latch.xsize) * 3]; + uint8_t soak[(svga->overlay_latch.xsize) * 3]; + uint8_t soak2[(svga->overlay_latch.xsize) * 3]; + + uint8_t samp1[(svga->overlay_latch.xsize) * 3]; + uint8_t samp2[(svga->overlay_latch.xsize) * 3]; + uint8_t samp3[(svga->overlay_latch.xsize) * 3]; + uint8_t samp4[(svga->overlay_latch.xsize) * 3]; + + src = &svga->vram[src_addr2 & svga->vram_mask]; + OVERLAY_SAMPLE(banshee->overlay_buffer[1]); + for (x=0; xoverlay_latch.xsize;x++) + { + samp1[x*3] = ((banshee->overlay_buffer[0][x])); + samp1[x*3+1] = ((banshee->overlay_buffer[0][x] >> 8)); + samp1[x*3+2] = ((banshee->overlay_buffer[0][x] >> 16)); + + samp2[x*3+0] = ((banshee->overlay_buffer[0][x+1])); + samp2[x*3+1] = ((banshee->overlay_buffer[0][x+1] >> 8)); + samp2[x*3+2] = ((banshee->overlay_buffer[0][x+1] >> 16)); + + samp3[x*3+0] = ((banshee->overlay_buffer[1][x])); + samp3[x*3+1] = ((banshee->overlay_buffer[1][x] >> 8)); + samp3[x*3+2] = ((banshee->overlay_buffer[1][x] >> 16)); + + samp4[x*3+0] = ((banshee->overlay_buffer[1][x+1])); + samp4[x*3+1] = ((banshee->overlay_buffer[1][x+1] >> 8)); + samp4[x*3+2] = ((banshee->overlay_buffer[1][x+1] >> 16)); + + /* sample two lines */ + + soak[x*3+0] = vb_filter_bx_rb [samp1[x*3+0]] [samp2[x*3+0]]; + soak[x*3+1] = vb_filter_bx_g [samp1[x*3+1]] [samp2[x*3+1]]; + soak[x*3+2] = vb_filter_bx_rb [samp1[x*3+2]] [samp2[x*3+2]]; + + soak2[x*3+0] = vb_filter_bx_rb[samp3[x*3+0]] [samp4[x*3+0]]; + soak2[x*3+1] = vb_filter_bx_g [samp3[x*3+1]] [samp4[x*3+1]]; + soak2[x*3+2] = vb_filter_bx_rb[samp3[x*3+2]] [samp4[x*3+2]]; + + /* then pour it on the rest */ + + fil[x*3+0] = vb_filter_v1_rb[soak[x*3+0]] [soak2[x*3+0]]; + fil[x*3+1] = vb_filter_v1_g [soak[x*3+1]] [soak2[x*3+1]]; + fil[x*3+2] = vb_filter_v1_rb[soak[x*3+2]] [soak2[x*3+2]]; + } + + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) /* 2x2 on a scaled low res */ + { + for (x=0; xoverlay_latch.xsize;x++) + { + p[x] = (fil[(src_x >> 20)*3+2] << 16) | (fil[(src_x >> 20)*3+1] << 8) | fil[(src_x >> 20)*3]; + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x=0; xoverlay_latch.xsize;x++) + { + p[x] = (fil[x*3+2] << 16) | (fil[x*3+1] << 8) | fil[x*3]; + } + } + } + else /* filter disabled by emulator option */ + { + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + p[x] = banshee->overlay_buffer[0][src_x >> 20]; + + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + p[x] = banshee->overlay_buffer[0][x]; + } + } + break; + + case VIDPROCCFG_FILTER_MODE_POINT: + default: + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + p[x] = banshee->overlay_buffer[0][src_x >> 20]; + + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + p[x] = banshee->overlay_buffer[0][x]; + } + break; + } + } + + if (banshee->vidProcCfg & VIDPROCCFG_V_SCALE_ENABLE) + voodoo->overlay.src_y += voodoo->overlay.vidOverlayDvdy; + else + voodoo->overlay.src_y += (1 << 20); +} + +void banshee_set_overlay_addr(void *p, uint32_t addr) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + + banshee->svga.overlay.addr = banshee->voodoo->leftOverlayBuf & 0xfffffff; + banshee->svga.overlay_latch.addr = banshee->voodoo->leftOverlayBuf & 0xfffffff; + memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line)); +} + +static void banshee_vsync_callback(svga_t *svga) +{ + banshee_t *banshee = (banshee_t *)svga->p; + voodoo_t *voodoo = banshee->voodoo; + + voodoo->retrace_count++; + if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval)) + { + if (voodoo->swap_count > 0) + voodoo->swap_count--; + voodoo->swap_pending = 0; + + memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line)); + voodoo->retrace_count = 0; + banshee_set_overlay_addr(banshee, voodoo->swap_offset); + thread_set_event(voodoo->wake_fifo_thread); + voodoo->frame_count++; + } + + voodoo->overlay.src_y = 0; + banshee->desktop_addr = banshee->vidDesktopStartAddr; + banshee->desktop_y = 0; +} + +static uint8_t banshee_pci_read(int func, int addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; +// svga_t *svga = &banshee->svga; + uint8_t ret = 0; + + if (func) + return 0xff; +// banshee_log("Banshee PCI read %08X ", addr); + switch (addr) + { + case 0x00: ret = 0x1a; break; /*3DFX*/ + case 0x01: ret = 0x12; break; + + case 0x02: ret = (banshee->type == TYPE_BANSHEE) ? 0x03 : 0x05; break; + case 0x03: ret = 0x00; break; + + case 0x04: ret = banshee->pci_regs[0x04] & 0x27; break; + + case 0x07: ret = banshee->pci_regs[0x07] & 0x36; break; + + case 0x08: ret = (banshee->type == TYPE_BANSHEE) ? 3 : 1; break; /*Revision ID*/ + case 0x09: ret = 0; break; /*Programming interface*/ + + case 0x0a: ret = 0x00; break; /*Supports VGA interface*/ + case 0x0b: ret = 0x03; break; + + case 0x0d: ret = banshee->pci_regs[0x0d] & 0xf8; break; + + case 0x10: ret = 0x00; break; /*memBaseAddr0*/ + case 0x11: ret = 0x00; break; + case 0x12: ret = 0x00; break; + case 0x13: ret = banshee->memBaseAddr0 >> 24; break; + + case 0x14: ret = 0x00; break; /*memBaseAddr1*/ + case 0x15: ret = 0x00; break; + case 0x16: ret = 0x00; break; + case 0x17: ret = banshee->memBaseAddr1 >> 24; break; + + case 0x18: ret = 0x01; break; /*ioBaseAddr*/ + case 0x19: ret = banshee->ioBaseAddr >> 8; break; + case 0x1a: ret = 0x00; break; + case 0x1b: ret = 0x00; break; + + /*Undocumented, but Voodoo 3 BIOS checks this*/ + case 0x2c: ret = 0x1a; break; + case 0x2d: ret = 0x12; break; + case 0x2e: ret = (banshee->type == TYPE_V3_3000) ? 0x3a : 0x30; break; + case 0x2f: ret = 0x00; break; + + case 0x30: ret = banshee->pci_regs[0x30] & 0x01; break; /*BIOS ROM address*/ + case 0x31: ret = 0x00; break; + case 0x32: ret = banshee->pci_regs[0x32]; break; + case 0x33: ret = banshee->pci_regs[0x33]; break; + + case 0x3c: ret = banshee->pci_regs[0x3c]; break; + + case 0x3d: ret = 0x01; break; /*INTA*/ + + case 0x3e: ret = 0x04; break; + case 0x3f: ret = 0xff; break; + + } +// banshee_log("%02X\n", ret); + return ret; +} + +static void banshee_pci_write(int func, int addr, uint8_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; +// svga_t *svga = &banshee->svga; + + if (func) + return; +// banshee_log("Banshee write %08X %02X %04X:%08X\n", addr, val, CS, cpu_state.pc); + switch (addr) + { + case 0x00: case 0x01: case 0x02: case 0x03: + case 0x08: case 0x09: case 0x0a: case 0x0b: + case 0x3d: case 0x3e: case 0x3f: + return; + + case PCI_REG_COMMAND: + if (val & PCI_COMMAND_IO) + { + io_removehandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee); + if (banshee->ioBaseAddr) + io_removehandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + + io_sethandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee); + if (banshee->ioBaseAddr) + io_sethandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + } + else + { + io_removehandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee); + io_removehandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + } + banshee->pci_regs[PCI_REG_COMMAND] = val & 0x27; + banshee_updatemapping(banshee); + return; + case 0x07: + banshee->pci_regs[0x07] = val & 0x3e; + return; + case 0x0d: + banshee->pci_regs[0x0d] = val & 0xf8; + return; + + case 0x13: + banshee->memBaseAddr0 = (val & 0xfe) << 24; + banshee_updatemapping(banshee); + return; + + case 0x17: + banshee->memBaseAddr1 = (val & 0xfe) << 24; + banshee_updatemapping(banshee); + return; + + case 0x19: + if (banshee->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_IO) + io_removehandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + banshee->ioBaseAddr = val << 8; + if ((banshee->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_IO) && banshee->ioBaseAddr) + io_sethandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + banshee_log("Banshee ioBaseAddr=%08x\n", banshee->ioBaseAddr); + return; + + case 0x30: case 0x32: case 0x33: + banshee->pci_regs[addr] = val; + if (banshee->pci_regs[0x30] & 0x01) + { + uint32_t biosaddr = (banshee->pci_regs[0x32] << 16) | (banshee->pci_regs[0x33] << 24); + banshee_log("Banshee bios_rom enabled at %08x\n", biosaddr); + mem_mapping_set_addr(&banshee->bios_rom.mapping, biosaddr, 0x10000); + mem_mapping_enable(&banshee->bios_rom.mapping); + } + else + { + banshee_log("Banshee bios_rom disabled\n"); + mem_mapping_disable(&banshee->bios_rom.mapping); + } + return; + case 0x3c: + banshee->pci_regs[0x3c] = val; + return; + } +} + +static const device_config_t banshee_sgram_config[] = +{ + { + .name = "memory", + .description = "Memory size", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "8 MB", + .value = 8 + }, + { + .description = "16 MB", + .value = 16 + }, + { + .description = "" + } + }, + .default_int = 16 + }, + { + .name = "bilinear", + .description = "Bilinear filtering", + .type = CONFIG_BINARY, + .default_int = 1 + }, + { + .name = "dacfilter", + .description = "Screen Filter", + .type = CONFIG_BINARY, + .default_int = 0 + }, + { + .name = "render_threads", + .description = "Render threads", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "1", + .value = 1 + }, + { + .description = "2", + .value = 2 + }, + { + .description = "4", + .value = 4 + }, + { + .description = "" + } + }, + .default_int = 2 + }, +#ifndef NO_CODEGEN + { + .name = "recompiler", + .description = "Recompiler", + .type = CONFIG_BINARY, + .default_int = 1 + }, +#endif + { + .type = -1 + } +}; + +static const device_config_t banshee_sdram_config[] = +{ + { + .name = "bilinear", + .description = "Bilinear filtering", + .type = CONFIG_BINARY, + .default_int = 1 + }, + { + .name = "dacfilter", + .description = "Screen Filter", + .type = CONFIG_BINARY, + .default_int = 0 + }, + { + .name = "render_threads", + .description = "Render threads", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "1", + .value = 1 + }, + { + .description = "2", + .value = 2 + }, + { + .description = "4", + .value = 4 + }, + { + .description = "" + } + }, + .default_int = 2 + }, +#ifndef NO_CODEGEN + { + .name = "recompiler", + .description = "Recompiler", + .type = CONFIG_BINARY, + .default_int = 1 + }, +#endif + { + .type = -1 + } +}; + +static void *banshee_init_common(const device_t *info, wchar_t *fn, int has_sgram, int type, int voodoo_type) +{ + int mem_size; + banshee_t *banshee = malloc(sizeof(banshee_t)); + memset(banshee, 0, sizeof(banshee_t)); + + banshee->type = type; + + rom_init(&banshee->bios_rom, fn, 0xc0000, 0x10000, 0xffff, 0, MEM_MAPPING_EXTERNAL); + mem_mapping_disable(&banshee->bios_rom.mapping); + + if (has_sgram) + mem_size = device_get_config_int("memory"); + else + mem_size = 16; /*SDRAM Banshee only supports 16 MB*/ + + svga_init(info, &banshee->svga, banshee, mem_size << 20, + banshee_recalctimings, + banshee_in, banshee_out, + banshee_hwcursor_draw, + banshee_overlay_draw); + banshee->svga.vsync_callback = banshee_vsync_callback; + + mem_mapping_add(&banshee->linear_mapping, 0, 0, banshee_read_linear, + banshee_read_linear_w, + banshee_read_linear_l, + banshee_write_linear, + banshee_write_linear_w, + banshee_write_linear_l, + NULL, + MEM_MAPPING_EXTERNAL, + &banshee->svga); + mem_mapping_add(&banshee->reg_mapping_low, 0, 0,banshee_reg_read, + banshee_reg_readw, + banshee_reg_readl, + banshee_reg_write, + banshee_reg_writew, + banshee_reg_writel, + NULL, + MEM_MAPPING_EXTERNAL, + banshee); + mem_mapping_add(&banshee->reg_mapping_high, 0,0,banshee_reg_read, + banshee_reg_readw, + banshee_reg_readl, + banshee_reg_write, + banshee_reg_writew, + banshee_reg_writel, + NULL, + MEM_MAPPING_EXTERNAL, + banshee); + +// io_sethandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee); + + banshee->svga.bpp = 8; + banshee->svga.miscout = 1; + + banshee->dramInit0 = 1 << 27; + if (has_sgram && mem_size == 16) + banshee->dramInit0 |= (1 << 26); /*2xSGRAM = 16 MB*/ + if (!has_sgram) + banshee->dramInit1 = 1 << 30; /*SDRAM*/ + banshee->svga.decode_mask = 0x1ffffff; + + pci_add_card(PCI_ADD_VIDEO, banshee_pci_read, banshee_pci_write, banshee); + + banshee->voodoo = voodoo_2d3d_card_init(voodoo_type); + banshee->voodoo->p = banshee; + banshee->voodoo->vram = banshee->svga.vram; + banshee->voodoo->changedvram = banshee->svga.changedvram; + banshee->voodoo->fb_mem = banshee->svga.vram; + banshee->voodoo->fb_mask = banshee->svga.vram_mask; + banshee->voodoo->tex_mem[0] = banshee->svga.vram; + banshee->voodoo->tex_mem_w[0] = (uint16_t *)banshee->svga.vram; + banshee->voodoo->tex_mem[1] = banshee->svga.vram; + banshee->voodoo->tex_mem_w[1] = (uint16_t *)banshee->svga.vram; + banshee->voodoo->texture_mask = banshee->svga.vram_mask; + voodoo_generate_filter_v1(banshee->voodoo); + + banshee->vidSerialParallelPort = VIDSERIAL_DDC_DCK_W | VIDSERIAL_DDC_DDA_W; + + ddc_init(); + + return banshee; +} + +static void *banshee_init(const device_t *info) +{ + return banshee_init_common(info, L"roms/video/voodoo/Pci_sg.rom", 1, TYPE_BANSHEE, VOODOO_BANSHEE); +} +static void *creative_banshee_init(const device_t *info) +{ + return banshee_init_common(info, L"roms/video/voodoo/BlasterPCI.rom", 0, TYPE_BANSHEE, VOODOO_BANSHEE); +} +static void *v3_2000_init(const device_t *info) +{ + return banshee_init_common(info, L"roms/video/voodoo/2k11sd.rom", 0, TYPE_V3_2000, VOODOO_3); +} +static void *v3_3000_init(const device_t *info) +{ + return banshee_init_common(info, L"roms/video/voodoo/3k12sd.rom", 0, TYPE_V3_3000, VOODOO_3); +} + +static int banshee_available(void) +{ + return rom_present(L"roms/video/voodoo/Pci_sg.rom"); +} +static int creative_banshee_available(void) +{ + return rom_present(L"roms/video/voodoo/BlasterPCI.rom"); +} +static int v3_2000_available(void) +{ + return rom_present(L"roms/video/voodoo/2k11sd.rom"); +} +static int v3_3000_available(void) +{ + return rom_present(L"roms/video/voodoo/3k12sd.rom"); +} + +static void banshee_close(void *p) +{ + banshee_t *banshee = (banshee_t *)p; + + voodoo_card_close(banshee->voodoo); + svga_close(&banshee->svga); + + free(banshee); +} + +static void banshee_speed_changed(void *p) +{ + banshee_t *banshee = (banshee_t *)p; + + svga_recalctimings(&banshee->svga); +} + +static void banshee_force_redraw(void *p) +{ + banshee_t *banshee = (banshee_t *)p; + + banshee->svga.fullchange = changeframecount; +} + + +const device_t voodoo_banshee_device = +{ + "Voodoo Banshee PCI (reference)", + DEVICE_PCI, + 0, + banshee_init, + banshee_close, + NULL, + { banshee_available }, + banshee_speed_changed, + banshee_force_redraw, + banshee_sgram_config +}; + +const device_t creative_voodoo_banshee_device = +{ + "Creative Labs 3D Blaster Banshee PCI", + DEVICE_PCI, + 0, + creative_banshee_init, + banshee_close, + NULL, + { creative_banshee_available }, + banshee_speed_changed, + banshee_force_redraw, + banshee_sdram_config +}; + +const device_t voodoo_3_2000_device = +{ + "Voodoo 3 2000 PCI", + DEVICE_PCI, + 0, + v3_2000_init, + banshee_close, + NULL, + { v3_2000_available }, + banshee_speed_changed, + banshee_force_redraw, + banshee_sdram_config +}; + +const device_t voodoo_3_3000_device = +{ + "Voodoo 3 3000 PCI", + DEVICE_PCI, + 0, + v3_3000_init, + banshee_close, + NULL, + { v3_3000_available }, + banshee_speed_changed, + banshee_force_redraw, + banshee_sdram_config +}; diff --git a/src/video/vid_voodoo_banshee_blitter.c b/src/video/vid_voodoo_banshee_blitter.c new file mode 100644 index 000000000..2e5beef91 --- /dev/null +++ b/src/video/vid_voodoo_banshee_blitter.c @@ -0,0 +1,1393 @@ +/*Current issues : + - missing screen->screen scaled blits with format conversion + - missing YUV blits + - missing linestyle + - missing wait for vsync + - missing reversible lines + + Notes : + - 16 bpp runs with tiled framebuffer - to aid 3D? + 8 and 32 bpp use linear +*/ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/mem.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_banshee_blitter.h> +#include <86box/vid_voodoo_render.h> + +#define COMMAND_CMD_MASK (0xf) +#define COMMAND_CMD_NOP (0 << 0) +#define COMMAND_CMD_SCREEN_TO_SCREEN_BLT (1 << 0) +#define COMMAND_CMD_SCREEN_TO_SCREEN_STRETCH_BLT (2 << 0) +#define COMMAND_CMD_HOST_TO_SCREEN_BLT (3 << 0) +#define COMMAND_CMD_HOST_TO_SCREEN_STRETCH_BLT (4 << 0) +#define COMMAND_CMD_RECTFILL (5 << 0) +#define COMMAND_CMD_LINE (6 << 0) +#define COMMAND_CMD_POLYLINE (7 << 0) +#define COMMAND_CMD_POLYFILL (8 << 0) +#define COMMAND_INITIATE (1 << 8) +#define COMMAND_INC_X_START (1 << 10) +#define COMMAND_INC_Y_START (1 << 11) +#define COMMAND_PATTERN_MONO (1 << 13) +#define COMMAND_DX (1 << 14) +#define COMMAND_DY (1 << 15) +#define COMMAND_TRANS_MONO (1 << 16) +#define COMMAND_PATOFF_X_MASK (7 << 17) +#define COMMAND_PATOFF_X_SHIFT (17) +#define COMMAND_PATOFF_Y_MASK (7 << 20) +#define COMMAND_PATOFF_Y_SHIFT (20) +#define COMMAND_CLIP_SEL (1 << 23) + +#define CMDEXTRA_SRC_COLORKEY (1 << 0) +#define CMDEXTRA_DST_COLORKEY (1 << 1) +#define CMDEXTRA_FORCE_PAT_ROW0 (1 << 3) + +#define SRC_FORMAT_STRIDE_MASK (0x1fff) +#define SRC_FORMAT_COL_MASK (0xf << 16) +#define SRC_FORMAT_COL_1_BPP (0 << 16) +#define SRC_FORMAT_COL_8_BPP (1 << 16) +#define SRC_FORMAT_COL_16_BPP (3 << 16) +#define SRC_FORMAT_COL_24_BPP (4 << 16) +#define SRC_FORMAT_COL_32_BPP (5 << 16) +#define SRC_FORMAT_COL_YUYV (8 << 16) +#define SRC_FORMAT_COL_UYVY (9 << 16) +#define SRC_FORMAT_BYTE_SWIZZLE (1 << 20) +#define SRC_FORMAT_WORD_SWIZZLE (1 << 21) +#define SRC_FORMAT_PACKING_MASK (3 << 22) +#define SRC_FORMAT_PACKING_STRIDE (0 << 22) +#define SRC_FORMAT_PACKING_BYTE (1 << 22) +#define SRC_FORMAT_PACKING_WORD (2 << 22) +#define SRC_FORMAT_PACKING_DWORD (3 << 22) + +#define DST_FORMAT_STRIDE_MASK (0x1fff) +#define DST_FORMAT_COL_MASK (0xf << 16) +#define DST_FORMAT_COL_8_BPP (1 << 16) +#define DST_FORMAT_COL_16_BPP (3 << 16) +#define DST_FORMAT_COL_24_BPP (4 << 16) +#define DST_FORMAT_COL_32_BPP (5 << 16) + +#define BRES_ERROR_MASK (0xffff) +#define BRES_ERROR_USE (1 << 31) + +enum +{ + COLORKEY_8, + COLORKEY_16, + COLORKEY_32 +}; + +#ifdef ENABLE_BANSHEEBLT_LOG +int bansheeblt_do_log = ENABLE_BANSHEEBLT_LOG; + +static void +bansheeblt_log(const char *fmt, ...) +{ + va_list ap; + + if (bansheeblt_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define banshee_log(fmt, ...) +#endif + +static int colorkey(voodoo_t *voodoo, uint32_t src, int src_notdst, int color_format) +{ + uint32_t min = src_notdst ? voodoo->banshee_blt.srcColorkeyMin : voodoo->banshee_blt.dstColorkeyMin; + uint32_t max = src_notdst ? voodoo->banshee_blt.srcColorkeyMax : voodoo->banshee_blt.dstColorkeyMax; + + if (!(voodoo->banshee_blt.commandExtra & (src_notdst ? CMDEXTRA_SRC_COLORKEY : CMDEXTRA_DST_COLORKEY))) + return 0; + + switch (color_format) + { + case COLORKEY_8: + return ((src & 0xff) >= (min & 0xff)) && ((src & 0xff) <= (max & 0xff)); + + case COLORKEY_16: + { + int r = (src >> 11) & 0x1f, r_min = (min >> 11) & 0x1f, r_max = (max >> 11) & 0x1f; + int g = (src >> 5) & 0x3f, g_min = (min >> 5) & 0x3f, g_max = (max >> 5) & 0x3f; + int b = src & 0x1f, b_min = min & 0x1f, b_max = max & 0x1f; + + return (r >= r_min) && (r <= r_max) && (g >= g_min) && (g <= g_max) && + (b >= b_min) && (b <= b_max); + } + + case COLORKEY_32: + { + int r = (src >> 16) & 0xff, r_min = (min >> 16) & 0xff, r_max = (max >> 16) & 0xff; + int g = (src >> 8) & 0xff, g_min = (min >> 8) & 0xff, g_max = (max >> 8) & 0xff; + int b = src & 0xff, b_min = min & 0xff, b_max = max & 0xff; + + return (r >= r_min) && (r <= r_max) && (g >= g_min) && (g <= g_max) && + (b >= b_min) && (b <= b_max); + } + + default: + return 0; + } +} + +static uint32_t MIX(voodoo_t *voodoo, uint32_t dest, uint32_t src, uint32_t pattern, int colour_format_src, int colour_format_dest) +{ + int rop_nr = 0; + uint32_t result = 0; + uint32_t rop; + + if (colorkey(voodoo, src, 1, colour_format_src)) + rop_nr |= 2; + if (colorkey(voodoo, dest, 0, colour_format_dest)) + rop_nr |= 1; + + rop = voodoo->banshee_blt.rops[rop_nr]; + + if (rop & 0x01) + result |= (~pattern & ~src & ~dest); + if (rop & 0x02) + result |= (~pattern & ~src & dest); + if (rop & 0x04) + result |= (~pattern & src & ~dest); + if (rop & 0x08) + result |= (~pattern & src & dest); + if (rop & 0x10) + result |= ( pattern & ~src & ~dest); + if (rop & 0x20) + result |= ( pattern & ~src & dest); + if (rop & 0x40) + result |= ( pattern & src & ~dest); + if (rop & 0x80) + result |= ( pattern & src & dest); + + return result; +} + +static uint32_t get_addr(voodoo_t *voodoo, int x, int y, int src_notdst, uint32_t src_stride) +{ + uint32_t stride = src_notdst ? src_stride : voodoo->banshee_blt.dst_stride; + uint32_t base_addr = src_notdst ? voodoo->banshee_blt.srcBaseAddr : voodoo->banshee_blt.dstBaseAddr; + + if (src_notdst ? voodoo->banshee_blt.srcBaseAddr_tiled : voodoo->banshee_blt.dstBaseAddr_tiled) + return (base_addr + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*stride) & voodoo->fb_mask; + else + return (base_addr + x + y*stride) & voodoo->fb_mask; +} + +static void PLOT(voodoo_t *voodoo, int x, int y, int pat_x, int pat_y, uint8_t pattern_mask, uint8_t rop, uint32_t src, int src_colorkey) +{ + switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) + { + case DST_FORMAT_COL_8_BPP: + { + uint32_t addr = get_addr(voodoo, x, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = voodoo->vram[addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern8[(pat_x & 7) + (pat_y & 7)*8]; + + voodoo->vram[addr] = MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_8); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_16_BPP: + { + uint32_t addr = get_addr(voodoo, x*2, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x*2 + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = *(uint16_t *)&voodoo->vram[addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern16[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint16_t *)&voodoo->vram[addr] = MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_16); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_24_BPP: + { + uint32_t addr = get_addr(voodoo, x*3, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x*3 + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = *(uint32_t *)&voodoo->vram[addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern24[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[addr] = (MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_32) & 0xffffff) | (dest & 0xff000000); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_32_BPP: + { + uint32_t addr = get_addr(voodoo, x*4, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x*4 + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = *(uint32_t *)&voodoo->vram[addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[addr] = MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_32); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + } +} + +static void update_src_stride(voodoo_t *voodoo) +{ + int bpp; + + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) + { + case SRC_FORMAT_COL_1_BPP: + bpp = 1; + break; + case SRC_FORMAT_COL_8_BPP: + bpp = 8; + break; + case SRC_FORMAT_COL_16_BPP: + bpp = 16; + break; + case SRC_FORMAT_COL_24_BPP: + bpp = 24; + break; + case SRC_FORMAT_COL_32_BPP: + bpp = 32; + break; + + default: + bpp = 16; + break; + } + + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_PACKING_MASK) + { + case SRC_FORMAT_PACKING_STRIDE: + voodoo->banshee_blt.src_stride_src = voodoo->banshee_blt.src_stride; //voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK; + voodoo->banshee_blt.src_stride_dest = voodoo->banshee_blt.src_stride; //voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK; + voodoo->banshee_blt.host_data_size_src = (voodoo->banshee_blt.srcSizeX * bpp + 7) >> 3; + voodoo->banshee_blt.host_data_size_dest = (voodoo->banshee_blt.dstSizeX * bpp + 7) >> 3; +// bansheeblt_log("Stride packing %08x %08x bpp=%i dstSizeX=%i\n", voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest, bpp, voodoo->banshee_blt.dstSizeX); + break; + + case SRC_FORMAT_PACKING_BYTE: + voodoo->banshee_blt.src_stride_src = (voodoo->banshee_blt.srcSizeX * bpp + 7) >> 3; + voodoo->banshee_blt.src_stride_dest = (voodoo->banshee_blt.dstSizeX * bpp + 7) >> 3; + voodoo->banshee_blt.host_data_size_src = voodoo->banshee_blt.src_stride_src; + voodoo->banshee_blt.host_data_size_dest = voodoo->banshee_blt.src_stride_dest; +// bansheeblt_log("Byte packing %08x %08x\n", voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest); + break; + + case SRC_FORMAT_PACKING_WORD: + voodoo->banshee_blt.src_stride_src = ((voodoo->banshee_blt.srcSizeX * bpp + 15) >> 4) * 2; + voodoo->banshee_blt.src_stride_dest = ((voodoo->banshee_blt.dstSizeX * bpp + 15) >> 4) * 2; + voodoo->banshee_blt.host_data_size_src = voodoo->banshee_blt.src_stride_src; + voodoo->banshee_blt.host_data_size_dest = voodoo->banshee_blt.src_stride_dest; +// bansheeblt_log("Word packing %08x %08x\n", voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest); + break; + + case SRC_FORMAT_PACKING_DWORD: + voodoo->banshee_blt.src_stride_src = ((voodoo->banshee_blt.srcSizeX * bpp + 31) >> 5) * 4; + voodoo->banshee_blt.src_stride_dest = ((voodoo->banshee_blt.dstSizeX * bpp + 31) >> 5) * 4; + voodoo->banshee_blt.host_data_size_src = voodoo->banshee_blt.src_stride_src; + voodoo->banshee_blt.host_data_size_dest = voodoo->banshee_blt.src_stride_dest; +// bansheeblt_log("Dword packing %08x %08x\n", voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest); + break; + } +} + +static void end_command(voodoo_t *voodoo) +{ + /*Update dest coordinates if required*/ + if (voodoo->banshee_blt.command & COMMAND_INC_X_START) + { + voodoo->banshee_blt.dstXY &= ~0x0000ffff; + voodoo->banshee_blt.dstXY |= (voodoo->banshee_blt.dstX & 0xffff); + } + + if (voodoo->banshee_blt.command & COMMAND_INC_Y_START) + { + voodoo->banshee_blt.dstXY &= ~0xffff0000; + voodoo->banshee_blt.dstXY |= (voodoo->banshee_blt.dstY << 16); + } +} + +static void banshee_do_rectfill(voodoo_t *voodoo) +{ + clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0]; + int dst_y = voodoo->banshee_blt.dstY; + uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern; + int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.dstY); + int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) == + (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO); + uint8_t rop = voodoo->banshee_blt.command >> 24; + +// bansheeblt_log("banshee_do_rectfill: size=%i,%i dst=%i,%i\n", voodoo->banshee_blt.dstSizeX, voodoo->banshee_blt.dstSizeY, voodoo->banshee_blt.dstX, voodoo->banshee_blt.dstY); +// bansheeblt_log("clipping: %i,%i -> %i,%i\n", clip->x_min, clip->y_min, clip->x_max, clip->y_max); +// bansheeblt_log("colorFore=%08x\n", voodoo->banshee_blt.colorFore); + for (voodoo->banshee_blt.cur_y = 0; voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY; voodoo->banshee_blt.cur_y++) + { + int dst_x = voodoo->banshee_blt.dstX; + + if (dst_y >= clip->y_min && dst_y < clip->y_max) + { + int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX; + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + + for (voodoo->banshee_blt.cur_x = 0; voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX; voodoo->banshee_blt.cur_x++) + { + int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + + if (dst_x >= clip->x_min && dst_x < clip->x_max && pattern_trans) + PLOT(voodoo, dst_x, dst_y, pat_x, pat_y, pattern_mask, rop, voodoo->banshee_blt.colorFore, COLORKEY_32); + + dst_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + pat_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + } + } + dst_y += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + if (!(voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0)) + pat_y += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + } + + end_command(voodoo); +} + +static void do_screen_to_screen_line(voodoo_t *voodoo, uint8_t *src_p, int use_x_dir, int src_x, int src_tiled) +{ + clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0]; + int dst_y = voodoo->banshee_blt.dstY; + int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.dstY); + uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern; + int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) == + (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO); + uint8_t rop = voodoo->banshee_blt.command >> 24; + int src_colorkey; + + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) + { + case SRC_FORMAT_COL_8_BPP: + src_colorkey = COLORKEY_8; + break; + case SRC_FORMAT_COL_16_BPP: + src_colorkey = COLORKEY_16; + break; + default: + src_colorkey = COLORKEY_32; + break; + } +// bansheeblt_log("do_screen_to_screen_line: srcFormat=%08x dst=%08x\n", voodoo->banshee_blt.srcFormat, voodoo->banshee_blt.dstFormat); + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == + (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK)) + { + /*No conversion required*/ + if (dst_y >= clip->y_min && dst_y < clip->y_max) + { + int dst_x = voodoo->banshee_blt.dstX; + int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX; + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + + for (voodoo->banshee_blt.cur_x = 0; voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX; voodoo->banshee_blt.cur_x++) + { + int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + int src_x_real = (src_x * voodoo->banshee_blt.src_bpp) >> 3; + + if (src_tiled) + src_x_real = (src_x_real & 127) + ((src_x_real >> 7) * 128*32); + + if (dst_x >= clip->x_min && dst_x < clip->x_max && pattern_trans) + { + switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) + { + case DST_FORMAT_COL_8_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = src_p[src_x_real]; + uint32_t dest = voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern8[(pat_x & 7) + (pat_y & 7)*8]; + + voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_8, COLORKEY_8); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_16_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*2, dst_y, 0, 0);//dst_addr = (voodoo->banshee_blt.dstBaseAddr + dst_x*2 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint16_t *)&src_p[src_x_real]; + uint32_t dest = *(uint16_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern16[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint16_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_16, COLORKEY_16); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_24_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*3, dst_y, 0, 0);//dst_addr = (voodoo->banshee_blt.dstBaseAddr + dst_x*3 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint32_t *)&src_p[src_x_real]; + uint32_t dest = *(uint32_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern24[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[dst_addr] = (MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32) & 0xffffff) | (dest & 0xff000000); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_32_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*4, dst_y, 0, 0);//dst_addr = (voodoo->banshee_blt.dstBaseAddr + dst_x*4 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint32_t *)&src_p[src_x_real]; + uint32_t dest = *(uint32_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + } + } + if (use_x_dir) + { + src_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + dst_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + pat_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + } + else + { + src_x++; + dst_x++; + pat_x++; + } + } + } + voodoo->banshee_blt.srcY += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + voodoo->banshee_blt.dstY += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + } + else + { + /*Conversion required*/ + if (dst_y >= clip->y_min && dst_y < clip->y_max) + { +// int src_x = voodoo->banshee_blt.srcX; + int dst_x = voodoo->banshee_blt.dstX; + int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX; + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + + for (voodoo->banshee_blt.cur_x = 0; voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX; voodoo->banshee_blt.cur_x++) + { + int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + int src_x_real = (src_x * voodoo->banshee_blt.src_bpp) >> 3; + + if (src_tiled) + src_x_real = (src_x_real & 127) + ((src_x_real >> 7) * 128*32); + + if (dst_x >= clip->x_min && dst_x < clip->x_max && pattern_trans) + { + uint32_t src_data = 0; + int transparent = 0; + + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) + { + case SRC_FORMAT_COL_1_BPP: + { + uint8_t src_byte = src_p[src_x_real]; + src_data = (src_byte & (0x80 >> (src_x & 7))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack; + if (voodoo->banshee_blt.command & COMMAND_TRANS_MONO) + transparent = !(src_byte & (0x80 >> (src_x & 7))); +// bansheeblt_log(" 1bpp src_byte=%02x src_x=%i src_data=%x transparent=%i\n", src_byte, src_x, src_data, transparent); + break; + } + case SRC_FORMAT_COL_8_BPP: + { + src_data = src_p[src_x_real]; + break; + } + case SRC_FORMAT_COL_16_BPP: + { + uint16_t src_16 = *(uint16_t *)&src_p[src_x_real]; + int r = (src_16 >> 11); + int g = (src_16 >> 5) & 0x3f; + int b = src_16 & 0x1f; + + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + src_data = (r << 16) | (g << 8) | b; + break; + } + case SRC_FORMAT_COL_24_BPP: + { + src_data = *(uint32_t *)&src_p[src_x_real]; + break; + } + case SRC_FORMAT_COL_32_BPP: + { + src_data = *(uint32_t *)&src_p[src_x_real]; + break; + } + + default: + fatal("banshee_do_screen_to_screen_blt: unknown srcFormat %08x\n", voodoo->banshee_blt.srcFormat); + } + + if ((voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) == DST_FORMAT_COL_16_BPP && + (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) != SRC_FORMAT_COL_1_BPP) + { + int r = src_data >> 16; + int g = (src_data >> 8) & 0xff; + int b = src_data & 0xff; + + src_data = (b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11); + } + + if (!transparent) + PLOT(voodoo, dst_x, dst_y, pat_x, pat_y, pattern_mask, rop, src_data, src_colorkey); + } + if (use_x_dir) + { + src_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + dst_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + pat_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + } + else + { + src_x++; + dst_x++; + pat_x++; + } + } + } + voodoo->banshee_blt.srcY += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + voodoo->banshee_blt.dstY += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + } +} + +static void banshee_do_screen_to_screen_blt(voodoo_t *voodoo) +{ +// bansheeblt_log("screen_to_screen: %08x %08x %08x\n", voodoo->banshee_blt.srcFormat, voodoo->banshee_blt.src_stride, voodoo->banshee_blt.src_stride_dest); +// return; + for (voodoo->banshee_blt.cur_y = 0; voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY; voodoo->banshee_blt.cur_y++) + { + uint32_t src_addr = get_addr(voodoo, 0, voodoo->banshee_blt.srcY, 1, voodoo->banshee_blt.src_stride_dest); +// if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) +// bansheeblt_log(" srcY=%i src_addr=%08x\n", voodoo->banshee_blt.srcY, src_addr); + do_screen_to_screen_line(voodoo, &voodoo->vram[src_addr], 1, voodoo->banshee_blt.srcX, voodoo->banshee_blt.srcBaseAddr_tiled); + } + end_command(voodoo); +} + +static void banshee_do_host_to_screen_blt(voodoo_t *voodoo, int count, uint32_t data) +{ +// if (voodoo->banshee_blt.dstBaseAddr == 0xee5194) +// bansheeblt_log("banshee_do_host_to_screen_blt: data=%08x host_data_count=%i src_stride_dest=%i host_data_size_dest=%i\n", data, voodoo->banshee_blt.host_data_count, voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest); + + if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_BYTE_SWIZZLE) + data = (data >> 24) | ((data >> 8) & 0xff00) | ((data << 8) & 0xff0000) | (data << 24); + if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_WORD_SWIZZLE) + data = (data >> 16) | (data << 16); + + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_PACKING_MASK) == SRC_FORMAT_PACKING_STRIDE) + { + int last_byte; + + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) + last_byte = ((voodoo->banshee_blt.srcX & 31) + voodoo->banshee_blt.dstSizeX + 7) >> 3; + else + last_byte = (voodoo->banshee_blt.srcX & 3) + voodoo->banshee_blt.host_data_size_dest; + + *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data; + voodoo->banshee_blt.host_data_count += 4; + if (voodoo->banshee_blt.host_data_count >= last_byte) + { +// bansheeblt_log(" %i %i srcX=%i srcFormat=%08x\n", voodoo->banshee_blt.cur_y, voodoo->banshee_blt.dstSizeY, voodoo->banshee_blt.srcX); + if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY) + { + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) + do_screen_to_screen_line(voodoo, &voodoo->banshee_blt.host_data[(voodoo->banshee_blt.srcX >> 3) & 3], 0, voodoo->banshee_blt.srcX & 7, 0); + else + do_screen_to_screen_line(voodoo, &voodoo->banshee_blt.host_data[voodoo->banshee_blt.srcX & 3], 0, 0, 0); + voodoo->banshee_blt.cur_y++; + if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY) + end_command(voodoo); + } + + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) + voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) << 3; + else + voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK); + + voodoo->banshee_blt.host_data_count = 0; + } + } + else + { + *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data; + voodoo->banshee_blt.host_data_count += 4; + while (voodoo->banshee_blt.host_data_count >= voodoo->banshee_blt.src_stride_dest) + { + voodoo->banshee_blt.host_data_count -= voodoo->banshee_blt.src_stride_dest; + +// bansheeblt_log(" %i %i\n", voodoo->banshee_blt.cur_y, voodoo->banshee_blt.dstSizeY); + if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY) + { + do_screen_to_screen_line(voodoo, voodoo->banshee_blt.host_data, 0, 0, 0); + voodoo->banshee_blt.cur_y++; + if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY) + end_command(voodoo); + } + + if (voodoo->banshee_blt.host_data_count) + { +// bansheeblt_log(" remaining=%i\n", voodoo->banshee_blt.host_data_count); + *(uint32_t *)&voodoo->banshee_blt.host_data[0] = data >> (4-voodoo->banshee_blt.host_data_count)*8; + } + } + } +} + +static void do_screen_to_screen_stretch_line(voodoo_t *voodoo,uint8_t *src_p, int src_x, int *src_y) +{ + clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0]; +// int src_y = voodoo->banshee_blt.srcY; + int dst_y = voodoo->banshee_blt.dstY; + int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.dstY); + uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern; + int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) == + (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO); + uint32_t *colorPattern = voodoo->banshee_blt.colorPattern; + + //int error_y = voodoo->banshee_blt.dstSizeY / 2; + +/* bansheeblt_log("banshee_do_screen_to_screen_stretch_blt:\n"); + bansheeblt_log(" srcXY=%i,%i srcsizeXY=%i,%i\n", voodoo->banshee_blt.srcX, voodoo->banshee_blt.srcY, voodoo->banshee_blt.srcSizeX, voodoo->banshee_blt.srcSizeY); + bansheeblt_log(" dstXY=%i,%i dstsizeXY=%i,%i\n", voodoo->banshee_blt.dstX, voodoo->banshee_blt.dstY, voodoo->banshee_blt.dstSizeX, voodoo->banshee_blt.dstSizeY);*/ + if (dst_y >= clip->y_min && dst_y < clip->y_max) + { +// int src_x = voodoo->banshee_blt.srcX; + int dst_x = voodoo->banshee_blt.dstX; + int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX; + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + int error_x = voodoo->banshee_blt.dstSizeX / 2; + +// bansheeblt_log(" Plot dest line %03i : src line %03i\n", dst_y, src_y); + for (voodoo->banshee_blt.cur_x = 0; voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX; voodoo->banshee_blt.cur_x++) + { + int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + + if (dst_x >= clip->x_min && dst_x < clip->x_max && pattern_trans) + { + switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) + { + case DST_FORMAT_COL_8_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = src_p[src_x]; + uint32_t dest = voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_8, COLORKEY_8); +// bansheeblt_log("%i,%i : sdp=%02x,%02x,%02x res=%02x\n", voodoo->banshee_blt.cur_x, voodoo->banshee_blt.cur_y, src, dest, pattern, voodoo->vram[dst_addr]); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_16_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*2, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x*2 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint16_t *)&src_p[src_x*2]; + uint32_t dest = *(uint16_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint16_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_16, COLORKEY_16); +// bansheeblt_log("%i,%i : sdp=%02x,%02x,%02x res=%02x\n", voodoo->banshee_blt.cur_x, voodoo->banshee_blt.cur_y, src, dest, pattern, *(uint16_t *)&voodoo->vram[dst_addr]); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_24_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*3, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x*3 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint32_t *)&src_p[src_x*3]; + uint32_t dest = *(uint32_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[dst_addr] = (MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32) & 0xffffff) | (*(uint32_t *)&voodoo->vram[dst_addr] & 0xff000000); +// bansheeblt_log("%i,%i : sdp=%02x,%02x,%02x res=%02x\n", voodoo->banshee_blt.cur_x, voodoo->banshee_blt.cur_y, src, dest, pattern, voodoo->vram[dst_addr]); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_32_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*4, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x*4 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint32_t *)&src_p[src_x*4]; + uint32_t dest = *(uint32_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32); +// bansheeblt_log("%i,%i : sdp=%02x,%02x,%02x res=%02x\n", voodoo->banshee_blt.cur_x, voodoo->banshee_blt.cur_y, src, dest, pattern, voodoo->vram[dst_addr]); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + } + } + + error_x -= voodoo->banshee_blt.srcSizeX; + while (error_x < 0) + { + error_x += voodoo->banshee_blt.dstSizeX; + src_x++; + } + dst_x++; + pat_x++; + } + } + + voodoo->banshee_blt.bres_error_0 -= voodoo->banshee_blt.srcSizeY; + while (voodoo->banshee_blt.bres_error_0 < 0) + { + voodoo->banshee_blt.bres_error_0 += voodoo->banshee_blt.dstSizeY; + if (src_y) + (*src_y) += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + } + voodoo->banshee_blt.dstY += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; +// pat_y += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; +} + +static void banshee_do_screen_to_screen_stretch_blt(voodoo_t *voodoo) +{ +// bansheeblt_log("screen_to_screen: %08x %08x %08x\n", voodoo->banshee_blt.srcFormat, voodoo->banshee_blt.src_stride, voodoo->banshee_blt.src_stride_dest); +// return; + for (voodoo->banshee_blt.cur_y = 0; voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY; voodoo->banshee_blt.cur_y++) + { + uint32_t src_addr = get_addr(voodoo, 0, voodoo->banshee_blt.srcY, 1, voodoo->banshee_blt.src_stride_src);//(voodoo->banshee_blt.srcBaseAddr + voodoo->banshee_blt.srcY*voodoo->banshee_blt.src_stride_src) & voodoo->fb_mask; +// bansheeblt_log("scale_blit %i %08x %08x\n", voodoo->banshee_blt.cur_y, src_addr, voodoo->banshee_blt.command); +// if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) +// bansheeblt_log(" srcY=%i src_addr=%08x\n", voodoo->banshee_blt.srcY, src_addr); + do_screen_to_screen_stretch_line(voodoo, &voodoo->vram[src_addr], voodoo->banshee_blt.srcX, &voodoo->banshee_blt.srcY); + } + end_command(voodoo); +} + +static void banshee_do_host_to_screen_stretch_blt(voodoo_t *voodoo, int count, uint32_t data) +{ +// if (voodoo->banshee_blt.dstBaseAddr == 0xee5194) +// bansheeblt_log("banshee_do_host_to_screen_blt: data=%08x host_data_count=%i src_stride_dest=%i host_data_size_dest=%i\n", data, voodoo->banshee_blt.host_data_count, voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest); + + if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_BYTE_SWIZZLE) + data = (data >> 24) | ((data >> 8) & 0xff00) | ((data << 8) & 0xff0000) | (data << 24); + if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_WORD_SWIZZLE) + data = (data >> 16) | (data << 16); + + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_PACKING_MASK) == SRC_FORMAT_PACKING_STRIDE) + { + int last_byte = (voodoo->banshee_blt.srcX & 3) + voodoo->banshee_blt.host_data_size_src; + + *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data; + voodoo->banshee_blt.host_data_count += 4; + if (voodoo->banshee_blt.host_data_count >= last_byte) + { +// bansheeblt_log(" %i %i srcX=%i srcFormat=%08x\n", voodoo->banshee_blt.cur_y, voodoo->banshee_blt.dstSizeY, voodoo->banshee_blt.srcX); + if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY) + { + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) + do_screen_to_screen_stretch_line(voodoo, &voodoo->banshee_blt.host_data[(voodoo->banshee_blt.srcX >> 3) & 3], voodoo->banshee_blt.srcX & 7, NULL); + else + do_screen_to_screen_stretch_line(voodoo, &voodoo->banshee_blt.host_data[voodoo->banshee_blt.srcX & 3], 0, NULL); + voodoo->banshee_blt.cur_y++; + if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY) + end_command(voodoo); + } + + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) + voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) << 3; + else + voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK); + + voodoo->banshee_blt.host_data_count = 0; + } + } + else + { + *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data; + voodoo->banshee_blt.host_data_count += 4; + while (voodoo->banshee_blt.host_data_count >= voodoo->banshee_blt.src_stride_src) + { + voodoo->banshee_blt.host_data_count -= voodoo->banshee_blt.src_stride_src; + +// bansheeblt_log(" %i %i\n", voodoo->banshee_blt.cur_y, voodoo->banshee_blt.dstSizeY); + if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY) + { + do_screen_to_screen_stretch_line(voodoo, voodoo->banshee_blt.host_data, 0, NULL); + voodoo->banshee_blt.cur_y++; + if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY) + end_command(voodoo); + } + + if (voodoo->banshee_blt.host_data_count) + { +// bansheeblt_log(" remaining=%i\n", voodoo->banshee_blt.host_data_count); + *(uint32_t *)&voodoo->banshee_blt.host_data[0] = data >> (4-voodoo->banshee_blt.host_data_count)*8; + } + } + } +} + +static void banshee_do_line(voodoo_t *voodoo) +{ + clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0]; + uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern; + int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.srcY); + int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.srcX; + int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) == + (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO); + uint8_t rop = voodoo->banshee_blt.command >> 24; + int dx = ABS(voodoo->banshee_blt.dstX - voodoo->banshee_blt.srcX); + int dy = ABS(voodoo->banshee_blt.dstY - voodoo->banshee_blt.srcY); + int x_inc = (voodoo->banshee_blt.dstX > voodoo->banshee_blt.srcX) ? 1 : -1; + int y_inc = (voodoo->banshee_blt.dstY > voodoo->banshee_blt.srcY) ? 1 : -1; + int x = voodoo->banshee_blt.srcX; + int y = voodoo->banshee_blt.srcY; + int error; + + if (dx > dy) /*X major*/ + { + error = dx/2; + while (x != voodoo->banshee_blt.dstX) + { + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + + if (y >= clip->y_min && y < clip->y_max && x >= clip->x_min && x < clip->x_max && pattern_trans) + PLOT(voodoo, x, y, pat_x, pat_y, pattern_mask, rop, voodoo->banshee_blt.colorFore, COLORKEY_32); + + error -= dy; + if (error < 0) + { + error += dx; + y += y_inc; + pat_y += y_inc; + } + x += x_inc; + pat_x += x_inc; + } + } + else /*Y major*/ + { + error = dy/2; + while (y != voodoo->banshee_blt.dstY) + { + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + + if (y >= clip->y_min && y < clip->y_max && x >= clip->x_min && x < clip->x_max && pattern_trans) + PLOT(voodoo, x, y, pat_x, pat_y, pattern_mask, rop, voodoo->banshee_blt.colorFore, COLORKEY_32); + + error -= dx; + if (error < 0) + { + error += dy; + x += x_inc; + pat_x += x_inc; + } + y += y_inc; + pat_y += y_inc; + } + } + + voodoo->banshee_blt.srcXY = (x & 0xffff) | (y << 16); + voodoo->banshee_blt.srcX = x; + voodoo->banshee_blt.srcY = y; +} + +static void banshee_polyfill_start(voodoo_t *voodoo) +{ + voodoo->banshee_blt.lx[0] = voodoo->banshee_blt.srcX; + voodoo->banshee_blt.ly[0] = voodoo->banshee_blt.srcY; + voodoo->banshee_blt.rx[0] = voodoo->banshee_blt.dstX; + voodoo->banshee_blt.ry[0] = voodoo->banshee_blt.dstY; + voodoo->banshee_blt.lx[1] = voodoo->banshee_blt.srcX; + voodoo->banshee_blt.ly[1] = voodoo->banshee_blt.srcY; + voodoo->banshee_blt.rx[1] = voodoo->banshee_blt.dstX; + voodoo->banshee_blt.ry[1] = voodoo->banshee_blt.dstY; + voodoo->banshee_blt.lx_cur = voodoo->banshee_blt.srcX; + voodoo->banshee_blt.rx_cur = voodoo->banshee_blt.dstX; +} + +static void banshee_polyfill_continue(voodoo_t *voodoo, uint32_t data) +{ + clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0]; + uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern; + int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) == + (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO); + uint8_t rop = voodoo->banshee_blt.command >> 24; + int y = MAX(voodoo->banshee_blt.ly[0], voodoo->banshee_blt.ry[0]); + int y_end; + +// bansheeblt_log("Polyfill : data %08x\n", data); + + /*if r1.y>=l1.y, next vertex is left*/ + if (voodoo->banshee_blt.ry[1] >= voodoo->banshee_blt.ly[1]) + { + voodoo->banshee_blt.lx[1] = ((int32_t)(data << 19)) >> 19; + voodoo->banshee_blt.ly[1] = ((int32_t)(data << 3)) >> 19; + voodoo->banshee_blt.dx[0] = ABS(voodoo->banshee_blt.lx[1] - voodoo->banshee_blt.lx[0]); + voodoo->banshee_blt.dy[0] = ABS(voodoo->banshee_blt.ly[1] - voodoo->banshee_blt.ly[0]); + voodoo->banshee_blt.x_inc[0] = (voodoo->banshee_blt.lx[1] > voodoo->banshee_blt.lx[0]) ? 1 : -1; + voodoo->banshee_blt.error[0] = voodoo->banshee_blt.dy[0] / 2; + } + else + { + voodoo->banshee_blt.rx[1] = ((int32_t)(data << 19)) >> 19; + voodoo->banshee_blt.ry[1] = ((int32_t)(data << 3)) >> 19; + voodoo->banshee_blt.dx[1] = ABS(voodoo->banshee_blt.rx[1] - voodoo->banshee_blt.rx[0]); + voodoo->banshee_blt.dy[1] = ABS(voodoo->banshee_blt.ry[1] - voodoo->banshee_blt.ry[0]); + voodoo->banshee_blt.x_inc[1] = (voodoo->banshee_blt.rx[1] > voodoo->banshee_blt.rx[0]) ? 1 : -1; + voodoo->banshee_blt.error[1] = voodoo->banshee_blt.dy[1] / 2; + } + +/* bansheeblt_log(" verts now : %03i,%03i %03i,%03i\n", voodoo->banshee_blt.lx[0], voodoo->banshee_blt.ly[0], voodoo->banshee_blt.rx[0], voodoo->banshee_blt.ry[0]); + bansheeblt_log(" %03i,%03i %03i,%03i\n", voodoo->banshee_blt.lx[1], voodoo->banshee_blt.ly[1], voodoo->banshee_blt.rx[1], voodoo->banshee_blt.ry[1]); + bansheeblt_log(" left dx=%i dy=%i x_inc=%i error=%i\n", voodoo->banshee_blt.dx[0],voodoo->banshee_blt.dy[0],voodoo->banshee_blt.x_inc[0],voodoo->banshee_blt.error[0]); + bansheeblt_log(" right dx=%i dy=%i x_inc=%i error=%i\n", voodoo->banshee_blt.dx[1],voodoo->banshee_blt.dy[1],voodoo->banshee_blt.x_inc[1],voodoo->banshee_blt.error[1]);*/ + y_end = MIN(voodoo->banshee_blt.ly[1], voodoo->banshee_blt.ry[1]); +// bansheeblt_log("Polyfill : draw spans from %i-%i\n", y, y_end); + for (; y < y_end; y++) + { +// bansheeblt_log(" %i: %i %i\n", y, voodoo->banshee_blt.lx_cur, voodoo->banshee_blt.rx_cur); + /*Draw span from lx_cur to rx_cur*/ + if (y >= clip->y_min && y < clip->y_max) + { + int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + y); + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + int x; + + for (x = voodoo->banshee_blt.lx_cur; x < voodoo->banshee_blt.rx_cur; x++) + { + int pat_x = voodoo->banshee_blt.patoff_x + x; + int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + + if (x >= clip->x_min && x < clip->x_max && pattern_trans) + PLOT(voodoo, x, y, pat_x, pat_y, pattern_mask, rop, voodoo->banshee_blt.colorFore, COLORKEY_32); + } + } + + voodoo->banshee_blt.error[0] -= voodoo->banshee_blt.dx[0]; + while (voodoo->banshee_blt.error[0] < 0) + { + voodoo->banshee_blt.error[0] += voodoo->banshee_blt.dy[0]; + voodoo->banshee_blt.lx_cur += voodoo->banshee_blt.x_inc[0]; + } + voodoo->banshee_blt.error[1] -= voodoo->banshee_blt.dx[1]; + while (voodoo->banshee_blt.error[1] < 0) + { + voodoo->banshee_blt.error[1] += voodoo->banshee_blt.dy[1]; + voodoo->banshee_blt.rx_cur += voodoo->banshee_blt.x_inc[1]; + } + } + + if (voodoo->banshee_blt.ry[1] == voodoo->banshee_blt.ly[1]) + { + voodoo->banshee_blt.lx[0] = voodoo->banshee_blt.lx[1]; + voodoo->banshee_blt.ly[0] = voodoo->banshee_blt.ly[1]; + voodoo->banshee_blt.rx[0] = voodoo->banshee_blt.rx[1]; + voodoo->banshee_blt.ry[0] = voodoo->banshee_blt.ry[1]; + } + else if (voodoo->banshee_blt.ry[1] >= voodoo->banshee_blt.ly[1]) + { + voodoo->banshee_blt.lx[0] = voodoo->banshee_blt.lx[1]; + voodoo->banshee_blt.ly[0] = voodoo->banshee_blt.ly[1]; + } + else + { + voodoo->banshee_blt.rx[0] = voodoo->banshee_blt.rx[1]; + voodoo->banshee_blt.ry[0] = voodoo->banshee_blt.ry[1]; + } +} + +static void banshee_do_2d_blit(voodoo_t *voodoo, int count, uint32_t data) +{ + switch (voodoo->banshee_blt.command & COMMAND_CMD_MASK) + { + case COMMAND_CMD_NOP: + break; + + case COMMAND_CMD_SCREEN_TO_SCREEN_BLT: + banshee_do_screen_to_screen_blt(voodoo); + break; + + case COMMAND_CMD_SCREEN_TO_SCREEN_STRETCH_BLT: + banshee_do_screen_to_screen_stretch_blt(voodoo); + break; + + case COMMAND_CMD_HOST_TO_SCREEN_BLT: + banshee_do_host_to_screen_blt(voodoo, count, data); + break; + + case COMMAND_CMD_HOST_TO_SCREEN_STRETCH_BLT: + banshee_do_host_to_screen_stretch_blt(voodoo, count, data); + break; + + case COMMAND_CMD_RECTFILL: + banshee_do_rectfill(voodoo); + break; + + case COMMAND_CMD_POLYLINE: + banshee_do_line(voodoo); + break; + + default: + fatal("banshee_do_2d_blit: unknown command=%08x\n", voodoo->banshee_blt.command); + } +} + +void voodoo_2d_reg_writel(voodoo_t *voodoo, uint32_t addr, uint32_t val) +{ +// /*if ((addr & 0x1fc) != 0x80) */bansheeblt_log("2D reg write %03x %08x\n", addr & 0x1fc, val); + switch (addr & 0x1fc) + { + case 0x08: + voodoo->banshee_blt.clip0Min = val; + voodoo->banshee_blt.clip[0].x_min = val & 0xfff; + voodoo->banshee_blt.clip[0].y_min = (val >> 16) & 0xfff; + break; + case 0x0c: + voodoo->banshee_blt.clip0Max = val; + voodoo->banshee_blt.clip[0].x_max = val & 0xfff; + voodoo->banshee_blt.clip[0].y_max = (val >> 16) & 0xfff; + break; + case 0x10: + voodoo->banshee_blt.dstBaseAddr = val & 0xffffff; + voodoo->banshee_blt.dstBaseAddr_tiled = val & 0x80000000; + if (voodoo->banshee_blt.dstBaseAddr_tiled) + voodoo->banshee_blt.dst_stride = (voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK) * 128*32; + else + voodoo->banshee_blt.dst_stride = voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK; +// bansheeblt_log("dstBaseAddr=%08x\n", val); + break; + case 0x14: + voodoo->banshee_blt.dstFormat = val; + if (voodoo->banshee_blt.dstBaseAddr_tiled) + voodoo->banshee_blt.dst_stride = (voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK) * 128*32; + else + voodoo->banshee_blt.dst_stride = voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK; +// bansheeblt_log("dstFormat=%08x\n", val); + break; + + case 0x18: + voodoo->banshee_blt.srcColorkeyMin = val & 0xffffff; + break; + case 0x1c: + voodoo->banshee_blt.srcColorkeyMax = val & 0xffffff; + break; + case 0x20: + voodoo->banshee_blt.dstColorkeyMin = val & 0xffffff; + break; + case 0x24: + voodoo->banshee_blt.dstColorkeyMax = val & 0xffffff; + break; + + case 0x28: + voodoo->banshee_blt.bresError0 = val; + voodoo->banshee_blt.bres_error_0 = val & 0xffff; + break; + case 0x2c: + voodoo->banshee_blt.bresError1 = val; + voodoo->banshee_blt.bres_error_1 = val & 0xffff; + break; + + case 0x30: + voodoo->banshee_blt.rop = val; + voodoo->banshee_blt.rops[1] = val & 0xff; + voodoo->banshee_blt.rops[2] = (val >> 8) & 0xff; + voodoo->banshee_blt.rops[3] = (val >> 16) & 0xff; +// bansheeblt_log("rop=%08x\n", val); + break; + case 0x34: + voodoo->banshee_blt.srcBaseAddr = val & 0xffffff; + voodoo->banshee_blt.srcBaseAddr_tiled = val & 0x80000000; + if (voodoo->banshee_blt.srcBaseAddr_tiled) + voodoo->banshee_blt.src_stride = (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) * 128*32; + else + voodoo->banshee_blt.src_stride = voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK; + update_src_stride(voodoo); +// bansheeblt_log("srcBaseAddr=%08x\n", val); + break; + case 0x38: + voodoo->banshee_blt.commandExtra = val; +// bansheeblt_log("commandExtra=%08x\n", val); + break; + case 0x44: + voodoo->banshee_blt.colorPattern[0] = val; +// bansheeblt_log("colorPattern0=%08x\n", val); + voodoo->banshee_blt.colorPattern24[0] = val & 0xffffff; + voodoo->banshee_blt.colorPattern24[1] = (voodoo->banshee_blt.colorPattern24[1] & 0xffff00) | (val >> 24); + voodoo->banshee_blt.colorPattern16[0] = val & 0xffff; + voodoo->banshee_blt.colorPattern16[1] = (val >> 16) & 0xffff; + voodoo->banshee_blt.colorPattern8[0] = val & 0xff; + voodoo->banshee_blt.colorPattern8[1] = (val >> 8) & 0xff; + voodoo->banshee_blt.colorPattern8[2] = (val >> 16) & 0xff; + voodoo->banshee_blt.colorPattern8[3] = (val >> 24) & 0xff; + break; + case 0x48: + voodoo->banshee_blt.colorPattern[1] = val; +// bansheeblt_log("colorPattern1=%08x\n", val); + voodoo->banshee_blt.colorPattern24[1] = (voodoo->banshee_blt.colorPattern24[1] & 0xff) | ((val & 0xffff) << 8); + voodoo->banshee_blt.colorPattern24[2] = (voodoo->banshee_blt.colorPattern24[2] & 0xff0000) | (val >> 16); + voodoo->banshee_blt.colorPattern16[2] = val & 0xffff; + voodoo->banshee_blt.colorPattern16[3] = (val >> 16) & 0xffff; + voodoo->banshee_blt.colorPattern8[4] = val & 0xff; + voodoo->banshee_blt.colorPattern8[5] = (val >> 8) & 0xff; + voodoo->banshee_blt.colorPattern8[6] = (val >> 16) & 0xff; + voodoo->banshee_blt.colorPattern8[7] = (val >> 24) & 0xff; + break; + case 0x4c: + voodoo->banshee_blt.clip1Min = val; + voodoo->banshee_blt.clip[1].x_min = val & 0xfff; + voodoo->banshee_blt.clip[1].y_min = (val >> 16) & 0xfff; + break; + case 0x50: + voodoo->banshee_blt.clip1Max = val; + voodoo->banshee_blt.clip[1].x_max = val & 0xfff; + voodoo->banshee_blt.clip[1].y_max = (val >> 16) & 0xfff; + break; + case 0x54: + voodoo->banshee_blt.srcFormat = val; + if (voodoo->banshee_blt.srcBaseAddr_tiled) + voodoo->banshee_blt.src_stride = (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) * 128*32; + else + voodoo->banshee_blt.src_stride = voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK; + update_src_stride(voodoo); + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) + { + case SRC_FORMAT_COL_1_BPP: + voodoo->banshee_blt.src_bpp = 1; + break; + case SRC_FORMAT_COL_8_BPP: + voodoo->banshee_blt.src_bpp = 8; + break; + case SRC_FORMAT_COL_24_BPP: + voodoo->banshee_blt.src_bpp = 24; + break; + case SRC_FORMAT_COL_32_BPP: + voodoo->banshee_blt.src_bpp = 32; + break; + case SRC_FORMAT_COL_16_BPP: default: + voodoo->banshee_blt.src_bpp = 16; + break; + } +// bansheeblt_log("srcFormat=%08x\n", val); + break; + case 0x58: + voodoo->banshee_blt.srcSize = val; + voodoo->banshee_blt.srcSizeX = voodoo->banshee_blt.srcSize & 0x1fff; + voodoo->banshee_blt.srcSizeY = (voodoo->banshee_blt.srcSize >> 16) & 0x1fff; + update_src_stride(voodoo); +// bansheeblt_log("srcSize=%08x\n", val); + break; + case 0x5c: + voodoo->banshee_blt.srcXY = val; + voodoo->banshee_blt.srcX = ((int32_t)(val << 19)) >> 19; + voodoo->banshee_blt.srcY = ((int32_t)(val << 3)) >> 19; + update_src_stride(voodoo); +// bansheeblt_log("srcXY=%08x\n", val); + break; + case 0x60: + voodoo->banshee_blt.colorBack = val; + break; + case 0x64: + voodoo->banshee_blt.colorFore = val; + break; + case 0x68: + voodoo->banshee_blt.dstSize = val; + voodoo->banshee_blt.dstSizeX = voodoo->banshee_blt.dstSize & 0x1fff; + voodoo->banshee_blt.dstSizeY = (voodoo->banshee_blt.dstSize >> 16) & 0x1fff; + update_src_stride(voodoo); +// bansheeblt_log("dstSize=%08x\n", val); + break; + case 0x6c: + voodoo->banshee_blt.dstXY = val; + voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19; + voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19; +// bansheeblt_log("dstXY=%08x\n", val); + break; + case 0x70: + voodoo_wait_for_render_thread_idle(voodoo); + voodoo->banshee_blt.command = val; + voodoo->banshee_blt.rops[0] = val >> 24; +// bansheeblt_log("command=%x %08x\n", voodoo->banshee_blt.command & COMMAND_CMD_MASK, val); + voodoo->banshee_blt.patoff_x = (val & COMMAND_PATOFF_X_MASK) >> COMMAND_PATOFF_X_SHIFT; + voodoo->banshee_blt.patoff_y = (val & COMMAND_PATOFF_Y_MASK) >> COMMAND_PATOFF_Y_SHIFT; + voodoo->banshee_blt.cur_x = 0; + voodoo->banshee_blt.cur_y = 0; + voodoo->banshee_blt.dstX = ((int32_t)(voodoo->banshee_blt.dstXY << 19)) >> 19; + voodoo->banshee_blt.dstY = ((int32_t)(voodoo->banshee_blt.dstXY << 3)) >> 19; + voodoo->banshee_blt.srcX = ((int32_t)(voodoo->banshee_blt.srcXY << 19)) >> 19; + voodoo->banshee_blt.srcY = ((int32_t)(voodoo->banshee_blt.srcXY << 3)) >> 19; + voodoo->banshee_blt.old_srcX = voodoo->banshee_blt.srcX; + voodoo->banshee_blt.host_data_remainder = 0; + voodoo->banshee_blt.host_data_count = 0; + switch (voodoo->banshee_blt.command & COMMAND_CMD_MASK) + { +/* case COMMAND_CMD_SCREEN_TO_SCREEN_STRETCH_BLT: + if (voodoo->banshee_blt.bresError0 & BRES_ERROR_USE) + voodoo->banshee_blt.bres_error_0 = (int32_t)(int16_t)(voodoo->banshee_blt.bresError0 & BRES_ERROR_MASK); + else + voodoo->banshee_blt.bres_error_0 = voodoo->banshee_blt.dstSizeY / 2; + if (voodoo->banshee_blt.bresError1 & BRES_ERROR_USE) + voodoo->banshee_blt.bres_error_1 = (int32_t)(int16_t)(voodoo->banshee_blt.bresError1 & BRES_ERROR_MASK); + else + voodoo->banshee_blt.bres_error_1 = voodoo->banshee_blt.dstSizeX / 2; + + if (val & COMMAND_INITIATE) + banshee_do_2d_blit(voodoo, -1, 0); + break;*/ + + case COMMAND_CMD_POLYFILL: + if (val & COMMAND_INITIATE) + { + voodoo->banshee_blt.dstXY = voodoo->banshee_blt.srcXY; + voodoo->banshee_blt.dstX = voodoo->banshee_blt.srcX; + voodoo->banshee_blt.dstY = voodoo->banshee_blt.srcY; + } + banshee_polyfill_start(voodoo); + break; + + default: + if (val & COMMAND_INITIATE) + { + banshee_do_2d_blit(voodoo, -1, 0); + // fatal("Initiate command!\n"); + } + break; + } + break; + + case 0x80: case 0x84: case 0x88: case 0x8c: + case 0x90: case 0x94: case 0x98: case 0x9c: + case 0xa0: case 0xa4: case 0xa8: case 0xac: + case 0xb0: case 0xb4: case 0xb8: case 0xbc: + case 0xc0: case 0xc4: case 0xc8: case 0xcc: + case 0xd0: case 0xd4: case 0xd8: case 0xdc: + case 0xe0: case 0xe4: case 0xe8: case 0xec: + case 0xf0: case 0xf4: case 0xf8: case 0xfc: +// bansheeblt_log("launch %08x %08x %08x %08x\n", voodoo->banshee_blt.command, voodoo->banshee_blt.commandExtra, voodoo->banshee_blt.srcColorkeyMin, voodoo->banshee_blt.srcColorkeyMax); + switch (voodoo->banshee_blt.command & COMMAND_CMD_MASK) + { + case COMMAND_CMD_SCREEN_TO_SCREEN_BLT: + voodoo->banshee_blt.srcXY = val; + voodoo->banshee_blt.srcX = ((int32_t)(val << 19)) >> 19; + voodoo->banshee_blt.srcY = ((int32_t)(val << 3)) >> 19; + banshee_do_screen_to_screen_blt(voodoo); + break; + + case COMMAND_CMD_HOST_TO_SCREEN_BLT: + banshee_do_2d_blit(voodoo, 32, val); + break; + + case COMMAND_CMD_HOST_TO_SCREEN_STRETCH_BLT: + banshee_do_2d_blit(voodoo, 32, val); + break; + + case COMMAND_CMD_RECTFILL: + voodoo->banshee_blt.dstXY = val; + voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19; + voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19; + banshee_do_rectfill(voodoo); + break; + +/* case COMMAND_CMD_LINE: + voodoo->banshee_blt.dstXY = val; + voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19; + voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19; + banshee_do_line(voodoo); + break;*/ + + case COMMAND_CMD_POLYLINE: + voodoo->banshee_blt.dstXY = val; + voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19; + voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19; + banshee_do_line(voodoo); + break; + + case COMMAND_CMD_POLYFILL: + banshee_polyfill_continue(voodoo, val); + break; + + default: + fatal("launch area write, command=%08x\n", voodoo->banshee_blt.command); + } + break; + + case 0x100: case 0x104: case 0x108: case 0x10c: + case 0x110: case 0x114: case 0x118: case 0x11c: + case 0x120: case 0x124: case 0x128: case 0x12c: + case 0x130: case 0x134: case 0x138: case 0x13c: + case 0x140: case 0x144: case 0x148: case 0x14c: + case 0x150: case 0x154: case 0x158: case 0x15c: + case 0x160: case 0x164: case 0x168: case 0x16c: + case 0x170: case 0x174: case 0x178: case 0x17c: + case 0x180: case 0x184: case 0x188: case 0x18c: + case 0x190: case 0x194: case 0x198: case 0x19c: + case 0x1a0: case 0x1a4: case 0x1a8: case 0x1ac: + case 0x1b0: case 0x1b4: case 0x1b8: case 0x1bc: + case 0x1c0: case 0x1c4: case 0x1c8: case 0x1cc: + case 0x1d0: case 0x1d4: case 0x1d8: case 0x1dc: + case 0x1e0: case 0x1e4: case 0x1e8: case 0x1ec: + case 0x1f0: case 0x1f4: case 0x1f8: case 0x1fc: + voodoo->banshee_blt.colorPattern[(addr >> 2) & 63] = val; + if ((addr & 0x1fc) < 0x1c0) + { + int base_addr = (addr & 0xfc) / 0xc; + uintptr_t src_p = (uintptr_t)&voodoo->banshee_blt.colorPattern[base_addr * 3]; + int col24 = base_addr * 4; + + voodoo->banshee_blt.colorPattern24[col24] = *(uint32_t *)src_p & 0xffffff; + voodoo->banshee_blt.colorPattern24[col24 + 1] = *(uint32_t *)(src_p + 3) & 0xffffff; + voodoo->banshee_blt.colorPattern24[col24 + 2] = *(uint32_t *)(src_p + 6) & 0xffffff; + voodoo->banshee_blt.colorPattern24[col24 + 3] = *(uint32_t *)(src_p + 9) & 0xffffff; + } + if ((addr & 0x1fc) < 0x180) + { + voodoo->banshee_blt.colorPattern16[(addr >> 1) & 62] = val & 0xffff; + voodoo->banshee_blt.colorPattern16[((addr >> 1) & 62) + 1] = (val >> 16) & 0xffff; + } + if ((addr & 0x1fc) < 0x140) + { + voodoo->banshee_blt.colorPattern8[addr & 60] = val & 0xff; + voodoo->banshee_blt.colorPattern8[(addr & 60) + 1] = (val >> 8) & 0xff; + voodoo->banshee_blt.colorPattern8[(addr & 60) + 2] = (val >> 16) & 0xff; + voodoo->banshee_blt.colorPattern8[(addr & 60) + 3] = (val >> 24) & 0xff; + } +// bansheeblt_log("colorPattern%02x=%08x\n", (addr >> 2) & 63, val); + break; + + default: + fatal("Unknown 2D reg write %03x %08x\n", addr & 0x1fc, val); + } +} diff --git a/src/video/vid_voodoo_blitter.c b/src/video/vid_voodoo_blitter.c new file mode 100644 index 000000000..69ebb49db --- /dev/null +++ b/src/video/vid_voodoo_blitter.c @@ -0,0 +1,554 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * 3DFX Voodoo emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/mem.h> +#include <86box/pci.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_blitter.h> +#include <86box/vid_voodoo_dither.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> + + +enum +{ + BLIT_COMMAND_SCREEN_TO_SCREEN = 0, + BLIT_COMMAND_CPU_TO_SCREEN = 1, + BLIT_COMMAND_RECT_FILL = 2, + BLIT_COMMAND_SGRAM_FILL = 3 +}; + +enum +{ + BLIT_SRC_1BPP = (0 << 3), + BLIT_SRC_1BPP_BYTE_PACKED = (1 << 3), + BLIT_SRC_16BPP = (2 << 3), + BLIT_SRC_24BPP = (3 << 3), + BLIT_SRC_24BPP_DITHER_2X2 = (4 << 3), + BLIT_SRC_24BPP_DITHER_4X4 = (5 << 3) +}; + +enum +{ + BLIT_SRC_RGB_ARGB = (0 << 6), + BLIT_SRC_RGB_ABGR = (1 << 6), + BLIT_SRC_RGB_RGBA = (2 << 6), + BLIT_SRC_RGB_BGRA = (3 << 6) +}; + +enum +{ + BLIT_COMMAND_MASK = 7, + BLIT_SRC_FORMAT = (7 << 3), + BLIT_SRC_RGB_FORMAT = (3 << 6), + BLIT_SRC_CHROMA = (1 << 10), + BLIT_DST_CHROMA = (1 << 12), + BLIT_CLIPPING_ENABLED = (1 << 16) +}; + +enum +{ + BLIT_ROP_DST_PASS = (1 << 0), + BLIT_ROP_SRC_PASS = (1 << 1) +}; + + +#ifdef ENABLE_VOODOOBLT_LOG +int voodooblt_do_log = ENABLE_VOODOOBLT_LOG; + + +static void +voodooblt_log(const char *fmt, ...) +{ + va_list ap; + + if (voodooblt_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define voodooblt_log(fmt, ...) +#endif + + +#define MIX(src_dat, dst_dat, rop) \ + switch (rop) \ + { \ + case 0x0: dst_dat = 0; break; \ + case 0x1: dst_dat = ~(src_dat | dst_dat); break; \ + case 0x2: dst_dat = ~src_dat & dst_dat; break; \ + case 0x3: dst_dat = ~src_dat; break; \ + case 0x4: dst_dat = src_dat & ~dst_dat; break; \ + case 0x5: dst_dat = ~dst_dat; break; \ + case 0x6: dst_dat = src_dat ^ dst_dat; break; \ + case 0x7: dst_dat = ~(src_dat & dst_dat); break; \ + case 0x8: dst_dat = src_dat & dst_dat; break; \ + case 0x9: dst_dat = ~(src_dat ^ dst_dat); break; \ + case 0xa: dst_dat = dst_dat; break; \ + case 0xb: dst_dat = ~src_dat | dst_dat; break; \ + case 0xc: dst_dat = src_dat; break; \ + case 0xd: dst_dat = src_dat | ~dst_dat; break; \ + case 0xe: dst_dat = src_dat | dst_dat; break; \ + case 0xf: dst_dat = 0xffff; break; \ + } + +void voodoo_v2_blit_start(voodoo_t *voodoo) +{ + uint64_t dat64; + int size_x = ABS(voodoo->bltSizeX), size_y = ABS(voodoo->bltSizeY); + int x_dir = (voodoo->bltSizeX > 0) ? 1 : -1; + int y_dir = (voodoo->bltSizeY > 0) ? 1 : -1; + int dst_x; + int src_y = voodoo->bltSrcY & 0x7ff, dst_y = voodoo->bltDstY & 0x7ff; + int src_stride = (voodoo->bltCommand & BLTCMD_SRC_TILED) ? ((voodoo->bltSrcXYStride & 0x3f) * 32*2) : (voodoo->bltSrcXYStride & 0xff8); + int dst_stride = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstXYStride & 0x3f) * 32*2) : (voodoo->bltDstXYStride & 0xff8); + uint32_t src_base_addr = (voodoo->bltCommand & BLTCMD_SRC_TILED) ? ((voodoo->bltSrcBaseAddr & 0x3ff) << 12) : (voodoo->bltSrcBaseAddr & 0x3ffff8); + uint32_t dst_base_addr = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstBaseAddr & 0x3ff) << 12) : (voodoo->bltDstBaseAddr & 0x3ffff8); + int x, y; + +/* voodooblt_log("blit_start: command=%08x srcX=%i srcY=%i dstX=%i dstY=%i sizeX=%i sizeY=%i color=%04x,%04x\n", + voodoo->bltCommand, voodoo->bltSrcX, voodoo->bltSrcY, voodoo->bltDstX, voodoo->bltDstY, voodoo->bltSizeX, voodoo->bltSizeY, voodoo->bltColorFg, voodoo->bltColorBg);*/ + + voodoo_wait_for_render_thread_idle(voodoo); + + switch (voodoo->bltCommand & BLIT_COMMAND_MASK) + { + case BLIT_COMMAND_SCREEN_TO_SCREEN: + for (y = 0; y <= size_y; y++) + { + uint16_t *src = (uint16_t *)&voodoo->fb_mem[src_base_addr + src_y*src_stride]; + uint16_t *dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + dst_y*dst_stride]; + int src_x = voodoo->bltSrcX, dst_x = voodoo->bltDstX; + + for (x = 0; x <= size_x; x++) + { + uint16_t src_dat = src[src_x]; + uint16_t dst_dat = dst[dst_x]; + int rop = 0; + + if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED) + { + if (dst_x < voodoo->bltClipLeft || dst_x >= voodoo->bltClipRight || + dst_y < voodoo->bltClipLowY || dst_y >= voodoo->bltClipHighY) + goto skip_pixel_blit; + } + + if (voodoo->bltCommand & BLIT_SRC_CHROMA) + { + int r = (src_dat >> 11); + int g = (src_dat >> 5) & 0x3f; + int b = src_dat & 0x1f; + + if (r >= voodoo->bltSrcChromaMinR && r <= voodoo->bltSrcChromaMaxR && + g >= voodoo->bltSrcChromaMinG && g <= voodoo->bltSrcChromaMaxG && + b >= voodoo->bltSrcChromaMinB && b <= voodoo->bltSrcChromaMaxB) + rop |= BLIT_ROP_SRC_PASS; + } + if (voodoo->bltCommand & BLIT_DST_CHROMA) + { + int r = (dst_dat >> 11); + int g = (dst_dat >> 5) & 0x3f; + int b = dst_dat & 0x1f; + + if (r >= voodoo->bltDstChromaMinR && r <= voodoo->bltDstChromaMaxR && + g >= voodoo->bltDstChromaMinG && g <= voodoo->bltDstChromaMaxG && + b >= voodoo->bltDstChromaMinB && b <= voodoo->bltDstChromaMaxB) + rop |= BLIT_ROP_DST_PASS; + } + + MIX(src_dat, dst_dat, voodoo->bltRop[rop]); + + dst[dst_x] = dst_dat; +skip_pixel_blit: + src_x += x_dir; + dst_x += x_dir; + } + + src_y += y_dir; + dst_y += y_dir; + } + break; + + case BLIT_COMMAND_CPU_TO_SCREEN: + voodoo->blt.dst_x = voodoo->bltDstX; + voodoo->blt.dst_y = voodoo->bltDstY; + voodoo->blt.cur_x = 0; + voodoo->blt.size_x = size_x; + voodoo->blt.size_y = size_y; + voodoo->blt.x_dir = x_dir; + voodoo->blt.y_dir = y_dir; + voodoo->blt.dst_stride = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstXYStride & 0x3f) * 32*2) : (voodoo->bltDstXYStride & 0xff8); + break; + + case BLIT_COMMAND_RECT_FILL: + for (y = 0; y <= size_y; y++) + { + uint16_t *dst; + int dst_x = voodoo->bltDstX; + + if (SLI_ENABLED) + { + if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (voodoo->blt.dst_y & 1)) || + ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(voodoo->blt.dst_y & 1))) + goto skip_line_fill; + dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + (dst_y >> 1) * dst_stride]; + } + else + dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + dst_y*dst_stride]; + + for (x = 0; x <= size_x; x++) + { + if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED) + { + if (dst_x < voodoo->bltClipLeft || dst_x >= voodoo->bltClipRight || + dst_y < voodoo->bltClipLowY || dst_y >= voodoo->bltClipHighY) + goto skip_pixel_fill; + } + + dst[dst_x] = voodoo->bltColorFg; +skip_pixel_fill: + dst_x += x_dir; + } +skip_line_fill: + dst_y += y_dir; + } + break; + + case BLIT_COMMAND_SGRAM_FILL: + /*32x32 tiles - 2kb*/ + dst_y = voodoo->bltDstY & 0x3ff; + size_x = voodoo->bltSizeX & 0x1ff; //512*8 = 4kb + size_y = voodoo->bltSizeY & 0x3ff; + + dat64 = voodoo->bltColorFg | ((uint64_t)voodoo->bltColorFg << 16) | + ((uint64_t)voodoo->bltColorFg << 32) | ((uint64_t)voodoo->bltColorFg << 48); + + for (y = 0; y <= size_y; y++) + { + uint64_t *dst; + + /*This may be wrong*/ + if (!y) + { + dst_x = voodoo->bltDstX & 0x1ff; + size_x = 511 - dst_x; + } + else if (y < size_y) + { + dst_x = 0; + size_x = 511; + } + else + { + dst_x = 0; + size_x = voodoo->bltSizeX & 0x1ff; + } + + dst = (uint64_t *)&voodoo->fb_mem[(dst_y*512*8 + dst_x*8) & voodoo->fb_mask]; + + for (x = 0; x <= size_x; x++) + dst[x] = dat64; + + dst_y++; + } + break; + + default: + fatal("bad blit command %08x\n", voodoo->bltCommand); + } +} + +void voodoo_v2_blit_data(voodoo_t *voodoo, uint32_t data) +{ + int src_bits = 32; + uint32_t base_addr = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstBaseAddr & 0x3ff) << 12) : (voodoo->bltDstBaseAddr & 0x3ffff8); + uint32_t addr; + uint16_t *dst; + + if ((voodoo->bltCommand & BLIT_COMMAND_MASK) != BLIT_COMMAND_CPU_TO_SCREEN) + return; + + if (SLI_ENABLED) + { + addr = base_addr + (voodoo->blt.dst_y >> 1) * voodoo->blt.dst_stride; + dst = (uint16_t *)&voodoo->fb_mem[addr]; + } + else + { + addr = base_addr + voodoo->blt.dst_y*voodoo->blt.dst_stride; + dst = (uint16_t *)&voodoo->fb_mem[addr]; + } + + if (addr >= voodoo->front_offset && voodoo->row_width) + { + int y = (addr - voodoo->front_offset) / voodoo->row_width; + if (y < voodoo->v_disp) + voodoo->dirty_line[y] = 2; + } + + while (src_bits && voodoo->blt.cur_x <= voodoo->blt.size_x) + { + int r = 0, g = 0, b = 0; + uint16_t src_dat = 0, dst_dat; + int x = (voodoo->blt.x_dir > 0) ? (voodoo->blt.dst_x + voodoo->blt.cur_x) : (voodoo->blt.dst_x - voodoo->blt.cur_x); + int rop = 0; + + switch (voodoo->bltCommand & BLIT_SRC_FORMAT) + { + case BLIT_SRC_1BPP: case BLIT_SRC_1BPP_BYTE_PACKED: + src_dat = (data & 1) ? voodoo->bltColorFg : voodoo->bltColorBg; + data >>= 1; + src_bits--; + break; + case BLIT_SRC_16BPP: + switch (voodoo->bltCommand & BLIT_SRC_RGB_FORMAT) + { + case BLIT_SRC_RGB_ARGB: case BLIT_SRC_RGB_RGBA: + src_dat = data & 0xffff; + break; + case BLIT_SRC_RGB_ABGR: case BLIT_SRC_RGB_BGRA: + src_dat = ((data & 0xf800) >> 11) | (data & 0x07c0) | ((data & 0x0038) << 11); + break; + } + data >>= 16; + src_bits -= 16; + break; + case BLIT_SRC_24BPP: case BLIT_SRC_24BPP_DITHER_2X2: case BLIT_SRC_24BPP_DITHER_4X4: + switch (voodoo->bltCommand & BLIT_SRC_RGB_FORMAT) + { + case BLIT_SRC_RGB_ARGB: + r = (data >> 16) & 0xff; + g = (data >> 8) & 0xff; + b = data & 0xff; + break; + case BLIT_SRC_RGB_ABGR: + r = data & 0xff; + g = (data >> 8) & 0xff; + b = (data >> 16) & 0xff; + break; + case BLIT_SRC_RGB_RGBA: + r = (data >> 24) & 0xff; + g = (data >> 16) & 0xff; + b = (data >> 8) & 0xff; + break; + case BLIT_SRC_RGB_BGRA: + r = (data >> 8) & 0xff; + g = (data >> 16) & 0xff; + b = (data >> 24) & 0xff; + break; + } + switch (voodoo->bltCommand & BLIT_SRC_FORMAT) + { + case BLIT_SRC_24BPP: + src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8); + break; + case BLIT_SRC_24BPP_DITHER_2X2: + r = dither_rb2x2[r][voodoo->blt.dst_y & 1][x & 1]; + g = dither_g2x2[g][voodoo->blt.dst_y & 1][x & 1]; + b = dither_rb2x2[b][voodoo->blt.dst_y & 1][x & 1]; + src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8); + break; + case BLIT_SRC_24BPP_DITHER_4X4: + r = dither_rb[r][voodoo->blt.dst_y & 3][x & 3]; + g = dither_g[g][voodoo->blt.dst_y & 3][x & 3]; + b = dither_rb[b][voodoo->blt.dst_y & 3][x & 3]; + src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8); + break; + } + src_bits = 0; + break; + } + + if (SLI_ENABLED) + { + if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (voodoo->blt.dst_y & 1)) || + ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(voodoo->blt.dst_y & 1))) + goto skip_pixel; + } + + if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED) + { + if (x < voodoo->bltClipLeft || x >= voodoo->bltClipRight || + voodoo->blt.dst_y < voodoo->bltClipLowY || voodoo->blt.dst_y >= voodoo->bltClipHighY) + goto skip_pixel; + } + + dst_dat = dst[x]; + + if (voodoo->bltCommand & BLIT_SRC_CHROMA) + { + r = (src_dat >> 11); + g = (src_dat >> 5) & 0x3f; + b = src_dat & 0x1f; + + if (r >= voodoo->bltSrcChromaMinR && r <= voodoo->bltSrcChromaMaxR && + g >= voodoo->bltSrcChromaMinG && g <= voodoo->bltSrcChromaMaxG && + b >= voodoo->bltSrcChromaMinB && b <= voodoo->bltSrcChromaMaxB) + rop |= BLIT_ROP_SRC_PASS; + } + if (voodoo->bltCommand & BLIT_DST_CHROMA) + { + r = (dst_dat >> 11); + g = (dst_dat >> 5) & 0x3f; + b = dst_dat & 0x1f; + + if (r >= voodoo->bltDstChromaMinR && r <= voodoo->bltDstChromaMaxR && + g >= voodoo->bltDstChromaMinG && g <= voodoo->bltDstChromaMaxG && + b >= voodoo->bltDstChromaMinB && b <= voodoo->bltDstChromaMaxB) + rop |= BLIT_ROP_DST_PASS; + } + + MIX(src_dat, dst_dat, voodoo->bltRop[rop]); + + dst[x] = dst_dat; + +skip_pixel: + voodoo->blt.cur_x++; + } + + if (voodoo->blt.cur_x > voodoo->blt.size_x) + { + voodoo->blt.size_y--; + if (voodoo->blt.size_y >= 0) + { + voodoo->blt.cur_x = 0; + voodoo->blt.dst_y += voodoo->blt.y_dir; + } + } +} + + +void voodoo_fastfill(voodoo_t *voodoo, voodoo_params_t *params) +{ + int y; + int low_y, high_y; + + if (params->fbzMode & (1 << 17)) + { + high_y = voodoo->v_disp - params->clipLowY; + low_y = voodoo->v_disp - params->clipHighY; + } + else + { + low_y = params->clipLowY; + high_y = params->clipHighY; + } + + if (params->fbzMode & FBZ_RGB_WMASK) + { + int r, g, b; + uint16_t col; + + r = ((params->color1 >> 16) >> 3) & 0x1f; + g = ((params->color1 >> 8) >> 2) & 0x3f; + b = (params->color1 >> 3) & 0x1f; + col = b | (g << 5) | (r << 11); + + if (SLI_ENABLED) + { + for (y = low_y; y < high_y; y += 2) + { + uint16_t *cbuf = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + (y >> 1) * voodoo->row_width) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + cbuf[x] = col; + } + } + else + { + for (y = low_y; y < high_y; y++) + { + if (voodoo->col_tiled) + { + uint16_t *cbuf = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + (y >> 5) * voodoo->row_width + (y & 31) * 128) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + { + int x2 = (x & 63) | ((x >> 6) * 128*32/2); + cbuf[x2] = col; + } + } + else + { + uint16_t *cbuf = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + y * voodoo->row_width) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + cbuf[x] = col; + } + } + } + } + if (params->fbzMode & FBZ_DEPTH_WMASK) + { + if (SLI_ENABLED) + { + for (y = low_y; y < high_y; y += 2) + { + uint16_t *abuf = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (y >> 1) * voodoo->row_width) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + abuf[x] = params->zaColor & 0xffff; + } + } + else + { + for (y = low_y; y < high_y; y++) + { + if (voodoo->aux_tiled) + { + uint16_t *abuf = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (y >> 5) * voodoo->aux_row_width + (y & 31) * 128) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + { + int x2 = (x & 63) | ((x >> 6) * 128*32/2); + abuf[x2] = params->zaColor & 0xffff; + } + } + else + { + uint16_t *abuf = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + y * voodoo->aux_row_width) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + abuf[x] = params->zaColor & 0xffff; + } + } + } + } +} diff --git a/src/video/vid_voodoo_display.c b/src/video/vid_voodoo_display.c new file mode 100644 index 000000000..fad39ac19 --- /dev/null +++ b/src/video/vid_voodoo_display.c @@ -0,0 +1,647 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * 3DFX Voodoo emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/mem.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_display.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> + + +#ifdef ENABLE_VOODOODISP_LOG +int voodoodisp_do_log = ENABLE_VOODOODISP_LOG; + + +static void +voodoodisp_log(const char *fmt, ...) +{ + va_list ap; + + if (voodoodisp_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define voodoodisp_log(fmt, ...) +#endif + +void voodoo_update_ncc(voodoo_t *voodoo, int tmu) +{ + int tbl; + + for (tbl = 0; tbl < 2; tbl++) + { + int col; + + for (col = 0; col < 256; col++) + { + int y = (col >> 4), i = (col >> 2) & 3, q = col & 3; + int i_r, i_g, i_b; + int q_r, q_g, q_b; + + y = (voodoo->nccTable[tmu][tbl].y[y >> 2] >> ((y & 3) * 8)) & 0xff; + + i_r = (voodoo->nccTable[tmu][tbl].i[i] >> 18) & 0x1ff; + if (i_r & 0x100) + i_r |= 0xfffffe00; + i_g = (voodoo->nccTable[tmu][tbl].i[i] >> 9) & 0x1ff; + if (i_g & 0x100) + i_g |= 0xfffffe00; + i_b = voodoo->nccTable[tmu][tbl].i[i] & 0x1ff; + if (i_b & 0x100) + i_b |= 0xfffffe00; + + q_r = (voodoo->nccTable[tmu][tbl].q[q] >> 18) & 0x1ff; + if (q_r & 0x100) + q_r |= 0xfffffe00; + q_g = (voodoo->nccTable[tmu][tbl].q[q] >> 9) & 0x1ff; + if (q_g & 0x100) + q_g |= 0xfffffe00; + q_b = voodoo->nccTable[tmu][tbl].q[q] & 0x1ff; + if (q_b & 0x100) + q_b |= 0xfffffe00; + + voodoo->ncc_lookup[tmu][tbl][col].rgba.r = CLAMP(y + i_r + q_r); + voodoo->ncc_lookup[tmu][tbl][col].rgba.g = CLAMP(y + i_g + q_g); + voodoo->ncc_lookup[tmu][tbl][col].rgba.b = CLAMP(y + i_b + q_b); + voodoo->ncc_lookup[tmu][tbl][col].rgba.a = 0xff; + } + } +} + +void voodoo_pixelclock_update(voodoo_t *voodoo) +{ + int m = (voodoo->dac_pll_regs[0] & 0x7f) + 2; + int n1 = ((voodoo->dac_pll_regs[0] >> 8) & 0x1f) + 2; + int n2 = ((voodoo->dac_pll_regs[0] >> 13) & 0x07); + float t = (14318184.0 * ((float)m / (float)n1)) / (float)(1 << n2); + double clock_const; + int line_length; + + if ((voodoo->dac_data[6] & 0xf0) == 0x20 || + (voodoo->dac_data[6] & 0xf0) == 0x60 || + (voodoo->dac_data[6] & 0xf0) == 0x70) + t /= 2.0f; + + line_length = (voodoo->hSync & 0xff) + ((voodoo->hSync >> 16) & 0x3ff); + +// voodoodisp_log("Pixel clock %f MHz hsync %08x line_length %d\n", t, voodoo->hSync, line_length); + + voodoo->pixel_clock = t; + + clock_const = cpuclock / t; + voodoo->line_time = (uint64_t)((double)line_length * clock_const * (double)(1ull << 32)); +} + +static void voodoo_calc_clutData(voodoo_t *voodoo) +{ + int c; + + for (c = 0; c < 256; c++) + { + voodoo->clutData256[c].r = (voodoo->clutData[c >> 3].r*(8-(c & 7)) + + voodoo->clutData[(c >> 3)+1].r*(c & 7)) >> 3; + voodoo->clutData256[c].g = (voodoo->clutData[c >> 3].g*(8-(c & 7)) + + voodoo->clutData[(c >> 3)+1].g*(c & 7)) >> 3; + voodoo->clutData256[c].b = (voodoo->clutData[c >> 3].b*(8-(c & 7)) + + voodoo->clutData[(c >> 3)+1].b*(c & 7)) >> 3; + } + + for (c = 0; c < 65536; c++) + { + int r = (c >> 8) & 0xf8; + int g = (c >> 3) & 0xfc; + int b = (c << 3) & 0xf8; +// r |= (r >> 5); +// g |= (g >> 6); +// b |= (b >> 5); + + voodoo->video_16to32[c] = (voodoo->clutData256[r].r << 16) | (voodoo->clutData256[g].g << 8) | voodoo->clutData256[b].b; + } +} + + + +#define FILTDIV 256 + +static int FILTCAP, FILTCAPG, FILTCAPB = 0; /* color filter threshold values */ + +void voodoo_generate_filter_v1(voodoo_t *voodoo) +{ + int g, h; + float difference, diffg, diffb; + float thiscol, thiscolg, thiscolb, lined; + float fcr, fcg, fcb; + + fcr = FILTCAP * 5; + fcg = FILTCAPG * 6; + fcb = FILTCAPB * 5; + + for (g=0;g FILTCAP) + difference = FILTCAP; + if (difference < -FILTCAP) + difference = -FILTCAP; + + if (diffg > FILTCAPG) + diffg = FILTCAPG; + if (diffg < -FILTCAPG) + diffg = -FILTCAPG; + + if (diffb > FILTCAPB) + diffb = FILTCAPB; + if (diffb < -FILTCAPB) + diffb = -FILTCAPB; + + // hack - to make it not bleed onto black + //if (g == 0){ + //difference = diffg = diffb = 0; + //} + + if ((difference < fcr) || (-difference > -fcr)) + thiscol = g + (difference / 2); + if ((diffg < fcg) || (-diffg > -fcg)) + thiscolg = g + (diffg / 2); /* need these divides so we can actually undither! */ + if ((diffb < fcb) || (-diffb > -fcb)) + thiscolb = g + (diffb / 2); + + if (thiscol < 0) + thiscol = 0; + if (thiscol > FILTDIV-1) + thiscol = FILTDIV-1; + + if (thiscolg < 0) + thiscolg = 0; + if (thiscolg > FILTDIV-1) + thiscolg = FILTDIV-1; + + if (thiscolb < 0) + thiscolb = 0; + if (thiscolb > FILTDIV-1) + thiscolb = FILTDIV-1; + + voodoo->thefilter[g][h] = thiscol; + voodoo->thefilterg[g][h] = thiscolg; + voodoo->thefilterb[g][h] = thiscolb; + } + + lined = g + 4; + if (lined > 255) + lined = 255; + voodoo->purpleline[g][0] = lined; + voodoo->purpleline[g][2] = lined; + + lined = g + 0; + if (lined > 255) + lined = 255; + voodoo->purpleline[g][1] = lined; + } +} + +void voodoo_generate_filter_v2(voodoo_t *voodoo) +{ + int g, h; + float difference; + float thiscol, thiscolg, thiscolb; + float clr, clg, clb = 0; + float fcr, fcg, fcb = 0; + + // pre-clamping + + fcr = FILTCAP; + fcg = FILTCAPG; + fcb = FILTCAPB; + + if (fcr > 32) fcr = 32; + if (fcg > 32) fcg = 32; + if (fcb > 32) fcb = 32; + + for (g=0;g<256;g++) // pixel 1 - our target pixel we want to bleed into + { + for (h=0;h<256;h++) // pixel 2 - our main pixel + { + float avg; + float avgdiff; + + difference = (float)(g - h); + avg = (float)((g + g + g + g + h) / 5); + avgdiff = avg - (float)((g + h + h + h + h) / 5); + if (avgdiff < 0) avgdiff *= -1; + if (difference < 0) difference *= -1; + + thiscol = thiscolg = thiscolb = g; + + // try lighten + if (h > g) + { + clr = clg = clb = avgdiff; + + if (clr>fcr) clr=fcr; + if (clg>fcg) clg=fcg; + if (clb>fcb) clb=fcb; + + + thiscol = g + clr; + thiscolg = g + clg; + thiscolb = g + clb; + + if (thiscol>g+FILTCAP) + thiscol=g+FILTCAP; + if (thiscolg>g+FILTCAPG) + thiscolg=g+FILTCAPG; + if (thiscolb>g+FILTCAPB) + thiscolb=g+FILTCAPB; + + + if (thiscol>g+avgdiff) + thiscol=g+avgdiff; + if (thiscolg>g+avgdiff) + thiscolg=g+avgdiff; + if (thiscolb>g+avgdiff) + thiscolb=g+avgdiff; + + } + + if (difference > FILTCAP) + thiscol = g; + if (difference > FILTCAPG) + thiscolg = g; + if (difference > FILTCAPB) + thiscolb = g; + + // clamp + if (thiscol < 0) thiscol = 0; + if (thiscolg < 0) thiscolg = 0; + if (thiscolb < 0) thiscolb = 0; + + if (thiscol > 255) thiscol = 255; + if (thiscolg > 255) thiscolg = 255; + if (thiscolb > 255) thiscolb = 255; + + // add to the table + voodoo->thefilter[g][h] = (thiscol); + voodoo->thefilterg[g][h] = (thiscolg); + voodoo->thefilterb[g][h] = (thiscolb); + + // debug the ones that don't give us much of a difference + //if (difference < FILTCAP) + //voodoodisp_log("Voodoofilter: %ix%i - %f difference, %f average difference, R=%f, G=%f, B=%f\n", g, h, difference, avgdiff, thiscol, thiscolg, thiscolb); + } + + } +} + +void voodoo_threshold_check(voodoo_t *voodoo) +{ + int r, g, b; + + if (!voodoo->scrfilterEnabled) + return; /* considered disabled; don't check and generate */ + + /* Check for changes, to generate anew table */ + if (voodoo->scrfilterThreshold != voodoo->scrfilterThresholdOld) + { + r = (voodoo->scrfilterThreshold >> 16) & 0xFF; + g = (voodoo->scrfilterThreshold >> 8 ) & 0xFF; + b = voodoo->scrfilterThreshold & 0xFF; + + FILTCAP = r; + FILTCAPG = g; + FILTCAPB = b; + + voodoodisp_log("Voodoo Filter Threshold Check: %06x - RED %i GREEN %i BLUE %i\n", voodoo->scrfilterThreshold, r, g, b); + + voodoo->scrfilterThresholdOld = voodoo->scrfilterThreshold; + + if (voodoo->type == VOODOO_2) + voodoo_generate_filter_v2(voodoo); + else + voodoo_generate_filter_v1(voodoo); + + if (voodoo->type >= VOODOO_BANSHEE) + voodoo_generate_vb_filters(voodoo, FILTCAP, FILTCAPG); + } +} + +static void voodoo_filterline_v1(voodoo_t *voodoo, uint8_t *fil, int column, uint16_t *src, int line) +{ + int x; + + // Scratchpad for avoiding feedback streaks + uint8_t fil3[(voodoo->h_disp) * 3]; + + /* 16 to 32-bit */ + for (x=0; x> 5) & 63) << 2); + fil[x*3+2] = (((src[x] >> 11) & 31) << 3); + + // Copy to our scratchpads + fil3[x*3+0] = fil[x*3+0]; + fil3[x*3+1] = fil[x*3+1]; + fil3[x*3+2] = fil[x*3+2]; + } + + + /* lines */ + + if (line & 1) + { + for (x=0; xpurpleline[fil[x*3]][0]; + fil[x*3+1] = voodoo->purpleline[fil[x*3+1]][1]; + fil[x*3+2] = voodoo->purpleline[fil[x*3+2]][2]; + } + } + + + /* filtering time */ + + for (x=1; xthefilterb[fil[x*3]][fil[ (x-1) *3]]; + fil3[(x)*3+1] = voodoo->thefilterg[fil[x*3+1]][fil[ (x-1) *3+1]]; + fil3[(x)*3+2] = voodoo->thefilter[fil[x*3+2]][fil[ (x-1) *3+2]]; + } + + for (x=1; xthefilterb[fil3[x*3]][fil3[ (x-1) *3]]; + fil[(x)*3+1] = voodoo->thefilterg[fil3[x*3+1]][fil3[ (x-1) *3+1]]; + fil[(x)*3+2] = voodoo->thefilter[fil3[x*3+2]][fil3[ (x-1) *3+2]]; + } + + for (x=1; xthefilterb[fil[x*3]][fil[ (x-1) *3]]; + fil3[(x)*3+1] = voodoo->thefilterg[fil[x*3+1]][fil[ (x-1) *3+1]]; + fil3[(x)*3+2] = voodoo->thefilter[fil[x*3+2]][fil[ (x-1) *3+2]]; + } + + for (x=0; xthefilterb[fil3[x*3]][fil3[ (x+1) *3]]; + fil[(x)*3+1] = voodoo->thefilterg[fil3[x*3+1]][fil3[ (x+1) *3+1]]; + fil[(x)*3+2] = voodoo->thefilter[fil3[x*3+2]][fil3[ (x+1) *3+2]]; + } +} + + +static void voodoo_filterline_v2(voodoo_t *voodoo, uint8_t *fil, int column, uint16_t *src, int line) +{ + int x; + + // Scratchpad for blending filter + uint8_t fil3[(voodoo->h_disp) * 3]; + + /* 16 to 32-bit */ + for (x=0; x> 5) & 63) << 2); + fil3[x*3+2] = fil[x*3+2] = (((src[x] >> 11) & 31) << 3); + } + + /* filtering time */ + + for (x=1; xthefilterb [((src[x+3] & 31) << 3)] [((src[x] & 31) << 3)]; + fil3[(x+3)*3+1] = voodoo->thefilterg [(((src[x+3] >> 5) & 63) << 2)] [(((src[x] >> 5) & 63) << 2)]; + fil3[(x+3)*3+2] = voodoo->thefilter [(((src[x+3] >> 11) & 31) << 3)] [(((src[x] >> 11) & 31) << 3)]; + + fil[(x+2)*3] = voodoo->thefilterb [fil3[(x+2)*3]][((src[x] & 31) << 3)]; + fil[(x+2)*3+1] = voodoo->thefilterg [fil3[(x+2)*3+1]][(((src[x] >> 5) & 63) << 2)]; + fil[(x+2)*3+2] = voodoo->thefilter [fil3[(x+2)*3+2]][(((src[x] >> 11) & 31) << 3)]; + + fil3[(x+1)*3] = voodoo->thefilterb [fil[(x+1)*3]][((src[x] & 31) << 3)]; + fil3[(x+1)*3+1] = voodoo->thefilterg [fil[(x+1)*3+1]][(((src[x] >> 5) & 63) << 2)]; + fil3[(x+1)*3+2] = voodoo->thefilter [fil[(x+1)*3+2]][(((src[x] >> 11) & 31) << 3)]; + + fil[(x-1)*3] = voodoo->thefilterb [fil3[(x-1)*3]][((src[x] & 31) << 3)]; + fil[(x-1)*3+1] = voodoo->thefilterg [fil3[(x-1)*3+1]][(((src[x] >> 5) & 63) << 2)]; + fil[(x-1)*3+2] = voodoo->thefilter [fil3[(x-1)*3+2]][(((src[x] >> 11) & 31) << 3)]; + } + + // unroll for edge cases + + fil3[(column-3)*3] = voodoo->thefilterb [((src[column-3] & 31) << 3)] [((src[column] & 31) << 3)]; + fil3[(column-3)*3+1] = voodoo->thefilterg [(((src[column-3] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; + fil3[(column-3)*3+2] = voodoo->thefilter [(((src[column-3] >> 11) & 31) << 3)] [(((src[column] >> 11) & 31) << 3)]; + + fil3[(column-2)*3] = voodoo->thefilterb [((src[column-2] & 31) << 3)] [((src[column] & 31) << 3)]; + fil3[(column-2)*3+1] = voodoo->thefilterg [(((src[column-2] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; + fil3[(column-2)*3+2] = voodoo->thefilter [(((src[column-2] >> 11) & 31) << 3)] [(((src[column] >> 11) & 31) << 3)]; + + fil3[(column-1)*3] = voodoo->thefilterb [((src[column-1] & 31) << 3)] [((src[column] & 31) << 3)]; + fil3[(column-1)*3+1] = voodoo->thefilterg [(((src[column-1] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; + fil3[(column-1)*3+2] = voodoo->thefilter [(((src[column-1] >> 11) & 31) << 3)] [(((src[column] >> 11) & 31) << 3)]; + + fil[(column-2)*3] = voodoo->thefilterb [fil3[(column-2)*3]][((src[column] & 31) << 3)]; + fil[(column-2)*3+1] = voodoo->thefilterg [fil3[(column-2)*3+1]][(((src[column] >> 5) & 63) << 2)]; + fil[(column-2)*3+2] = voodoo->thefilter [fil3[(column-2)*3+2]][(((src[column] >> 11) & 31) << 3)]; + + fil[(column-1)*3] = voodoo->thefilterb [fil3[(column-1)*3]][((src[column] & 31) << 3)]; + fil[(column-1)*3+1] = voodoo->thefilterg [fil3[(column-1)*3+1]][(((src[column] >> 5) & 63) << 2)]; + fil[(column-1)*3+2] = voodoo->thefilter [fil3[(column-1)*3+2]][(((src[column] >> 11) & 31) << 3)]; + + fil3[(column-1)*3] = voodoo->thefilterb [fil[(column-1)*3]][((src[column] & 31) << 3)]; + fil3[(column-1)*3+1] = voodoo->thefilterg [fil[(column-1)*3+1]][(((src[column] >> 5) & 63) << 2)]; + fil3[(column-1)*3+2] = voodoo->thefilter [fil[(column-1)*3+2]][(((src[column] >> 11) & 31) << 3)]; +} + +void voodoo_callback(void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + + if (voodoo->fbiInit0 & FBIINIT0_VGA_PASS) + { + if (voodoo->line < voodoo->v_disp) + { + voodoo_t *draw_voodoo; + int draw_line; + + if (SLI_ENABLED) + { + if (voodoo == voodoo->set->voodoos[1]) + goto skip_draw; + + if (((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) ? 1 : 0) == (voodoo->line & 1)) + draw_voodoo = voodoo; + else + draw_voodoo = voodoo->set->voodoos[1]; + draw_line = voodoo->line >> 1; + } + else + { + if (!(voodoo->fbiInit0 & 1)) + goto skip_draw; + draw_voodoo = voodoo; + draw_line = voodoo->line; + } + + if (draw_voodoo->dirty_line[draw_line]) + { + uint32_t *p = &((uint32_t *)buffer32->line[voodoo->line])[32]; + uint16_t *src = (uint16_t *)&draw_voodoo->fb_mem[draw_voodoo->front_offset + draw_line*draw_voodoo->row_width]; + int x; + + draw_voodoo->dirty_line[draw_line] = 0; + + if (voodoo->line < voodoo->dirty_line_low) + { + voodoo->dirty_line_low = voodoo->line; + video_wait_for_buffer(); + } + if (voodoo->line > voodoo->dirty_line_high) + voodoo->dirty_line_high = voodoo->line; + + if (voodoo->scrfilter && voodoo->scrfilterEnabled) + { + uint8_t fil[(voodoo->h_disp) * 3]; /* interleaved 24-bit RGB */ + + if (voodoo->type == VOODOO_2) + voodoo_filterline_v2(voodoo, fil, voodoo->h_disp, src, voodoo->line); + else + voodoo_filterline_v1(voodoo, fil, voodoo->h_disp, src, voodoo->line); + + for (x = 0; x < voodoo->h_disp; x++) + { + p[x] = (voodoo->clutData256[fil[x*3]].b << 0 | voodoo->clutData256[fil[x*3+1]].g << 8 | voodoo->clutData256[fil[x*3+2]].r << 16); + } + } + else + { + for (x = 0; x < voodoo->h_disp; x++) + { + p[x] = draw_voodoo->video_16to32[src[x]]; + } + } + } + } + } +skip_draw: + if (voodoo->line == voodoo->v_disp) + { +// voodoodisp_log("retrace %i %i %08x %i\n", voodoo->retrace_count, voodoo->swap_interval, voodoo->swap_offset, voodoo->swap_pending); + voodoo->retrace_count++; + if (SLI_ENABLED && (voodoo->fbiInit2 & FBIINIT2_SWAP_ALGORITHM_MASK) == FBIINIT2_SWAP_ALGORITHM_SLI_SYNC) + { + if (voodoo == voodoo->set->voodoos[0]) + { + voodoo_t *voodoo_1 = voodoo->set->voodoos[1]; + + /*Only swap if both Voodoos are waiting for buffer swap*/ + if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval) && + voodoo_1->swap_pending && (voodoo_1->retrace_count > voodoo_1->swap_interval)) + { + memset(voodoo->dirty_line, 1, 1024); + voodoo->retrace_count = 0; + voodoo->front_offset = voodoo->swap_offset; + if (voodoo->swap_count > 0) + voodoo->swap_count--; + voodoo->swap_pending = 0; + + memset(voodoo_1->dirty_line, 1, 1024); + voodoo_1->retrace_count = 0; + voodoo_1->front_offset = voodoo_1->swap_offset; + if (voodoo_1->swap_count > 0) + voodoo_1->swap_count--; + voodoo_1->swap_pending = 0; + + thread_set_event(voodoo->wake_fifo_thread); + thread_set_event(voodoo_1->wake_fifo_thread); + + voodoo->frame_count++; + voodoo_1->frame_count++; + } + } + } + else + { + if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval)) + { + voodoo->front_offset = voodoo->swap_offset; + if (voodoo->swap_count > 0) + voodoo->swap_count--; + voodoo->swap_pending = 0; + + memset(voodoo->dirty_line, 1, 1024); + voodoo->retrace_count = 0; + thread_set_event(voodoo->wake_fifo_thread); + voodoo->frame_count++; + } + } + voodoo->v_retrace = 1; + } + voodoo->line++; + + if (voodoo->fbiInit0 & FBIINIT0_VGA_PASS) + { + if (voodoo->line == voodoo->v_disp) + { + if (voodoo->dirty_line_high > voodoo->dirty_line_low) + svga_doblit(0, voodoo->v_disp, voodoo->h_disp, voodoo->v_disp-1, voodoo->svga); + if (voodoo->clutData_dirty) + { + voodoo->clutData_dirty = 0; + voodoo_calc_clutData(voodoo); + } + voodoo->dirty_line_high = -1; + voodoo->dirty_line_low = 2000; + } + } + + if (voodoo->line >= voodoo->v_total) + { + voodoo->line = 0; + voodoo->v_retrace = 0; + } + if (voodoo->line_time) + timer_advance_u64(&voodoo->timer, voodoo->line_time); + else + timer_advance_u64(&voodoo->timer, TIMER_USEC * 32); +} diff --git a/src/video/vid_voodoo_fb.c b/src/video/vid_voodoo_fb.c new file mode 100644 index 000000000..6dfed7e7b --- /dev/null +++ b/src/video/vid_voodoo_fb.c @@ -0,0 +1,493 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * 3DFX Voodoo emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/mem.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_dither.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> +#include <86box/vid_voodoo_fb.h> + + +#ifdef ENABLE_VOODOO_FB_LOG +int voodoo_fb_do_log = ENABLE_VOODOO_FB_LOG; + +static void +voodoo_fb_log(const char *fmt, ...) +{ + va_list ap; + + if (voodoo_fb_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define voodoo_fb_log(fmt, ...) +#endif + + +uint16_t voodoo_fb_readw(uint32_t addr, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + int x, y; + uint32_t read_addr; + uint16_t temp; + + if (voodoo->type >= VOODOO_BANSHEE) + { + x = addr & 0xffe; + y = (addr >> 12) & 0x3ff; + } + else + { + x = addr & 0x7fe; + y = (addr >> 11) & 0x3ff; + } + + if (SLI_ENABLED) + { + voodoo_set_t *set = voodoo->set; + + if (y & 1) + voodoo = set->voodoos[1]; + else + voodoo = set->voodoos[0]; + + y >>= 1; + } + + if (voodoo->col_tiled) + read_addr = voodoo->fb_read_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + read_addr = voodoo->fb_read_offset + x + (y * voodoo->row_width); + + if (read_addr > voodoo->fb_mask) + return 0xffff; + + temp = *(uint16_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]); + +// voodoo_fb_log("voodoo_fb_readw : %08X %08X %i %i %08X %08X %08x:%08x %i\n", addr, temp, x, y, read_addr, *(uint32_t *)(&voodoo->fb_mem[4]), cs, pc, fb_reads++); + return temp; +} +uint32_t voodoo_fb_readl(uint32_t addr, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + int x, y; + uint32_t read_addr; + uint32_t temp; + + if (voodoo->type >= VOODOO_BANSHEE) + { + x = addr & 0xffe; + y = (addr >> 12) & 0x3ff; + } + else + { + x = addr & 0x7fe; + y = (addr >> 11) & 0x3ff; + } + + if (SLI_ENABLED) + { + voodoo_set_t *set = voodoo->set; + + if (y & 1) + voodoo = set->voodoos[1]; + else + voodoo = set->voodoos[0]; + + y >>= 1; + } + + if (voodoo->col_tiled) + read_addr = voodoo->fb_read_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + read_addr = voodoo->fb_read_offset + x + (y * voodoo->row_width); + + if (read_addr > voodoo->fb_mask) + return 0xffffffff; + + temp = *(uint32_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]); + +// voodoo_fb_log("voodoo_fb_readl : %08X %08x %08X x=%i y=%i %08X %08X %08x:%08x %i ro=%08x rw=%i\n", addr, read_addr, temp, x, y, read_addr, *(uint32_t *)(&voodoo->fb_mem[4]), cs, pc, fb_reads++, voodoo->fb_read_offset, voodoo->row_width); + return temp; +} + +static inline uint16_t do_dither(voodoo_params_t *params, rgba8_t col, int x, int y) +{ + int r, g, b; + + if (dither) + { + if (dither2x2) + { + r = dither_rb2x2[col.r][y & 1][x & 1]; + g = dither_g2x2[col.g][y & 1][x & 1]; + b = dither_rb2x2[col.b][y & 1][x & 1]; + } + else + { + r = dither_rb[col.r][y & 3][x & 3]; + g = dither_g[col.g][y & 3][x & 3]; + b = dither_rb[col.b][y & 3][x & 3]; + } + } + else + { + r = col.r >> 3; + g = col.g >> 2; + b = col.b >> 3; + } + + return b | (g << 5) | (r << 11); +} + +void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + voodoo_params_t *params = &voodoo->params; + int x, y; + uint32_t write_addr, write_addr_aux; + rgba8_t colour_data; + uint16_t depth_data; + uint8_t alpha_data; + int write_mask = 0; + + colour_data.r = colour_data.g = colour_data.b = colour_data.a = 0; + + depth_data = voodoo->params.zaColor & 0xffff; + alpha_data = voodoo->params.zaColor >> 24; + +// while (!RB_EMPTY) +// thread_reset_event(voodoo->not_full_event); + +// voodoo_fb_log("voodoo_fb_writew : %08X %04X\n", addr, val); + + + switch (voodoo->lfbMode & LFB_FORMAT_MASK) + { + case LFB_FORMAT_RGB565: + colour_data = rgb565[val]; + alpha_data = 0xff; + write_mask = LFB_WRITE_COLOUR; + break; + case LFB_FORMAT_RGB555: + colour_data = argb1555[val]; + alpha_data = 0xff; + write_mask = LFB_WRITE_COLOUR; + break; + case LFB_FORMAT_ARGB1555: + colour_data = argb1555[val]; + alpha_data = colour_data.a; + write_mask = LFB_WRITE_COLOUR; + break; + case LFB_FORMAT_DEPTH: + depth_data = val; + write_mask = LFB_WRITE_DEPTH; + break; + + default: + fatal("voodoo_fb_writew : bad LFB format %08X\n", voodoo->lfbMode); + } + + if (voodoo->type >= VOODOO_BANSHEE) + { + x = addr & 0xffe; + y = (addr >> 12) & 0x3ff; + } + else + { + x = addr & 0x7fe; + y = (addr >> 11) & 0x3ff; + } + + if (SLI_ENABLED) + { + if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (y & 1)) || + ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(y & 1))) + return; + y >>= 1; + } + + + if (voodoo->fb_write_offset == voodoo->params.front_offset && y < 2048) + voodoo->dirty_line[y] = 1; + + if (voodoo->col_tiled) + write_addr = voodoo->fb_write_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width); + if (voodoo->aux_tiled) + write_addr_aux = voodoo->params.aux_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width); + +// voodoo_fb_log("fb_writew %08x %i %i %i %08x\n", addr, x, y, voodoo->row_width, write_addr); + + if (voodoo->lfbMode & 0x100) + { + { + rgba8_t write_data = colour_data; + uint16_t new_depth = depth_data; + + if (params->fbzMode & FBZ_DEPTH_ENABLE) + { + uint16_t old_depth = *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]); + + DEPTH_TEST(new_depth); + } + + if ((params->fbzMode & FBZ_CHROMAKEY) && + write_data.r == params->chromaKey_r && + write_data.g == params->chromaKey_g && + write_data.b == params->chromaKey_b) + goto skip_pixel; + + if (params->fogMode & FOG_ENABLE) + { + int32_t z = new_depth << 12; + int64_t w_depth = (int64_t)(int32_t)new_depth; + int32_t ia = alpha_data << 12; + + APPLY_FOG(write_data.r, write_data.g, write_data.b, z, ia, w_depth); + } + + if (params->alphaMode & 1) + ALPHA_TEST(alpha_data); + + if (params->alphaMode & (1 << 4)) + { + uint16_t dat = *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]); + int dest_r, dest_g, dest_b, dest_a; + + dest_r = (dat >> 8) & 0xf8; + dest_g = (dat >> 3) & 0xfc; + dest_b = (dat << 3) & 0xf8; + dest_r |= (dest_r >> 5); + dest_g |= (dest_g >> 6); + dest_b |= (dest_b >> 5); + dest_a = 0xff; + + ALPHA_BLEND(write_data.r, write_data.g, write_data.b, alpha_data); + } + + if (params->fbzMode & FBZ_RGB_WMASK) + *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, write_data, x >> 1, y); + if (params->fbzMode & FBZ_DEPTH_WMASK) + *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = new_depth; + +skip_pixel: + x = x; + } + } + else + { + if (write_mask & LFB_WRITE_COLOUR) + *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, colour_data, x >> 1, y); + if (write_mask & LFB_WRITE_DEPTH) + *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = depth_data; + } +} + + +void voodoo_fb_writel(uint32_t addr, uint32_t val, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + voodoo_params_t *params = &voodoo->params; + int x, y; + uint32_t write_addr, write_addr_aux; + rgba8_t colour_data[2]; + uint16_t depth_data[2]; + uint8_t alpha_data[2]; + int write_mask = 0, count = 1; + + depth_data[0] = depth_data[1] = voodoo->params.zaColor & 0xffff; + alpha_data[0] = alpha_data[1] = voodoo->params.zaColor >> 24; +// while (!RB_EMPTY) +// thread_reset_event(voodoo->not_full_event); + +// voodoo_fb_log("voodoo_fb_writel : %08X %08X\n", addr, val); + + switch (voodoo->lfbMode & LFB_FORMAT_MASK) + { + case LFB_FORMAT_RGB565: + colour_data[0] = rgb565[val & 0xffff]; + colour_data[1] = rgb565[val >> 16]; + write_mask = LFB_WRITE_COLOUR; + count = 2; + break; + case LFB_FORMAT_RGB555: + colour_data[0] = argb1555[val & 0xffff]; + colour_data[1] = argb1555[val >> 16]; + write_mask = LFB_WRITE_COLOUR; + count = 2; + break; + case LFB_FORMAT_ARGB1555: + colour_data[0] = argb1555[val & 0xffff]; + alpha_data[0] = colour_data[0].a; + colour_data[1] = argb1555[val >> 16]; + alpha_data[1] = colour_data[1].a; + write_mask = LFB_WRITE_COLOUR; + count = 2; + break; + + case LFB_FORMAT_ARGB8888: + colour_data[0].b = val & 0xff; + colour_data[0].g = (val >> 8) & 0xff; + colour_data[0].r = (val >> 16) & 0xff; + alpha_data[0] = (val >> 24) & 0xff; + write_mask = LFB_WRITE_COLOUR; + addr >>= 1; + break; + + case LFB_FORMAT_DEPTH: + depth_data[0] = val; + depth_data[1] = val >> 16; + write_mask = LFB_WRITE_DEPTH; + count = 2; + break; + + default: + fatal("voodoo_fb_writel : bad LFB format %08X\n", voodoo->lfbMode); + } + + if (voodoo->type >= VOODOO_BANSHEE) + { + x = addr & 0xffe; + y = (addr >> 12) & 0x3ff; + } + else + { + x = addr & 0x7fe; + y = (addr >> 11) & 0x3ff; + } + + if (SLI_ENABLED) + { + if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (y & 1)) || + ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(y & 1))) + return; + y >>= 1; + } + + if (voodoo->fb_write_offset == voodoo->params.front_offset && y < 2048) + voodoo->dirty_line[y] = 1; + + if (voodoo->col_tiled) + write_addr = voodoo->fb_write_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width); + if (voodoo->aux_tiled) + write_addr_aux = voodoo->params.aux_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width); + +// voodoo_fb_log("fb_writel %08x x=%i y=%i rw=%i %08x wo=%08x\n", addr, x, y, voodoo->row_width, write_addr, voodoo->fb_write_offset); + + if (voodoo->lfbMode & 0x100) + { + int c; + + for (c = 0; c < count; c++) + { + rgba8_t write_data = colour_data[c]; + uint16_t new_depth = depth_data[c]; + + if (params->fbzMode & FBZ_DEPTH_ENABLE) + { + uint16_t old_depth = *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]); + + DEPTH_TEST(new_depth); + } + + if ((params->fbzMode & FBZ_CHROMAKEY) && + write_data.r == params->chromaKey_r && + write_data.g == params->chromaKey_g && + write_data.b == params->chromaKey_b) + goto skip_pixel; + + if (params->fogMode & FOG_ENABLE) + { + int32_t z = new_depth << 12; + int64_t w_depth = new_depth; + int32_t ia = alpha_data[c] << 12; + + APPLY_FOG(write_data.r, write_data.g, write_data.b, z, ia, w_depth); + } + + if (params->alphaMode & 1) + ALPHA_TEST(alpha_data[c]); + + if (params->alphaMode & (1 << 4)) + { + uint16_t dat = *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]); + int dest_r, dest_g, dest_b, dest_a; + + dest_r = (dat >> 8) & 0xf8; + dest_g = (dat >> 3) & 0xfc; + dest_b = (dat << 3) & 0xf8; + dest_r |= (dest_r >> 5); + dest_g |= (dest_g >> 6); + dest_b |= (dest_b >> 5); + dest_a = 0xff; + + ALPHA_BLEND(write_data.r, write_data.g, write_data.b, alpha_data[c]); + } + + if (params->fbzMode & FBZ_RGB_WMASK) + *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, write_data, (x >> 1) + c, y); + if (params->fbzMode & FBZ_DEPTH_WMASK) + *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = new_depth; + +skip_pixel: + write_addr += 2; + write_addr_aux += 2; + } + } + else + { + int c; + + for (c = 0; c < count; c++) + { + if (write_mask & LFB_WRITE_COLOUR) + *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, colour_data[c], (x >> 1) + c, y); + if (write_mask & LFB_WRITE_DEPTH) + *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = depth_data[c]; + + write_addr += 2; + write_addr_aux += 2; + } + } +} diff --git a/src/video/vid_voodoo_fifo.c b/src/video/vid_voodoo_fifo.c new file mode 100644 index 000000000..c6e09ad37 --- /dev/null +++ b/src/video/vid_voodoo_fifo.c @@ -0,0 +1,526 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * 3DFX Voodoo emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/mem.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_banshee_blitter.h> +#include <86box/vid_voodoo_fb.h> +#include <86box/vid_voodoo_fifo.h> +#include <86box/vid_voodoo_reg.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> +#include <86box/vid_voodoo_texture.h> + + +#ifdef ENABLE_VOODOO_FIFO_LOG +int voodoo_fifo_do_log = ENABLE_VOODOO_FIFO_LOG; + +static void +voodoo_fifo_log(const char *fmt, ...) +{ + va_list ap; + + if (voodoo_fifo_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define voodoo_fifo_log(fmt, ...) +#endif + +#define WAKE_DELAY (TIMER_USEC * 100) +void voodoo_wake_fifo_thread(voodoo_t *voodoo) +{ + if (!timer_is_enabled(&voodoo->wake_timer)) + { + /*Don't wake FIFO thread immediately - if we do that it will probably + process one word and go back to sleep, requiring it to be woken on + almost every write. Instead, wait a short while so that the CPU + emulation writes more data so we have more batched-up work.*/ + timer_set_delay_u64(&voodoo->wake_timer, WAKE_DELAY); + } +} + +void voodoo_wake_fifo_thread_now(voodoo_t *voodoo) +{ + thread_set_event(voodoo->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ +} + +void voodoo_wake_timer(void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + + thread_set_event(voodoo->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ +} + +void voodoo_queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val) +{ + fifo_entry_t *fifo = &voodoo->fifo[voodoo->fifo_write_idx & FIFO_MASK]; + + while (FIFO_FULL) + { + thread_reset_event(voodoo->fifo_not_full_event); + if (FIFO_FULL) + { + thread_wait_event(voodoo->fifo_not_full_event, 1); /*Wait for room in ringbuffer*/ + if (FIFO_FULL) + voodoo_wake_fifo_thread_now(voodoo); + } + } + + fifo->val = val; + fifo->addr_type = addr_type; + + voodoo->fifo_write_idx++; + + if (FIFO_ENTRIES > 0xe000) + voodoo_wake_fifo_thread(voodoo); +} + +void voodoo_flush(voodoo_t *voodoo) +{ + voodoo->flush = 1; + while (!FIFO_EMPTY) + { + voodoo_wake_fifo_thread_now(voodoo); + thread_wait_event(voodoo->fifo_not_full_event, 1); + } + voodoo_wait_for_render_thread_idle(voodoo); + voodoo->flush = 0; +} + +void voodoo_wake_fifo_threads(voodoo_set_t *set, voodoo_t *voodoo) +{ + voodoo_wake_fifo_thread(voodoo); + if (SLI_ENABLED && voodoo->type != VOODOO_2 && set->voodoos[0] == voodoo) + voodoo_wake_fifo_thread(set->voodoos[1]); +} + +void voodoo_wait_for_swap_complete(voodoo_t *voodoo) +{ + while (voodoo->swap_pending) + { + thread_wait_event(voodoo->wake_fifo_thread, -1); + thread_reset_event(voodoo->wake_fifo_thread); + + if ((voodoo->swap_pending && voodoo->flush) || FIFO_FULL) + { + /*Main thread is waiting for FIFO to empty, so skip vsync wait and just swap*/ + memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line)); + voodoo->front_offset = voodoo->params.front_offset; + if (voodoo->swap_count > 0) + voodoo->swap_count--; + voodoo->swap_pending = 0; + break; + } + } +} + + +static uint32_t cmdfifo_get(voodoo_t *voodoo) +{ + uint32_t val; + + while (voodoo->cmdfifo_depth_rd == voodoo->cmdfifo_depth_wr) + { + thread_wait_event(voodoo->wake_fifo_thread, -1); + thread_reset_event(voodoo->wake_fifo_thread); + } + + val = *(uint32_t *)&voodoo->fb_mem[voodoo->cmdfifo_rp & voodoo->fb_mask]; + + voodoo->cmdfifo_depth_rd++; + voodoo->cmdfifo_rp += 4; + +// voodoo_fifo_log(" CMDFIFO get %08x\n", val); + return val; +} + +static inline float cmdfifo_get_f(voodoo_t *voodoo) +{ + union + { + uint32_t i; + float f; + } tempif; + + tempif.i = cmdfifo_get(voodoo); + return tempif.f; +} + +enum +{ + CMDFIFO3_PC_MASK_RGB = (1 << 10), + CMDFIFO3_PC_MASK_ALPHA = (1 << 11), + CMDFIFO3_PC_MASK_Z = (1 << 12), + CMDFIFO3_PC_MASK_Wb = (1 << 13), + CMDFIFO3_PC_MASK_W0 = (1 << 14), + CMDFIFO3_PC_MASK_S0_T0 = (1 << 15), + CMDFIFO3_PC_MASK_W1 = (1 << 16), + CMDFIFO3_PC_MASK_S1_T1 = (1 << 17), + + CMDFIFO3_PC = (1 << 28) +}; + +void voodoo_fifo_thread(void *param) +{ + voodoo_t *voodoo = (voodoo_t *)param; + + while (1) + { + thread_set_event(voodoo->fifo_not_full_event); + thread_wait_event(voodoo->wake_fifo_thread, -1); + thread_reset_event(voodoo->wake_fifo_thread); + voodoo->voodoo_busy = 1; + while (!FIFO_EMPTY) + { + uint64_t start_time = plat_timer_read(); + uint64_t end_time; + fifo_entry_t *fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + + switch (fifo->addr_type & FIFO_TYPE) + { + case FIFO_WRITEL_REG: + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_REG) + { + voodoo_reg_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + case FIFO_WRITEW_FB: + voodoo_wait_for_render_thread_idle(voodoo); + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEW_FB) + { + voodoo_fb_writew(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + case FIFO_WRITEL_FB: + voodoo_wait_for_render_thread_idle(voodoo); + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_FB) + { + voodoo_fb_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + case FIFO_WRITEL_TEX: + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_TEX) + { + if (!(fifo->addr_type & 0x400000)) + voodoo_tex_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + case FIFO_WRITEL_2DREG: + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_2DREG) + { + voodoo_2d_reg_writel(voodoo, fifo->addr_type & FIFO_ADDR, fifo->val); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + + default: + fatal("Unknown fifo entry %08x\n", fifo->addr_type); + } + + if (FIFO_ENTRIES > 0xe000) + thread_set_event(voodoo->fifo_not_full_event); + + end_time = plat_timer_read(); + voodoo->time += end_time - start_time; + } + + while (voodoo->cmdfifo_enabled && voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr) + { + uint64_t start_time = plat_timer_read(); + uint64_t end_time; + uint32_t header = cmdfifo_get(voodoo); + uint32_t addr; + uint32_t mask; + int smode; + int num; + int num_verticies; + int v_num; + +// voodoo_fifo_log(" CMDFIFO header %08x at %08x\n", header, voodoo->cmdfifo_rp); + + switch (header & 7) + { + case 0: +// voodoo_fifo_log("CMDFIFO0\n"); + switch ((header >> 3) & 7) + { + case 0: /*NOP*/ + break; + + case 3: /*JMP local frame buffer*/ + voodoo->cmdfifo_rp = (header >> 4) & 0xfffffc; +// voodoo_fifo_log("JMP to %08x %04x\n", voodoo->cmdfifo_rp, header); + break; + + default: + fatal("Bad CMDFIFO0 %08x\n", header); + } + break; + + case 1: + num = header >> 16; + addr = (header & 0x7ff8) >> 1; +// voodoo_fifo_log("CMDFIFO1 addr=%08x\n",addr); + while (num--) + { + uint32_t val = cmdfifo_get(voodoo); + if ((addr & (1 << 13)) && voodoo->type >= VOODOO_BANSHEE) + { +// if (voodoo->type != VOODOO_BANSHEE) +// fatal("CMDFIFO1: Not Banshee\n"); +// voodoo_fifo_log("CMDFIFO1: write %08x %08x\n", addr, val); + voodoo_2d_reg_writel(voodoo, addr, val); + } + else + { + if ((addr & 0x3ff) == SST_triangleCMD || (addr & 0x3ff) == SST_ftriangleCMD || + (addr & 0x3ff) == SST_fastfillCMD || (addr & 0x3ff) == SST_nopCMD) + voodoo->cmd_written_fifo++; + + if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD) + voodoo->cmd_written_fifo++; + voodoo_reg_writel(addr, val, voodoo); + } + + if (header & (1 << 15)) + addr += 4; + } + break; + + case 2: + if (voodoo->type < VOODOO_BANSHEE) + fatal("CMDFIFO2: Not Banshee\n"); + mask = (header >> 3); + addr = 8; + while (mask) + { + if (mask & 1) + { + uint32_t val = cmdfifo_get(voodoo); + + voodoo_2d_reg_writel(voodoo, addr, val); + } + + addr += 4; + mask >>= 1; + } + break; + + case 3: + num = (header >> 29) & 7; + mask = header;//(header >> 10) & 0xff; + smode = (header >> 22) & 0xf; + voodoo_reg_writel(SST_sSetupMode, ((header >> 10) & 0xff) | (smode << 16), voodoo); + num_verticies = (header >> 6) & 0xf; + v_num = 0; + if (((header >> 3) & 7) == 2) + v_num = 1; +// voodoo_fifo_log("CMDFIFO3: num=%i verts=%i mask=%02x\n", num, num_verticies, (header >> 10) & 0xff); +// voodoo_fifo_log("CMDFIFO3 %02x %i\n", (header >> 10), (header >> 3) & 7); + + while (num_verticies--) + { + voodoo->verts[3].sVx = cmdfifo_get_f(voodoo); + voodoo->verts[3].sVy = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_RGB) + { + if (header & CMDFIFO3_PC) + { + uint32_t val = cmdfifo_get(voodoo); + voodoo->verts[3].sBlue = (float)(val & 0xff); + voodoo->verts[3].sGreen = (float)((val >> 8) & 0xff); + voodoo->verts[3].sRed = (float)((val >> 16) & 0xff); + voodoo->verts[3].sAlpha = (float)((val >> 24) & 0xff); + } + else + { + voodoo->verts[3].sRed = cmdfifo_get_f(voodoo); + voodoo->verts[3].sGreen = cmdfifo_get_f(voodoo); + voodoo->verts[3].sBlue = cmdfifo_get_f(voodoo); + } + } + if ((mask & CMDFIFO3_PC_MASK_ALPHA) && !(header & CMDFIFO3_PC)) + voodoo->verts[3].sAlpha = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_Z) + voodoo->verts[3].sVz = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_Wb) + voodoo->verts[3].sWb = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_W0) + voodoo->verts[3].sW0 = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_S0_T0) + { + voodoo->verts[3].sS0 = cmdfifo_get_f(voodoo); + voodoo->verts[3].sT0 = cmdfifo_get_f(voodoo); + } + if (mask & CMDFIFO3_PC_MASK_W1) + voodoo->verts[3].sW1 = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_S1_T1) + { + voodoo->verts[3].sS1 = cmdfifo_get_f(voodoo); + voodoo->verts[3].sT1 = cmdfifo_get_f(voodoo); + } + if (v_num) + voodoo_reg_writel(SST_sDrawTriCMD, 0, voodoo); + else + voodoo_reg_writel(SST_sBeginTriCMD, 0, voodoo); + v_num++; + if (v_num == 3 && ((header >> 3) & 7) == 0) + v_num = 0; + } + break; + + case 4: + num = (header >> 29) & 7; + mask = (header >> 15) & 0x3fff; + addr = (header & 0x7ff8) >> 1; +// voodoo_fifo_log("CMDFIFO4 addr=%08x\n",addr); + while (mask) + { + if (mask & 1) + { + uint32_t val = cmdfifo_get(voodoo); + + if ((addr & (1 << 13)) && voodoo->type >= VOODOO_BANSHEE) + { + if (voodoo->type < VOODOO_BANSHEE) + fatal("CMDFIFO1: Not Banshee\n"); +// voodoo_fifo_log("CMDFIFO1: write %08x %08x\n", addr, val); + voodoo_2d_reg_writel(voodoo, addr, val); + } + else + { + if ((addr & 0x3ff) == SST_triangleCMD || (addr & 0x3ff) == SST_ftriangleCMD || + (addr & 0x3ff) == SST_fastfillCMD || (addr & 0x3ff) == SST_nopCMD) + voodoo->cmd_written_fifo++; + + if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD) + voodoo->cmd_written_fifo++; + voodoo_reg_writel(addr, val, voodoo); + } + } + + addr += 4; + mask >>= 1; + } + while (num--) + cmdfifo_get(voodoo); + break; + + case 5: +// if (header & 0x3fc00000) +// fatal("CMDFIFO packet 5 has byte disables set %08x\n", header); + num = (header >> 3) & 0x7ffff; + addr = cmdfifo_get(voodoo) & 0xffffff; + if (!num) + num = 1; +// voodoo_fifo_log("CMDFIFO5 addr=%08x num=%i\n", addr, num); + switch (header >> 30) + { + case 0: /*Linear framebuffer (Banshee)*/ + if (voodoo->texture_present[0][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) + { +// voodoo_fifo_log("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); + flush_texture_cache(voodoo, addr & voodoo->texture_mask, 0); + } + if (voodoo->texture_present[1][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) + { +// voodoo_fifo_log("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); + flush_texture_cache(voodoo, addr & voodoo->texture_mask, 1); + } + while (num--) + { + uint32_t val = cmdfifo_get(voodoo); + if (addr <= voodoo->fb_mask) + *(uint32_t *)&voodoo->fb_mem[addr] = val; + addr += 4; + } + break; + case 2: /*Framebuffer*/ + while (num--) + { + uint32_t val = cmdfifo_get(voodoo); + voodoo_fb_writel(addr, val, voodoo); + addr += 4; + } + break; + case 3: /*Texture*/ + while (num--) + { + uint32_t val = cmdfifo_get(voodoo); + voodoo_tex_writel(addr, val, voodoo); + addr += 4; + } + break; + + default: + fatal("CMDFIFO packet 5 bad space %08x %08x\n", header, voodoo->cmdfifo_rp); + } + break; + + default: + fatal("Bad CMDFIFO packet %08x %08x\n", header, voodoo->cmdfifo_rp); + } + + end_time = plat_timer_read(); + voodoo->time += end_time - start_time; + } + voodoo->voodoo_busy = 0; + } +} diff --git a/src/video/vid_voodoo_reg.c b/src/video/vid_voodoo_reg.c new file mode 100644 index 000000000..2daa3975c --- /dev/null +++ b/src/video/vid_voodoo_reg.c @@ -0,0 +1,1361 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * 3DFX Voodoo emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/mem.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_banshee.h> +#include <86box/vid_voodoo_blitter.h> +#include <86box/vid_voodoo_dither.h> +#include <86box/vid_voodoo_fifo.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> +#include <86box/vid_voodoo_setup.h> +#include <86box/vid_voodoo_texture.h> + + +enum +{ + CHIP_FBI = 0x1, + CHIP_TREX0 = 0x2, + CHIP_TREX1 = 0x4, + CHIP_TREX2 = 0x8 +}; + + +#ifdef ENABLE_VOODOO_REG_LOG +int voodoo_reg_do_log = ENABLE_VOODOO_REG_LOG; + +static void +voodoo_reg_log(const char *fmt, ...) +{ + va_list ap; + + if (voodoo_reg_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define voodoo_reg_log(fmt, ...) +#endif + + +void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + union + { + uint32_t i; + float f; + } tempif; + int ad21 = addr & (1 << 21); + int chip = (addr >> 10) & 0xf; + if (!chip) + chip = 0xf; + + tempif.i = val; +//voodoo_reg_log("voodoo_reg_write_l: addr=%08x val=%08x(%f) chip=%x\n", addr, val, tempif.f, chip); + addr &= 0x3fc; + + if ((voodoo->fbiInit3 & FBIINIT3_REMAP) && addr < 0x100 && ad21) + addr |= 0x400; + switch (addr) + { + case SST_swapbufferCMD: + if (voodoo->type >= VOODOO_BANSHEE) + { +// voodoo_reg_log("swapbufferCMD %08x %08x\n", val, voodoo->leftOverlayBuf); + + voodoo_wait_for_render_thread_idle(voodoo); + if (!(val & 1)) + { + banshee_set_overlay_addr(voodoo->p, voodoo->leftOverlayBuf); + if (voodoo->swap_count > 0) + voodoo->swap_count--; + voodoo->frame_count++; + } + else if (TRIPLE_BUFFER) + { + if (voodoo->swap_pending) + voodoo_wait_for_swap_complete(voodoo); + voodoo->swap_interval = (val >> 1) & 0xff; + voodoo->swap_offset = voodoo->leftOverlayBuf; + voodoo->swap_pending = 1; + } + else + { + voodoo->swap_interval = (val >> 1) & 0xff; + voodoo->swap_offset = voodoo->leftOverlayBuf; + voodoo->swap_pending = 1; + + voodoo_wait_for_swap_complete(voodoo); + } + + voodoo->cmd_read++; + break; + } + + if (TRIPLE_BUFFER) + { + voodoo->disp_buffer = (voodoo->disp_buffer + 1) % 3; + voodoo->draw_buffer = (voodoo->draw_buffer + 1) % 3; + } + else + { + voodoo->disp_buffer = !voodoo->disp_buffer; + voodoo->draw_buffer = !voodoo->draw_buffer; + } + voodoo_recalc(voodoo); + + voodoo->params.swapbufferCMD = val; + +// voodoo_reg_log("Swap buffer %08x %d %p %i\n", val, voodoo->swap_count, &voodoo->swap_count, (voodoo == voodoo->set->voodoos[1]) ? 1 : 0); +// voodoo->front_offset = params->front_offset; + voodoo_wait_for_render_thread_idle(voodoo); + if (!(val & 1)) + { + memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line)); + voodoo->front_offset = voodoo->params.front_offset; + if (voodoo->swap_count > 0) + voodoo->swap_count--; + } + else if (TRIPLE_BUFFER) + { + if (voodoo->swap_pending) + voodoo_wait_for_swap_complete(voodoo); + + voodoo->swap_interval = (val >> 1) & 0xff; + voodoo->swap_offset = voodoo->params.front_offset; + voodoo->swap_pending = 1; + } + else + { + voodoo->swap_interval = (val >> 1) & 0xff; + voodoo->swap_offset = voodoo->params.front_offset; + voodoo->swap_pending = 1; + + voodoo_wait_for_swap_complete(voodoo); + } + voodoo->cmd_read++; + break; + + case SST_vertexAx: case SST_remap_vertexAx: + voodoo->params.vertexAx = val & 0xffff; + break; + case SST_vertexAy: case SST_remap_vertexAy: + voodoo->params.vertexAy = val & 0xffff; + break; + case SST_vertexBx: case SST_remap_vertexBx: + voodoo->params.vertexBx = val & 0xffff; + break; + case SST_vertexBy: case SST_remap_vertexBy: + voodoo->params.vertexBy = val & 0xffff; + break; + case SST_vertexCx: case SST_remap_vertexCx: + voodoo->params.vertexCx = val & 0xffff; + break; + case SST_vertexCy: case SST_remap_vertexCy: + voodoo->params.vertexCy = val & 0xffff; + break; + + case SST_startR: case SST_remap_startR: + voodoo->params.startR = val & 0xffffff; + break; + case SST_startG: case SST_remap_startG: + voodoo->params.startG = val & 0xffffff; + break; + case SST_startB: case SST_remap_startB: + voodoo->params.startB = val & 0xffffff; + break; + case SST_startZ: case SST_remap_startZ: + voodoo->params.startZ = val; + break; + case SST_startA: case SST_remap_startA: + voodoo->params.startA = val & 0xffffff; + break; + case SST_startS: case SST_remap_startS: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startS = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startS = ((int64_t)(int32_t)val) << 14; + break; + case SST_startT: case SST_remap_startT: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startT = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startT = ((int64_t)(int32_t)val) << 14; + break; + case SST_startW: case SST_remap_startW: + if (chip & CHIP_FBI) + voodoo->params.startW = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startW = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startW = (int64_t)(int32_t)val << 2; + break; + + case SST_dRdX: case SST_remap_dRdX: + voodoo->params.dRdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dGdX: case SST_remap_dGdX: + voodoo->params.dGdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dBdX: case SST_remap_dBdX: + voodoo->params.dBdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dZdX: case SST_remap_dZdX: + voodoo->params.dZdX = val; + break; + case SST_dAdX: case SST_remap_dAdX: + voodoo->params.dAdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dSdX: case SST_remap_dSdX: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dSdX = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdX = ((int64_t)(int32_t)val) << 14; + break; + case SST_dTdX: case SST_remap_dTdX: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dTdX = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdX = ((int64_t)(int32_t)val) << 14; + break; + case SST_dWdX: case SST_remap_dWdX: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dWdX = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdX = (int64_t)(int32_t)val << 2; + if (chip & CHIP_FBI) + voodoo->params.dWdX = (int64_t)(int32_t)val << 2; + break; + + case SST_dRdY: case SST_remap_dRdY: + voodoo->params.dRdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dGdY: case SST_remap_dGdY: + voodoo->params.dGdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dBdY: case SST_remap_dBdY: + voodoo->params.dBdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dZdY: case SST_remap_dZdY: + voodoo->params.dZdY = val; + break; + case SST_dAdY: case SST_remap_dAdY: + voodoo->params.dAdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dSdY: case SST_remap_dSdY: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dSdY = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdY = ((int64_t)(int32_t)val) << 14; + break; + case SST_dTdY: case SST_remap_dTdY: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dTdY = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdY = ((int64_t)(int32_t)val) << 14; + break; + case SST_dWdY: case SST_remap_dWdY: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dWdY = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdY = (int64_t)(int32_t)val << 2; + if (chip & CHIP_FBI) + voodoo->params.dWdY = (int64_t)(int32_t)val << 2; + break; + + case SST_triangleCMD: case SST_remap_triangleCMD: + voodoo->params.sign = val & (1 << 31); + + if (voodoo->ncc_dirty[0]) + voodoo_update_ncc(voodoo, 0); + if (voodoo->ncc_dirty[1]) + voodoo_update_ncc(voodoo, 1); + voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; + + voodoo_queue_triangle(voodoo, &voodoo->params); + + voodoo->cmd_read++; + break; + + case SST_fvertexAx: case SST_remap_fvertexAx: + voodoo->fvertexAx.i = val; + voodoo->params.vertexAx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexAx.f * 16.0f) & 0xffff; + break; + case SST_fvertexAy: case SST_remap_fvertexAy: + voodoo->fvertexAy.i = val; + voodoo->params.vertexAy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexAy.f * 16.0f) & 0xffff; + break; + case SST_fvertexBx: case SST_remap_fvertexBx: + voodoo->fvertexBx.i = val; + voodoo->params.vertexBx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexBx.f * 16.0f) & 0xffff; + break; + case SST_fvertexBy: case SST_remap_fvertexBy: + voodoo->fvertexBy.i = val; + voodoo->params.vertexBy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexBy.f * 16.0f) & 0xffff; + break; + case SST_fvertexCx: case SST_remap_fvertexCx: + voodoo->fvertexCx.i = val; + voodoo->params.vertexCx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexCx.f * 16.0f) & 0xffff; + break; + case SST_fvertexCy: case SST_remap_fvertexCy: + voodoo->fvertexCy.i = val; + voodoo->params.vertexCy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexCy.f * 16.0f) & 0xffff; + break; + + case SST_fstartR: case SST_remap_fstartR: + tempif.i = val; + voodoo->params.startR = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartG: case SST_remap_fstartG: + tempif.i = val; + voodoo->params.startG = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartB: case SST_remap_fstartB: + tempif.i = val; + voodoo->params.startB = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartZ: case SST_remap_fstartZ: + tempif.i = val; + voodoo->params.startZ = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartA: case SST_remap_fstartA: + tempif.i = val; + voodoo->params.startA = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartS: case SST_remap_fstartS: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startS = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startS = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fstartT: case SST_remap_fstartT: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startT = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startT = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fstartW: case SST_remap_fstartW: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startW = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startW = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_FBI) + voodoo->params.startW = (int64_t)(tempif.f * 4294967296.0f); + break; + + case SST_fdRdX: case SST_remap_fdRdX: + tempif.i = val; + voodoo->params.dRdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdGdX: case SST_remap_fdGdX: + tempif.i = val; + voodoo->params.dGdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdBdX: case SST_remap_fdBdX: + tempif.i = val; + voodoo->params.dBdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdZdX: case SST_remap_fdZdX: + tempif.i = val; + voodoo->params.dZdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdAdX: case SST_remap_fdAdX: + tempif.i = val; + voodoo->params.dAdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdSdX: case SST_remap_fdSdX: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dSdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdX = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fdTdX: case SST_remap_fdTdX: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dTdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdX = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fdWdX: case SST_remap_fdWdX: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dWdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_FBI) + voodoo->params.dWdX = (int64_t)(tempif.f * 4294967296.0f); + break; + + case SST_fdRdY: case SST_remap_fdRdY: + tempif.i = val; + voodoo->params.dRdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdGdY: case SST_remap_fdGdY: + tempif.i = val; + voodoo->params.dGdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdBdY: case SST_remap_fdBdY: + tempif.i = val; + voodoo->params.dBdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdZdY: case SST_remap_fdZdY: + tempif.i = val; + voodoo->params.dZdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdAdY: case SST_remap_fdAdY: + tempif.i = val; + voodoo->params.dAdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdSdY: case SST_remap_fdSdY: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dSdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdY = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fdTdY: case SST_remap_fdTdY: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dTdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdY = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fdWdY: case SST_remap_fdWdY: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dWdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_FBI) + voodoo->params.dWdY = (int64_t)(tempif.f * 4294967296.0f); + break; + + case SST_ftriangleCMD: + voodoo->params.sign = val & (1 << 31); + + if (voodoo->ncc_dirty[0]) + voodoo_update_ncc(voodoo, 0); + if (voodoo->ncc_dirty[1]) + voodoo_update_ncc(voodoo, 1); + voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; + + voodoo_queue_triangle(voodoo, &voodoo->params); + + voodoo->cmd_read++; + break; + + case SST_fbzColorPath: + voodoo->params.fbzColorPath = val; + voodoo->rgb_sel = val & 3; + break; + + case SST_fogMode: + voodoo->params.fogMode = val; + break; + case SST_alphaMode: + voodoo->params.alphaMode = val; + break; + case SST_fbzMode: + voodoo->params.fbzMode = val; + voodoo_recalc(voodoo); + break; + case SST_lfbMode: + voodoo->lfbMode = val; + voodoo_recalc(voodoo); + break; + + case SST_clipLeftRight: + if (voodoo->type >= VOODOO_2) + { + voodoo->params.clipRight = val & 0xfff; + voodoo->params.clipLeft = (val >> 16) & 0xfff; + } + else + { + voodoo->params.clipRight = val & 0x3ff; + voodoo->params.clipLeft = (val >> 16) & 0x3ff; + } + break; + case SST_clipLowYHighY: + if (voodoo->type >= VOODOO_2) + { + voodoo->params.clipHighY = val & 0xfff; + voodoo->params.clipLowY = (val >> 16) & 0xfff; + } + else + { + voodoo->params.clipHighY = val & 0x3ff; + voodoo->params.clipLowY = (val >> 16) & 0x3ff; + } + break; + + case SST_nopCMD: + voodoo->cmd_read++; + voodoo->fbiPixelsIn = 0; + voodoo->fbiChromaFail = 0; + voodoo->fbiZFuncFail = 0; + voodoo->fbiAFuncFail = 0; + voodoo->fbiPixelsOut = 0; + break; + case SST_fastfillCMD: + voodoo_wait_for_render_thread_idle(voodoo); + voodoo_fastfill(voodoo, &voodoo->params); + voodoo->cmd_read++; + break; + + case SST_fogColor: + voodoo->params.fogColor.r = (val >> 16) & 0xff; + voodoo->params.fogColor.g = (val >> 8) & 0xff; + voodoo->params.fogColor.b = val & 0xff; + break; + + case SST_zaColor: + voodoo->params.zaColor = val; + break; + case SST_chromaKey: + voodoo->params.chromaKey_r = (val >> 16) & 0xff; + voodoo->params.chromaKey_g = (val >> 8) & 0xff; + voodoo->params.chromaKey_b = val & 0xff; + voodoo->params.chromaKey = val & 0xffffff; + break; + case SST_stipple: + voodoo->params.stipple = val; + break; + case SST_color0: + voodoo->params.color0 = val; + break; + case SST_color1: + voodoo->params.color1 = val; + break; + + case SST_fogTable00: case SST_fogTable01: case SST_fogTable02: case SST_fogTable03: + case SST_fogTable04: case SST_fogTable05: case SST_fogTable06: case SST_fogTable07: + case SST_fogTable08: case SST_fogTable09: case SST_fogTable0a: case SST_fogTable0b: + case SST_fogTable0c: case SST_fogTable0d: case SST_fogTable0e: case SST_fogTable0f: + case SST_fogTable10: case SST_fogTable11: case SST_fogTable12: case SST_fogTable13: + case SST_fogTable14: case SST_fogTable15: case SST_fogTable16: case SST_fogTable17: + case SST_fogTable18: case SST_fogTable19: case SST_fogTable1a: case SST_fogTable1b: + case SST_fogTable1c: case SST_fogTable1d: case SST_fogTable1e: case SST_fogTable1f: + addr = (addr - SST_fogTable00) >> 1; + voodoo->params.fogTable[addr].dfog = val & 0xff; + voodoo->params.fogTable[addr].fog = (val >> 8) & 0xff; + voodoo->params.fogTable[addr+1].dfog = (val >> 16) & 0xff; + voodoo->params.fogTable[addr+1].fog = (val >> 24) & 0xff; + break; + + case SST_clipLeftRight1: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->params.clipRight1 = val & 0xfff; + voodoo->params.clipLeft1 = (val >> 16) & 0xfff; + } + break; + case SST_clipTopBottom1: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->params.clipHighY1 = val & 0xfff; + voodoo->params.clipLowY1 = (val >> 16) & 0xfff; + } + break; + + case SST_colBufferAddr: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->params.draw_offset = val & 0xfffff0; + voodoo->fb_write_offset = voodoo->params.draw_offset; +// voodoo_reg_log("colorBufferAddr=%06x\n", voodoo->params.draw_offset); + } + break; + case SST_colBufferStride: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->col_tiled = val & (1 << 15); + voodoo->params.col_tiled = voodoo->col_tiled; + if (voodoo->col_tiled) + { + voodoo->row_width = (val & 0x7f) * 128*32; +// voodoo_reg_log("colBufferStride tiled = %i bytes, tiled %08x\n", voodoo->row_width, val); + } + else + { + voodoo->row_width = val & 0x3fff; +// voodoo_reg_log("colBufferStride linear = %i bytes, linear\n", voodoo->row_width); + } + voodoo->params.row_width = voodoo->row_width; + } + break; + case SST_auxBufferAddr: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->params.aux_offset = val & 0xfffff0; +// pclog("auxBufferAddr=%06x\n", voodoo->params.aux_offset); + } + break; + case SST_auxBufferStride: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->aux_tiled = val & (1 << 15); + voodoo->params.aux_tiled = voodoo->aux_tiled; + if (voodoo->aux_tiled) + { + voodoo->aux_row_width = (val & 0x7f) * 128*32; +// voodoo_reg_log("auxBufferStride tiled = %i bytes, tiled\n", voodoo->aux_row_width); + } + else + { + voodoo->aux_row_width = val & 0x3fff; +// voodoo_reg_log("auxBufferStride linear = %i bytes, linear\n", voodoo->aux_row_width); + } + voodoo->params.aux_row_width = voodoo->aux_row_width; + } + break; + + case SST_clutData: + voodoo->clutData[(val >> 24) & 0x3f].b = val & 0xff; + voodoo->clutData[(val >> 24) & 0x3f].g = (val >> 8) & 0xff; + voodoo->clutData[(val >> 24) & 0x3f].r = (val >> 16) & 0xff; + if (val & 0x20000000) + { + voodoo->clutData[(val >> 24) & 0x3f].b = 255; + voodoo->clutData[(val >> 24) & 0x3f].g = 255; + voodoo->clutData[(val >> 24) & 0x3f].r = 255; + } + voodoo->clutData_dirty = 1; + break; + + case SST_sSetupMode: + voodoo->sSetupMode = val; + break; + case SST_sVx: + tempif.i = val; + voodoo->verts[3].sVx = tempif.f; +// voodoo_reg_log("sVx[%i]=%f\n", voodoo->vertex_num, tempif.f); + break; + case SST_sVy: + tempif.i = val; + voodoo->verts[3].sVy = tempif.f; +// voodoo_reg_log("sVy[%i]=%f\n", voodoo->vertex_num, tempif.f); + break; + case SST_sARGB: + voodoo->verts[3].sBlue = (float)(val & 0xff); + voodoo->verts[3].sGreen = (float)((val >> 8) & 0xff); + voodoo->verts[3].sRed = (float)((val >> 16) & 0xff); + voodoo->verts[3].sAlpha = (float)((val >> 24) & 0xff); + break; + case SST_sRed: + tempif.i = val; + voodoo->verts[3].sRed = tempif.f; + break; + case SST_sGreen: + tempif.i = val; + voodoo->verts[3].sGreen = tempif.f; + break; + case SST_sBlue: + tempif.i = val; + voodoo->verts[3].sBlue = tempif.f; + break; + case SST_sAlpha: + tempif.i = val; + voodoo->verts[3].sAlpha = tempif.f; + break; + case SST_sVz: + tempif.i = val; + voodoo->verts[3].sVz = tempif.f; + break; + case SST_sWb: + tempif.i = val; + voodoo->verts[3].sWb = tempif.f; + break; + case SST_sW0: + tempif.i = val; + voodoo->verts[3].sW0 = tempif.f; + break; + case SST_sS0: + tempif.i = val; + voodoo->verts[3].sS0 = tempif.f; + break; + case SST_sT0: + tempif.i = val; + voodoo->verts[3].sT0 = tempif.f; + break; + case SST_sW1: + tempif.i = val; + voodoo->verts[3].sW1 = tempif.f; + break; + case SST_sS1: + tempif.i = val; + voodoo->verts[3].sS1 = tempif.f; + break; + case SST_sT1: + tempif.i = val; + voodoo->verts[3].sT1 = tempif.f; + break; + + case SST_sBeginTriCMD: +// voodoo_reg_log("sBeginTriCMD %i %f\n", voodoo->vertex_num, voodoo->verts[4].sVx); + voodoo->verts[0] = voodoo->verts[3]; + voodoo->verts[1] = voodoo->verts[3]; + voodoo->verts[2] = voodoo->verts[3]; + voodoo->vertex_next_age = 0; + voodoo->vertex_ages[0] = voodoo->vertex_next_age++; + + voodoo->num_verticies = 1; + voodoo->cull_pingpong = 0; + break; + case SST_sDrawTriCMD: +// voodoo_reg_log("sDrawTriCMD %i %i\n", voodoo->num_verticies, voodoo->sSetupMode & SETUPMODE_STRIP_MODE); + /*I'm not sure this is the vertex selection algorithm actually used in the 3dfx + chips, but this works with a number of games that switch between strip and fan + mode in the middle of a run (eg Black & White, Viper Racing)*/ + if (voodoo->vertex_next_age < 3) + { + /*Fewer than three vertices already written, store in next slot*/ + int vertex_nr = voodoo->vertex_next_age; + + voodoo->verts[vertex_nr] = voodoo->verts[3]; + voodoo->vertex_ages[vertex_nr] = voodoo->vertex_next_age++; + } + else + { + int vertex_nr = 0; + + if (!(voodoo->sSetupMode & SETUPMODE_STRIP_MODE)) + { + /*Strip - find oldest vertex*/ + if ((voodoo->vertex_ages[0] < voodoo->vertex_ages[1]) && + (voodoo->vertex_ages[0] < voodoo->vertex_ages[2])) + vertex_nr = 0; + else if ((voodoo->vertex_ages[1] < voodoo->vertex_ages[0]) && + (voodoo->vertex_ages[1] < voodoo->vertex_ages[2])) + vertex_nr = 1; + else + vertex_nr = 2; + } + else + { + /*Fan - find second oldest vertex (ie pivot around oldest)*/ + if ((voodoo->vertex_ages[1] < voodoo->vertex_ages[0]) && + (voodoo->vertex_ages[0] < voodoo->vertex_ages[2])) + vertex_nr = 0; + else if ((voodoo->vertex_ages[2] < voodoo->vertex_ages[0]) && + (voodoo->vertex_ages[0] < voodoo->vertex_ages[1])) + vertex_nr = 0; + else if ((voodoo->vertex_ages[0] < voodoo->vertex_ages[1]) && + (voodoo->vertex_ages[1] < voodoo->vertex_ages[2])) + vertex_nr = 1; + else if ((voodoo->vertex_ages[2] < voodoo->vertex_ages[1]) && + (voodoo->vertex_ages[1] < voodoo->vertex_ages[0])) + vertex_nr = 1; + else + vertex_nr = 2; + } + voodoo->verts[vertex_nr] = voodoo->verts[3]; + voodoo->vertex_ages[vertex_nr] = voodoo->vertex_next_age++; + } + + voodoo->num_verticies++; + if (voodoo->num_verticies == 3) + { +// voodoo_reg_log("triangle_setup\n"); + voodoo_triangle_setup(voodoo); + voodoo->cull_pingpong = !voodoo->cull_pingpong; + + voodoo->num_verticies = 2; + } + break; + + case SST_bltSrcBaseAddr: + voodoo->bltSrcBaseAddr = val & 0x3fffff; + break; + case SST_bltDstBaseAddr: +// voodoo_reg_log("Write bltDstBaseAddr %08x\n", val); + voodoo->bltDstBaseAddr = val & 0x3fffff; + break; + case SST_bltXYStrides: + voodoo->bltSrcXYStride = val & 0xfff; + voodoo->bltDstXYStride = (val >> 16) & 0xfff; +// voodoo_reg_log("Write bltXYStrides %08x\n", val); + break; + case SST_bltSrcChromaRange: + voodoo->bltSrcChromaRange = val; + voodoo->bltSrcChromaMinB = val & 0x1f; + voodoo->bltSrcChromaMinG = (val >> 5) & 0x3f; + voodoo->bltSrcChromaMinR = (val >> 11) & 0x1f; + voodoo->bltSrcChromaMaxB = (val >> 16) & 0x1f; + voodoo->bltSrcChromaMaxG = (val >> 21) & 0x3f; + voodoo->bltSrcChromaMaxR = (val >> 27) & 0x1f; + break; + case SST_bltDstChromaRange: + voodoo->bltDstChromaRange = val; + voodoo->bltDstChromaMinB = val & 0x1f; + voodoo->bltDstChromaMinG = (val >> 5) & 0x3f; + voodoo->bltDstChromaMinR = (val >> 11) & 0x1f; + voodoo->bltDstChromaMaxB = (val >> 16) & 0x1f; + voodoo->bltDstChromaMaxG = (val >> 21) & 0x3f; + voodoo->bltDstChromaMaxR = (val >> 27) & 0x1f; + break; + case SST_bltClipX: + voodoo->bltClipRight = val & 0xfff; + voodoo->bltClipLeft = (val >> 16) & 0xfff; + break; + case SST_bltClipY: + voodoo->bltClipHighY = val & 0xfff; + voodoo->bltClipLowY = (val >> 16) & 0xfff; + break; + + case SST_bltSrcXY: + voodoo->bltSrcX = val & 0x7ff; + voodoo->bltSrcY = (val >> 16) & 0x7ff; + break; + case SST_bltDstXY: +// voodoo_reg_log("Write bltDstXY %08x\n", val); + voodoo->bltDstX = val & 0x7ff; + voodoo->bltDstY = (val >> 16) & 0x7ff; + if (val & (1 << 31)) + voodoo_v2_blit_start(voodoo); + break; + case SST_bltSize: +// voodoo_reg_log("Write bltSize %08x\n", val); + voodoo->bltSizeX = val & 0xfff; + if (voodoo->bltSizeX & 0x800) + voodoo->bltSizeX |= 0xfffff000; + voodoo->bltSizeY = (val >> 16) & 0xfff; + if (voodoo->bltSizeY & 0x800) + voodoo->bltSizeY |= 0xfffff000; + if (val & (1 << 31)) + voodoo_v2_blit_start(voodoo); + break; + case SST_bltRop: + voodoo->bltRop[0] = val & 0xf; + voodoo->bltRop[1] = (val >> 4) & 0xf; + voodoo->bltRop[2] = (val >> 8) & 0xf; + voodoo->bltRop[3] = (val >> 12) & 0xf; + break; + case SST_bltColor: +// voodoo_reg_log("Write bltColor %08x\n", val); + voodoo->bltColorFg = val & 0xffff; + voodoo->bltColorBg = (val >> 16) & 0xffff; + break; + + case SST_bltCommand: + voodoo->bltCommand = val; +// voodoo_reg_log("Write bltCommand %08x\n", val); + if (val & (1 << 31)) + voodoo_v2_blit_start(voodoo); + break; + case SST_bltData: + voodoo_v2_blit_data(voodoo, val); + break; + + case SST_textureMode: + if (chip & CHIP_TREX0) + { + voodoo->params.textureMode[0] = val; + voodoo->params.tformat[0] = (val >> 8) & 0xf; + } + if (chip & CHIP_TREX1) + { + voodoo->params.textureMode[1] = val; + voodoo->params.tformat[1] = (val >> 8) & 0xf; + } + break; + case SST_tLOD: + if (chip & CHIP_TREX0) + { + voodoo->params.tLOD[0] = val; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + voodoo->params.tLOD[1] = val; + voodoo_recalc_tex(voodoo, 1); + } + break; + case SST_tDetail: + if (chip & CHIP_TREX0) + { + voodoo->params.detail_max[0] = val & 0xff; + voodoo->params.detail_bias[0] = (val >> 8) & 0x3f; + voodoo->params.detail_scale[0] = (val >> 14) & 7; + } + if (chip & CHIP_TREX1) + { + voodoo->params.detail_max[1] = val & 0xff; + voodoo->params.detail_bias[1] = (val >> 8) & 0x3f; + voodoo->params.detail_scale[1] = (val >> 14) & 7; + } + break; + case SST_texBaseAddr: + if (chip & CHIP_TREX0) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr[0] = val & 0xfffff0; + else + voodoo->params.texBaseAddr[0] = (val & 0x7ffff) << 3; +// voodoo_reg_log("texBaseAddr = %08x %08x\n", voodoo->params.texBaseAddr[0], val); + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr[1] = val & 0xfffff0; + else + voodoo->params.texBaseAddr[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } + break; + case SST_texBaseAddr1: + if (chip & CHIP_TREX0) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr1[0] = val & 0xfffff0; + else + voodoo->params.texBaseAddr1[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr1[1] = val & 0xfffff0; + else + voodoo->params.texBaseAddr1[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } + break; + case SST_texBaseAddr2: + if (chip & CHIP_TREX0) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr2[0] = val & 0xfffff0; + else + voodoo->params.texBaseAddr2[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr2[1] = val & 0xfffff0; + else + voodoo->params.texBaseAddr2[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } + break; + case SST_texBaseAddr38: + if (chip & CHIP_TREX0) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr38[0] = val & 0xfffff0; + else + voodoo->params.texBaseAddr38[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr38[1] = val & 0xfffff0; + else + voodoo->params.texBaseAddr38[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } + break; + + case SST_trexInit1: + if (chip & CHIP_TREX0) + voodoo->trexInit1[0] = val; + if (chip & CHIP_TREX1) + voodoo->trexInit1[1] = val; + break; + + case SST_nccTable0_Y0: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable0_Y1: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable0_Y2: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[2] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable0_Y3: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + + case SST_nccTable0_I0: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + case SST_nccTable0_I2: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[2] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + case SST_nccTable0_Q0: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].q[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].q[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + case SST_nccTable0_Q2: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[2] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + if (val & (1 << 31)) + { + int p = (val >> 23) & 0xfe; + if (chip & CHIP_TREX0) + { + voodoo->palette[0][p].u = val | 0xff000000; + voodoo->palette_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->palette[1][p].u = val | 0xff000000; + voodoo->palette_dirty[1] = 1; + } + } + break; + + case SST_nccTable0_I1: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + case SST_nccTable0_I3: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + case SST_nccTable0_Q1: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].q[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].q[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + case SST_nccTable0_Q3: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].q[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].q[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + if (val & (1 << 31)) + { + int p = ((val >> 23) & 0xfe) | 0x01; + if (chip & CHIP_TREX0) + { + voodoo->palette[0][p].u = val | 0xff000000; + voodoo->palette_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->palette[1][p].u = val | 0xff000000; + voodoo->palette_dirty[1] = 1; + } + } + break; + + case SST_nccTable1_Y0: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Y1: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Y2: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[2] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Y3: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_I0: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_I1: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_I2: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[2] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_I3: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Q0: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Q1: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Q2: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[2] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Q3: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + + case SST_userIntrCMD: + fatal("userIntrCMD write %08x from FIFO\n", val); + break; + + + case SST_leftOverlayBuf: + voodoo->leftOverlayBuf = val; + break; + } +} diff --git a/src/video/vid_voodoo_render.c b/src/video/vid_voodoo_render.c new file mode 100644 index 000000000..2d7b260d6 --- /dev/null +++ b/src/video/vid_voodoo_render.c @@ -0,0 +1,1675 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * 3DFX Voodoo emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/mem.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_dither.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> +#include <86box/vid_voodoo_texture.h> + + +typedef struct voodoo_state_t +{ + int xstart, xend, xdir; + uint32_t base_r, base_g, base_b, base_a, base_z; + struct + { + int64_t base_s, base_t, base_w; + int lod; + } tmu[2]; + int64_t base_w; + int lod; + int lod_min[2], lod_max[2]; + int dx1, dx2; + int y, yend, ydir; + int32_t dxAB, dxAC, dxBC; + int tex_b[2], tex_g[2], tex_r[2], tex_a[2]; + int tex_s, tex_t; + int clamp_s[2], clamp_t[2]; + + int32_t vertexAx, vertexAy, vertexBx, vertexBy, vertexCx, vertexCy; + + uint32_t *tex[2][LOD_MAX+1]; + int tformat; + + int *tex_w_mask[2]; + int *tex_h_mask[2]; + int *tex_shift[2]; + int *tex_lod[2]; + + uint16_t *fb_mem, *aux_mem; + + int32_t ib, ig, ir, ia; + int32_t z; + + int32_t new_depth; + + int64_t tmu0_s, tmu0_t; + int64_t tmu0_w; + int64_t tmu1_s, tmu1_t; + int64_t tmu1_w; + int64_t w; + + int pixel_count, texel_count; + int x, x2, x_tiled; + + uint32_t w_depth; + + float log_temp; + uint32_t ebp_store; + uint32_t texBaseAddr; + + int lod_frac[2]; +} voodoo_state_t; + +#ifdef ENABLE_VOODOO_RENDER_LOG +int voodoo_render_do_log = ENABLE_VOODOO_RENDER_LOG; + +static void +voodoo_render_log(const char *fmt, ...) +{ + va_list ap; + + if (voodoo_render_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define voodoo_render_log(fmt, ...) +#endif + + +static uint8_t logtable[256] = +{ + 0x00,0x01,0x02,0x04,0x05,0x07,0x08,0x09,0x0b,0x0c,0x0e,0x0f,0x10,0x12,0x13,0x15, + 0x16,0x17,0x19,0x1a,0x1b,0x1d,0x1e,0x1f,0x21,0x22,0x23,0x25,0x26,0x27,0x28,0x2a, + 0x2b,0x2c,0x2e,0x2f,0x30,0x31,0x33,0x34,0x35,0x36,0x38,0x39,0x3a,0x3b,0x3d,0x3e, + 0x3f,0x40,0x41,0x43,0x44,0x45,0x46,0x47,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x50,0x51, + 0x52,0x53,0x54,0x55,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x60,0x61,0x62,0x63, + 0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74, + 0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x83,0x84,0x85, + 0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8c,0x8d,0x8e,0x8f,0x90,0x91,0x92,0x93,0x94, + 0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,0xa0,0xa1,0xa2,0xa2,0xa3, + 0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xad,0xae,0xaf,0xb0,0xb1,0xb2, + 0xb3,0xb4,0xb5,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbc,0xbd,0xbe,0xbf,0xc0, + 0xc1,0xc2,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xcd, + 0xce,0xcf,0xd0,0xd1,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd6,0xd7,0xd8,0xd9,0xda,0xda, + 0xdb,0xdc,0xdd,0xde,0xde,0xdf,0xe0,0xe1,0xe1,0xe2,0xe3,0xe4,0xe5,0xe5,0xe6,0xe7, + 0xe8,0xe8,0xe9,0xea,0xeb,0xeb,0xec,0xed,0xee,0xef,0xef,0xf0,0xf1,0xf2,0xf2,0xf3, + 0xf4,0xf5,0xf5,0xf6,0xf7,0xf7,0xf8,0xf9,0xfa,0xfa,0xfb,0xfc,0xfd,0xfd,0xfe,0xff +}; + +static __inline int fastlog(uint64_t val) +{ + uint64_t oldval = val; + int exp = 63; + int frac; + + if (!val || val & (1ULL << 63)) + return 0x80000000; + + if (!(val & 0xffffffff00000000)) + { + exp -= 32; + val <<= 32; + } + if (!(val & 0xffff000000000000)) + { + exp -= 16; + val <<= 16; + } + if (!(val & 0xff00000000000000)) + { + exp -= 8; + val <<= 8; + } + if (!(val & 0xf000000000000000)) + { + exp -= 4; + val <<= 4; + } + if (!(val & 0xc000000000000000)) + { + exp -= 2; + val <<= 2; + } + if (!(val & 0x8000000000000000)) + { + exp -= 1; + val <<= 1; + } + + if (exp >= 8) + frac = (oldval >> (exp - 8)) & 0xff; + else + frac = (oldval << (8 - exp)) & 0xff; + + return (exp << 8) | logtable[frac]; +} + +static inline int voodoo_fls(uint16_t val) +{ + int num = 0; + +//voodoo_render_log("fls(%04x) = ", val); + if (!(val & 0xff00)) + { + num += 8; + val <<= 8; + } + if (!(val & 0xf000)) + { + num += 4; + val <<= 4; + } + if (!(val & 0xc000)) + { + num += 2; + val <<= 2; + } + if (!(val & 0x8000)) + { + num += 1; + val <<= 1; + } +//voodoo_render_log("%i %04x\n", num, val); + return num; +} + +typedef struct voodoo_texture_state_t +{ + int s, t; + int w_mask, h_mask; + int tex_shift; +} voodoo_texture_state_t; + +static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int tmu) +{ + uint32_t dat; + + if (texture_state->s & ~texture_state->w_mask) + { + if (state->clamp_s[tmu]) + { + if (texture_state->s < 0) + texture_state->s = 0; + if (texture_state->s > texture_state->w_mask) + texture_state->s = texture_state->w_mask; + } + else + texture_state->s &= texture_state->w_mask; + } + if (texture_state->t & ~texture_state->h_mask) + { + if (state->clamp_t[tmu]) + { + if (texture_state->t < 0) + texture_state->t = 0; + if (texture_state->t > texture_state->h_mask) + texture_state->t = texture_state->h_mask; + } + else + texture_state->t &= texture_state->h_mask; + } + + dat = state->tex[tmu][state->lod][texture_state->s + (texture_state->t << texture_state->tex_shift)]; + + state->tex_b[tmu] = dat & 0xff; + state->tex_g[tmu] = (dat >> 8) & 0xff; + state->tex_r[tmu] = (dat >> 16) & 0xff; + state->tex_a[tmu] = (dat >> 24) & 0xff; +} + +#define LOW4(x) ((x & 0x0f) | ((x & 0x0f) << 4)) +#define HIGH4(x) ((x & 0xf0) | ((x & 0xf0) >> 4)) + +static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int s, int t, int *d, int tmu, int x) +{ + rgba_u dat[4]; + + if (((s | (s + 1)) & ~texture_state->w_mask) || ((t | (t + 1)) & ~texture_state->h_mask)) + { + int c; + for (c = 0; c < 4; c++) + { + int _s = s + (c & 1); + int _t = t + ((c & 2) >> 1); + + if (_s & ~texture_state->w_mask) + { + if (state->clamp_s[tmu]) + { + if (_s < 0) + _s = 0; + if (_s > texture_state->w_mask) + _s = texture_state->w_mask; + } + else + _s &= texture_state->w_mask; + } + if (_t & ~texture_state->h_mask) + { + if (state->clamp_t[tmu]) + { + if (_t < 0) + _t = 0; + if (_t > texture_state->h_mask) + _t = texture_state->h_mask; + } + else + _t &= texture_state->h_mask; + } + dat[c].u = state->tex[tmu][state->lod][_s + (_t << texture_state->tex_shift)]; + } + } + else + { + dat[0].u = state->tex[tmu][state->lod][s + (t << texture_state->tex_shift)]; + dat[1].u = state->tex[tmu][state->lod][s + 1 + (t << texture_state->tex_shift)]; + dat[2].u = state->tex[tmu][state->lod][s + ((t + 1) << texture_state->tex_shift)]; + dat[3].u = state->tex[tmu][state->lod][s + 1 + ((t + 1) << texture_state->tex_shift)]; + } + + state->tex_r[tmu] = (dat[0].rgba.r * d[0] + dat[1].rgba.r * d[1] + dat[2].rgba.r * d[2] + dat[3].rgba.r * d[3]) >> 8; + state->tex_g[tmu] = (dat[0].rgba.g * d[0] + dat[1].rgba.g * d[1] + dat[2].rgba.g * d[2] + dat[3].rgba.g * d[3]) >> 8; + state->tex_b[tmu] = (dat[0].rgba.b * d[0] + dat[1].rgba.b * d[1] + dat[2].rgba.b * d[2] + dat[3].rgba.b * d[3]) >> 8; + state->tex_a[tmu] = (dat[0].rgba.a * d[0] + dat[1].rgba.a * d[1] + dat[2].rgba.a * d[2] + dat[3].rgba.a * d[3]) >> 8; +} + +static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) +{ + voodoo_texture_state_t texture_state; + int d[4]; + int s, t; + int tex_lod = state->tex_lod[tmu][state->lod]; + + texture_state.w_mask = state->tex_w_mask[tmu][state->lod]; + texture_state.h_mask = state->tex_h_mask[tmu][state->lod]; + texture_state.tex_shift = 8 - tex_lod; + + if (params->tLOD[tmu] & LOD_TMIRROR_S) + { + if (state->tex_s & 0x1000) + state->tex_s = ~state->tex_s; + } + if (params->tLOD[tmu] & LOD_TMIRROR_T) + { + if (state->tex_t & 0x1000) + state->tex_t = ~state->tex_t; + } + + if (voodoo->bilinear_enabled && params->textureMode[tmu] & 6) + { + int _ds, dt; + + state->tex_s -= 1 << (3+tex_lod); + state->tex_t -= 1 << (3+tex_lod); + + s = state->tex_s >> tex_lod; + t = state->tex_t >> tex_lod; + + _ds = s & 0xf; + dt = t & 0xf; + + s >>= 4; + t >>= 4; +//if (x == 80) +//if (voodoo_output) +// voodoo_render_log("s=%08x t=%08x _ds=%02x _dt=%02x\n", s, t, _ds, dt); + d[0] = (16 - _ds) * (16 - dt); + d[1] = _ds * (16 - dt); + d[2] = (16 - _ds) * dt; + d[3] = _ds * dt; + +// texture_state.s = s; +// texture_state.t = t; + tex_read_4(state, &texture_state, s, t, d, tmu, x); + + +/* state->tex_r = (tex_samples[0].rgba.r * d[0] + tex_samples[1].rgba.r * d[1] + tex_samples[2].rgba.r * d[2] + tex_samples[3].rgba.r * d[3]) >> 8; + state->tex_g = (tex_samples[0].rgba.g * d[0] + tex_samples[1].rgba.g * d[1] + tex_samples[2].rgba.g * d[2] + tex_samples[3].rgba.g * d[3]) >> 8; + state->tex_b = (tex_samples[0].rgba.b * d[0] + tex_samples[1].rgba.b * d[1] + tex_samples[2].rgba.b * d[2] + tex_samples[3].rgba.b * d[3]) >> 8; + state->tex_a = (tex_samples[0].rgba.a * d[0] + tex_samples[1].rgba.a * d[1] + tex_samples[2].rgba.a * d[2] + tex_samples[3].rgba.a * d[3]) >> 8;*/ +/* state->tex_r = tex_samples[0].r; + state->tex_g = tex_samples[0].g; + state->tex_b = tex_samples[0].b; + state->tex_a = tex_samples[0].a;*/ + } + else + { + // rgba_t tex_samples; + // voodoo_texture_state_t texture_state; +// int s = state->tex_s >> (18+state->lod); +// int t = state->tex_t >> (18+state->lod); + // int s, t; + +// state->tex_s -= 1 << (17+state->lod); +// state->tex_t -= 1 << (17+state->lod); + + s = state->tex_s >> (4+tex_lod); + t = state->tex_t >> (4+tex_lod); + + texture_state.s = s; + texture_state.t = t; + tex_read(state, &texture_state, tmu); + +/* state->tex_r = tex_samples[0].rgba.r; + state->tex_g = tex_samples[0].rgba.g; + state->tex_b = tex_samples[0].rgba.b; + state->tex_a = tex_samples[0].rgba.a;*/ + } +} + +static inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) +{ + if (params->textureMode[tmu] & 1) + { + int64_t _w = 0; + + if (tmu) + { + if (state->tmu1_w) + _w = (int64_t)((1ULL << 48) / state->tmu1_w); + state->tex_s = (int32_t)(((((state->tmu1_s + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); + state->tex_t = (int32_t)(((((state->tmu1_t + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); + } + else + { + if (state->tmu0_w) + _w = (int64_t)((1ULL << 48) / state->tmu0_w); + state->tex_s = (int32_t)(((((state->tmu0_s + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); + state->tex_t = (int32_t)(((((state->tmu0_t + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); + } + + state->lod = state->tmu[tmu].lod + (fastlog(_w) - (19 << 8)); + } + else + { + if (tmu) + { + state->tex_s = (int32_t)(state->tmu1_s >> (14+14)); + state->tex_t = (int32_t)(state->tmu1_t >> (14+14)); + } + else + { + state->tex_s = (int32_t)(state->tmu0_s >> (14+14)); + state->tex_t = (int32_t)(state->tmu0_t >> (14+14)); + } + state->lod = state->tmu[tmu].lod; + } + + if (state->lod < state->lod_min[tmu]) + state->lod = state->lod_min[tmu]; + else if (state->lod > state->lod_max[tmu]) + state->lod = state->lod_max[tmu]; + state->lod_frac[tmu] = state->lod & 0xff; + state->lod >>= 8; + + voodoo_get_texture(voodoo, params, state, tmu, x); +} + + +/*Perform texture fetch and blending for both TMUs*/ +static inline void voodoo_tmu_fetch_and_blend(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int x) +{ + int r,g,b,a; + int c_reverse, a_reverse; +// int c_reverse1, a_reverse1; + int factor_r = 0, factor_g = 0, factor_b = 0, factor_a = 0; + + voodoo_tmu_fetch(voodoo, params, state, 1, x); + + if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && (state->lod & 1)) + { + c_reverse = tc_reverse_blend; + a_reverse = tca_reverse_blend; + } + else + { + c_reverse = !tc_reverse_blend; + a_reverse = !tca_reverse_blend; + } +/* c_reverse1 = c_reverse; + a_reverse1 = a_reverse;*/ + if (tc_sub_clocal_1) + { + switch (tc_mselect_1) + { + case TC_MSELECT_ZERO: + factor_r = factor_g = factor_b = 0; + break; + case TC_MSELECT_CLOCAL: + factor_r = state->tex_r[1]; + factor_g = state->tex_g[1]; + factor_b = state->tex_b[1]; + break; + case TC_MSELECT_AOTHER: + factor_r = factor_g = factor_b = 0; + break; + case TC_MSELECT_ALOCAL: + factor_r = factor_g = factor_b = state->tex_a[1]; + break; + case TC_MSELECT_DETAIL: + factor_r = (params->detail_bias[1] - state->lod) << params->detail_scale[1]; + if (factor_r > params->detail_max[1]) + factor_r = params->detail_max[1]; + factor_g = factor_b = factor_r; + break; + case TC_MSELECT_LOD_FRAC: + factor_r = factor_g = factor_b = state->lod_frac[1]; + break; + } + if (!c_reverse) + { + r = (-state->tex_r[1] * (factor_r + 1)) >> 8; + g = (-state->tex_g[1] * (factor_g + 1)) >> 8; + b = (-state->tex_b[1] * (factor_b + 1)) >> 8; + } + else + { + r = (-state->tex_r[1] * ((factor_r^0xff) + 1)) >> 8; + g = (-state->tex_g[1] * ((factor_g^0xff) + 1)) >> 8; + b = (-state->tex_b[1] * ((factor_b^0xff) + 1)) >> 8; + } + if (tc_add_clocal_1) + { + r += state->tex_r[1]; + g += state->tex_g[1]; + b += state->tex_b[1]; + } + else if (tc_add_alocal_1) + { + r += state->tex_a[1]; + g += state->tex_a[1]; + b += state->tex_a[1]; + } + state->tex_r[1] = CLAMP(r); + state->tex_g[1] = CLAMP(g); + state->tex_b[1] = CLAMP(b); + } + if (tca_sub_clocal_1) + { + switch (tca_mselect_1) + { + case TCA_MSELECT_ZERO: + factor_a = 0; + break; + case TCA_MSELECT_CLOCAL: + factor_a = state->tex_a[1]; + break; + case TCA_MSELECT_AOTHER: + factor_a = 0; + break; + case TCA_MSELECT_ALOCAL: + factor_a = state->tex_a[1]; + break; + case TCA_MSELECT_DETAIL: + factor_a = (params->detail_bias[1] - state->lod) << params->detail_scale[1]; + if (factor_a > params->detail_max[1]) + factor_a = params->detail_max[1]; + break; + case TCA_MSELECT_LOD_FRAC: + factor_a = state->lod_frac[1]; + break; + } + if (!a_reverse) + a = (-state->tex_a[1] * ((factor_a ^ 0xff) + 1)) >> 8; + else + a = (-state->tex_a[1] * (factor_a + 1)) >> 8; + if (tca_add_clocal_1 || tca_add_alocal_1) + a += state->tex_a[1]; + state->tex_a[1] = CLAMP(a); + } + + + voodoo_tmu_fetch(voodoo, params, state, 0, x); + + if ((params->textureMode[0] & TEXTUREMODE_TRILINEAR) && (state->lod & 1)) + { + c_reverse = tc_reverse_blend; + a_reverse = tca_reverse_blend; + } + else + { + c_reverse = !tc_reverse_blend; + a_reverse = !tca_reverse_blend; + } + + if (!tc_zero_other) + { + r = state->tex_r[1]; + g = state->tex_g[1]; + b = state->tex_b[1]; + } + else + r = g = b = 0; + if (tc_sub_clocal) + { + r -= state->tex_r[0]; + g -= state->tex_g[0]; + b -= state->tex_b[0]; + } + switch (tc_mselect) + { + case TC_MSELECT_ZERO: + factor_r = factor_g = factor_b = 0; + break; + case TC_MSELECT_CLOCAL: + factor_r = state->tex_r[0]; + factor_g = state->tex_g[0]; + factor_b = state->tex_b[0]; + break; + case TC_MSELECT_AOTHER: + factor_r = factor_g = factor_b = state->tex_a[1]; + break; + case TC_MSELECT_ALOCAL: + factor_r = factor_g = factor_b = state->tex_a[0]; + break; + case TC_MSELECT_DETAIL: + factor_r = (params->detail_bias[0] - state->lod) << params->detail_scale[0]; + if (factor_r > params->detail_max[0]) + factor_r = params->detail_max[0]; + factor_g = factor_b = factor_r; + break; + case TC_MSELECT_LOD_FRAC: + factor_r = factor_g = factor_b = state->lod_frac[0]; + break; + } + if (!c_reverse) + { + r = (r * (factor_r + 1)) >> 8; + g = (g * (factor_g + 1)) >> 8; + b = (b * (factor_b + 1)) >> 8; + } + else + { + r = (r * ((factor_r^0xff) + 1)) >> 8; + g = (g * ((factor_g^0xff) + 1)) >> 8; + b = (b * ((factor_b^0xff) + 1)) >> 8; + } + if (tc_add_clocal) + { + r += state->tex_r[0]; + g += state->tex_g[0]; + b += state->tex_b[0]; + } + else if (tc_add_alocal) + { + r += state->tex_a[0]; + g += state->tex_a[0]; + b += state->tex_a[0]; + } + + if (!tca_zero_other) + a = state->tex_a[1]; + else + a = 0; + if (tca_sub_clocal) + a -= state->tex_a[0]; + switch (tca_mselect) + { + case TCA_MSELECT_ZERO: + factor_a = 0; + break; + case TCA_MSELECT_CLOCAL: + factor_a = state->tex_a[0]; + break; + case TCA_MSELECT_AOTHER: + factor_a = state->tex_a[1]; + break; + case TCA_MSELECT_ALOCAL: + factor_a = state->tex_a[0]; + break; + case TCA_MSELECT_DETAIL: + factor_a = (params->detail_bias[0] - state->lod) << params->detail_scale[0]; + if (factor_a > params->detail_max[0]) + factor_a = params->detail_max[0]; + break; + case TCA_MSELECT_LOD_FRAC: + factor_a = state->lod_frac[0]; + break; + } + if (a_reverse) + a = (a * ((factor_a ^ 0xff) + 1)) >> 8; + else + a = (a * (factor_a + 1)) >> 8; + if (tca_add_clocal || tca_add_alocal) + a += state->tex_a[0]; + + + state->tex_r[0] = CLAMP(r); + state->tex_g[0] = CLAMP(g); + state->tex_b[0] = CLAMP(b); + state->tex_a[0] = CLAMP(a); + + if (tc_invert_output) + { + state->tex_r[0] ^= 0xff; + state->tex_g[0] ^= 0xff; + state->tex_b[0] ^= 0xff; + } + if (tca_invert_output) + state->tex_a[0] ^= 0xff; +} + +#if (defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined WIN32 || defined _WIN32 || defined _WIN32) && !(defined __amd64__) +#include <86box/vid_voodoo_codegen_x86.h> +#elif (defined __amd64__) +#include <86box/vid_voodoo_codegen_x86-64.h> +#else +int voodoo_recomp = 0; +#endif + +static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int ystart, int yend, int odd_even) +{ +/* int rgb_sel = params->fbzColorPath & 3; + int a_sel = (params->fbzColorPath >> 2) & 3; + int cc_localselect = params->fbzColorPath & (1 << 4); + int cca_localselect = (params->fbzColorPath >> 5) & 3; + int cc_localselect_override = params->fbzColorPath & (1 << 7); + int cc_zero_other = params->fbzColorPath & (1 << 8); + int cc_sub_clocal = params->fbzColorPath & (1 << 9); + int cc_mselect = (params->fbzColorPath >> 10) & 7; + int cc_reverse_blend = params->fbzColorPath & (1 << 13); + int cc_add = (params->fbzColorPath >> 14) & 3; + int cc_add_alocal = params->fbzColorPath & (1 << 15); + int cc_invert_output = params->fbzColorPath & (1 << 16); + int cca_zero_other = params->fbzColorPath & (1 << 17); + int cca_sub_clocal = params->fbzColorPath & (1 << 18); + int cca_mselect = (params->fbzColorPath >> 19) & 7; + int cca_reverse_blend = params->fbzColorPath & (1 << 22); + int cca_add = (params->fbzColorPath >> 23) & 3; + int cca_invert_output = params->fbzColorPath & (1 << 25); + int src_afunc = (params->alphaMode >> 8) & 0xf; + int dest_afunc = (params->alphaMode >> 12) & 0xf; + int alpha_func = (params->alphaMode >> 1) & 7; + int a_ref = params->alphaMode >> 24; + int depth_op = (params->fbzMode >> 5) & 7; + int dither = params->fbzMode & FBZ_DITHER;*/ + int texels; + int c; +#ifndef NO_CODEGEN + uint8_t (*voodoo_draw)(voodoo_state_t *state, voodoo_params_t *params, int x, int real_y); +#endif + int y_diff = SLI_ENABLED ? 2 : 1; + + if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH || + (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL) + texels = 1; + else + texels = 2; + + state->clamp_s[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPS; + state->clamp_t[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPT; + state->clamp_s[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPS; + state->clamp_t[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPT; +// int last_x; +// voodoo_render_log("voodoo_triangle : bottom-half %X %X %X %X %X %i %i %i %i\n", xstart, xend, dx1, dx2, dx2 * 36, xdir, y, yend, ydir); + + for (c = 0; c <= LOD_MAX; c++) + { + state->tex[0][c] = &voodoo->texture_cache[0][params->tex_entry[0]].data[texture_offset[c]]; + state->tex[1][c] = &voodoo->texture_cache[1][params->tex_entry[1]].data[texture_offset[c]]; + } + + state->tformat = params->tformat[0]; + + state->tex_w_mask[0] = params->tex_w_mask[0]; + state->tex_h_mask[0] = params->tex_h_mask[0]; + state->tex_shift[0] = params->tex_shift[0]; + state->tex_lod[0] = params->tex_lod[0]; + state->tex_w_mask[1] = params->tex_w_mask[1]; + state->tex_h_mask[1] = params->tex_h_mask[1]; + state->tex_shift[1] = params->tex_shift[1]; + state->tex_lod[1] = params->tex_lod[1]; + + if ((params->fbzMode & 1) && (ystart < params->clipLowY)) + { + int dy = params->clipLowY - ystart; + + state->base_r += params->dRdY*dy; + state->base_g += params->dGdY*dy; + state->base_b += params->dBdY*dy; + state->base_a += params->dAdY*dy; + state->base_z += params->dZdY*dy; + state->tmu[0].base_s += params->tmu[0].dSdY*dy; + state->tmu[0].base_t += params->tmu[0].dTdY*dy; + state->tmu[0].base_w += params->tmu[0].dWdY*dy; + state->tmu[1].base_s += params->tmu[1].dSdY*dy; + state->tmu[1].base_t += params->tmu[1].dTdY*dy; + state->tmu[1].base_w += params->tmu[1].dWdY*dy; + state->base_w += params->dWdY*dy; + state->xstart += state->dx1*dy; + state->xend += state->dx2*dy; + + ystart = params->clipLowY; + } + + if ((params->fbzMode & 1) && (yend >= params->clipHighY)) + yend = params->clipHighY; + + state->y = ystart; +// yend--; + + if (SLI_ENABLED) + { + int test_y; + + if (params->fbzMode & (1 << 17)) + test_y = (voodoo->v_disp-1) - state->y; + else + test_y = state->y; + + if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (test_y & 1)) || + ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(test_y & 1))) + { + state->y++; + + state->base_r += params->dRdY; + state->base_g += params->dGdY; + state->base_b += params->dBdY; + state->base_a += params->dAdY; + state->base_z += params->dZdY; + state->tmu[0].base_s += params->tmu[0].dSdY; + state->tmu[0].base_t += params->tmu[0].dTdY; + state->tmu[0].base_w += params->tmu[0].dWdY; + state->tmu[1].base_s += params->tmu[1].dSdY; + state->tmu[1].base_t += params->tmu[1].dTdY; + state->tmu[1].base_w += params->tmu[1].dWdY; + state->base_w += params->dWdY; + state->xstart += state->dx1; + state->xend += state->dx2; + } + } +#ifndef NO_CODEGEN + if (voodoo->use_recompiler) + voodoo_draw = voodoo_get_block(voodoo, params, state, odd_even); + else + voodoo_draw = NULL; +#endif + + voodoo_render_log("dxAB=%08x dxBC=%08x dxAC=%08x\n", state->dxAB, state->dxBC, state->dxAC); +// voodoo_render_log("Start %i %i\n", ystart, voodoo->fbzMode & (1 << 17)); + + for (; state->y < yend; state->y += y_diff) + { + int x, x2; + int real_y = (state->y << 4) + 8; + int start_x; + int dx; + uint16_t *fb_mem, *aux_mem; + + state->ir = state->base_r; + state->ig = state->base_g; + state->ib = state->base_b; + state->ia = state->base_a; + state->z = state->base_z; + state->tmu0_s = state->tmu[0].base_s; + state->tmu0_t = state->tmu[0].base_t; + state->tmu0_w = state->tmu[0].base_w; + state->tmu1_s = state->tmu[1].base_s; + state->tmu1_t = state->tmu[1].base_t; + state->tmu1_w = state->tmu[1].base_w; + state->w = state->base_w; + + x = (state->vertexAx << 12) + ((state->dxAC * (real_y - state->vertexAy)) >> 4); + + if (real_y < state->vertexBy) + x2 = (state->vertexAx << 12) + ((state->dxAB * (real_y - state->vertexAy)) >> 4); + else + x2 = (state->vertexBx << 12) + ((state->dxBC * (real_y - state->vertexBy)) >> 4); + + if (params->fbzMode & (1 << 17)) + real_y = (voodoo->v_disp-1) - (real_y >> 4); + else + real_y >>= 4; + + if (SLI_ENABLED) + { + if (((real_y >> 1) & voodoo->odd_even_mask) != odd_even) + goto next_line; + } + else + { + if ((real_y & voodoo->odd_even_mask) != odd_even) + goto next_line; + } + + start_x = x; + + if (state->xdir > 0) + x2 -= (1 << 16); + else + x -= (1 << 16); + dx = ((x + 0x7000) >> 16) - (((state->vertexAx << 12) + 0x7000) >> 16); + x = (x + 0x7000) >> 16; + x2 = (x2 + 0x7000) >> 16; + + voodoo_render_log("%03i:%03i : Ax=%08x start_x=%08x dSdX=%016llx dx=%08x s=%08x -> ", x, state->y, state->vertexAx << 8, start_x, params->tmu[0].dTdX, dx, state->tmu0_t); + + state->ir += (params->dRdX * dx); + state->ig += (params->dGdX * dx); + state->ib += (params->dBdX * dx); + state->ia += (params->dAdX * dx); + state->z += (params->dZdX * dx); + state->tmu0_s += (params->tmu[0].dSdX * dx); + state->tmu0_t += (params->tmu[0].dTdX * dx); + state->tmu0_w += (params->tmu[0].dWdX * dx); + state->tmu1_s += (params->tmu[1].dSdX * dx); + state->tmu1_t += (params->tmu[1].dTdX * dx); + state->tmu1_w += (params->tmu[1].dWdX * dx); + state->w += (params->dWdX * dx); + + voodoo_render_log("%08llx %lli %lli\n", state->tmu0_t, state->tmu0_t >> (18+state->lod), (state->tmu0_t + (1 << (17+state->lod))) >> (18+state->lod)); + + if (params->fbzMode & 1) + { + if (state->xdir > 0) + { + if (x < params->clipLeft) + { + int dx = params->clipLeft - x; + + state->ir += params->dRdX*dx; + state->ig += params->dGdX*dx; + state->ib += params->dBdX*dx; + state->ia += params->dAdX*dx; + state->z += params->dZdX*dx; + state->tmu0_s += params->tmu[0].dSdX*dx; + state->tmu0_t += params->tmu[0].dTdX*dx; + state->tmu0_w += params->tmu[0].dWdX*dx; + state->tmu1_s += params->tmu[1].dSdX*dx; + state->tmu1_t += params->tmu[1].dTdX*dx; + state->tmu1_w += params->tmu[1].dWdX*dx; + state->w += params->dWdX*dx; + + x = params->clipLeft; + } + if (x2 >= params->clipRight) + x2 = params->clipRight-1; + } + else + { + if (x >= params->clipRight) + { + int dx = (params->clipRight-1) - x; + + state->ir += params->dRdX*dx; + state->ig += params->dGdX*dx; + state->ib += params->dBdX*dx; + state->ia += params->dAdX*dx; + state->z += params->dZdX*dx; + state->tmu0_s += params->tmu[0].dSdX*dx; + state->tmu0_t += params->tmu[0].dTdX*dx; + state->tmu0_w += params->tmu[0].dWdX*dx; + state->tmu1_s += params->tmu[1].dSdX*dx; + state->tmu1_t += params->tmu[1].dTdX*dx; + state->tmu1_w += params->tmu[1].dWdX*dx; + state->w += params->dWdX*dx; + + x = params->clipRight-1; + } + if (x2 < params->clipLeft) + x2 = params->clipLeft; + } + } + + if (x2 < x && state->xdir > 0) + goto next_line; + if (x2 > x && state->xdir < 0) + goto next_line; + + if (SLI_ENABLED) + { + state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + ((real_y >> 1) * params->row_width)]; + state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + ((real_y >> 1) * params->row_width)) & voodoo->fb_mask]; + } + else + { + if (params->col_tiled) + state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y >> 5) * params->row_width + (real_y & 31) * 128]; + else + state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y * params->row_width)]; + if (params->aux_tiled) + state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y >> 5) * params->aux_row_width + (real_y & 31) * 128) & voodoo->fb_mask]; + else + state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y * params->row_width)) & voodoo->fb_mask]; + } + + voodoo_render_log("%03i: x=%08x x2=%08x xstart=%08x xend=%08x dx=%08x\n", state->y, x, x2, state->xstart, state->xend, dx); + + state->pixel_count = 0; + state->texel_count = 0; + state->x = x; + state->x2 = x2; +#ifndef NO_CODEGEN + if (voodoo->use_recompiler) + { + voodoo_draw(state, params, x, real_y); + } + else +#endif + do + { + int x_tiled = (x & 63) | ((x >> 6) * 128*32/2); + start_x = x; + state->x = x; + voodoo->pixel_count[odd_even]++; + voodoo->texel_count[odd_even] += texels; + voodoo->fbiPixelsIn++; + + voodoo_render_log(" X=%03i T=%08x\n", x, state->tmu0_t); +// if (voodoo->fbzMode & FBZ_RGB_WMASK) + { + int update = 1; + uint8_t cother_r = 0, cother_g = 0, cother_b = 0, aother; + uint8_t clocal_r, clocal_g, clocal_b, alocal; + int src_r = 0, src_g = 0, src_b = 0, src_a = 0; + int msel_r, msel_g, msel_b, msel_a; + uint8_t dest_r, dest_g, dest_b, dest_a; + uint16_t dat; + int sel; + int32_t new_depth, w_depth; + + if (state->w & 0xffff00000000) + w_depth = 0; + else if (!(state->w & 0xffff0000)) + w_depth = 0xf001; + else + { + int exp = voodoo_fls((uint16_t)((uint32_t)state->w >> 16)); + int mant = ((~(uint32_t)state->w >> (19 - exp))) & 0xfff; + w_depth = (exp << 12) + mant + 1; + if (w_depth > 0xffff) + w_depth = 0xffff; + } + +// w_depth = CLAMP16(w_depth); + + if (params->fbzMode & FBZ_W_BUFFER) + new_depth = w_depth; + else + new_depth = CLAMP16(state->z >> 12); + + if (params->fbzMode & FBZ_DEPTH_BIAS) + new_depth = CLAMP16(new_depth + (int16_t)params->zaColor); + + if (params->fbzMode & FBZ_DEPTH_ENABLE) + { + uint16_t old_depth = voodoo->params.aux_tiled ? aux_mem[x_tiled] : aux_mem[x]; + + DEPTH_TEST((params->fbzMode & FBZ_DEPTH_SOURCE) ? (params->zaColor & 0xffff) : new_depth); + } + + dat = voodoo->params.col_tiled ? fb_mem[x_tiled] : fb_mem[x]; + dest_r = (dat >> 8) & 0xf8; + dest_g = (dat >> 3) & 0xfc; + dest_b = (dat << 3) & 0xf8; + dest_r |= (dest_r >> 5); + dest_g |= (dest_g >> 6); + dest_b |= (dest_b >> 5); + dest_a = 0xff; + + if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) + { + if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus) + { + /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/ + voodoo_tmu_fetch(voodoo, params, state, 0, x); + } + else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH) + { + /*TMU0 in pass-through mode, only sample TMU1*/ + voodoo_tmu_fetch(voodoo, params, state, 1, x); + + state->tex_r[0] = state->tex_r[1]; + state->tex_g[0] = state->tex_g[1]; + state->tex_b[0] = state->tex_b[1]; + state->tex_a[0] = state->tex_a[1]; + } + else + { + voodoo_tmu_fetch_and_blend(voodoo, params, state, x); + } + + if ((params->fbzMode & FBZ_CHROMAKEY) && + state->tex_r[0] == params->chromaKey_r && + state->tex_g[0] == params->chromaKey_g && + state->tex_b[0] == params->chromaKey_b) + { + voodoo->fbiChromaFail++; + goto skip_pixel; + } + } + + if (voodoo->trexInit1[0] & (1 << 18)) + { + state->tex_r[0] = state->tex_g[0] = 0; + state->tex_b[0] = voodoo->tmuConfig; + } + + if (cc_localselect_override) + sel = (state->tex_a[0] & 0x80) ? 1 : 0; + else + sel = cc_localselect; + + if (sel) + { + clocal_r = (params->color0 >> 16) & 0xff; + clocal_g = (params->color0 >> 8) & 0xff; + clocal_b = params->color0 & 0xff; + } + else + { + clocal_r = CLAMP(state->ir >> 12); + clocal_g = CLAMP(state->ig >> 12); + clocal_b = CLAMP(state->ib >> 12); + } + + switch (_rgb_sel) + { + case CC_LOCALSELECT_ITER_RGB: /*Iterated RGB*/ + cother_r = CLAMP(state->ir >> 12); + cother_g = CLAMP(state->ig >> 12); + cother_b = CLAMP(state->ib >> 12); + break; + + case CC_LOCALSELECT_TEX: /*TREX Color Output*/ + cother_r = state->tex_r[0]; + cother_g = state->tex_g[0]; + cother_b = state->tex_b[0]; + break; + + case CC_LOCALSELECT_COLOR1: /*Color1 RGB*/ + cother_r = (params->color1 >> 16) & 0xff; + cother_g = (params->color1 >> 8) & 0xff; + cother_b = params->color1 & 0xff; + break; + + case CC_LOCALSELECT_LFB: /*Linear Frame Buffer*/ + cother_r = src_r; + cother_g = src_g; + cother_b = src_b; + break; + } + + switch (cca_localselect) + { + case CCA_LOCALSELECT_ITER_A: + alocal = CLAMP(state->ia >> 12); + break; + + case CCA_LOCALSELECT_COLOR0: + alocal = (params->color0 >> 24) & 0xff; + break; + + case CCA_LOCALSELECT_ITER_Z: + alocal = CLAMP(state->z >> 20); + break; + + default: + fatal("Bad cca_localselect %i\n", cca_localselect); + alocal = 0xff; + break; + } + + switch (a_sel) + { + case A_SEL_ITER_A: + aother = CLAMP(state->ia >> 12); + break; + case A_SEL_TEX: + aother = state->tex_a[0]; + break; + case A_SEL_COLOR1: + aother = (params->color1 >> 24) & 0xff; + break; + default: + fatal("Bad a_sel %i\n", a_sel); + aother = 0; + break; + } + + if (cc_zero_other) + { + src_r = 0; + src_g = 0; + src_b = 0; + } + else + { + src_r = cother_r; + src_g = cother_g; + src_b = cother_b; + } + + if (cca_zero_other) + src_a = 0; + else + src_a = aother; + + if (cc_sub_clocal) + { + src_r -= clocal_r; + src_g -= clocal_g; + src_b -= clocal_b; + } + + if (cca_sub_clocal) + src_a -= alocal; + + switch (cc_mselect) + { + case CC_MSELECT_ZERO: + msel_r = 0; + msel_g = 0; + msel_b = 0; + break; + case CC_MSELECT_CLOCAL: + msel_r = clocal_r; + msel_g = clocal_g; + msel_b = clocal_b; + break; + case CC_MSELECT_AOTHER: + msel_r = aother; + msel_g = aother; + msel_b = aother; + break; + case CC_MSELECT_ALOCAL: + msel_r = alocal; + msel_g = alocal; + msel_b = alocal; + break; + case CC_MSELECT_TEX: + msel_r = state->tex_a[0]; + msel_g = state->tex_a[0]; + msel_b = state->tex_a[0]; + break; + case CC_MSELECT_TEXRGB: + msel_r = state->tex_r[0]; + msel_g = state->tex_g[0]; + msel_b = state->tex_b[0]; + break; + + default: + fatal("Bad cc_mselect %i\n", cc_mselect); + msel_r = 0; + msel_g = 0; + msel_b = 0; + break; + } + + switch (cca_mselect) + { + case CCA_MSELECT_ZERO: + msel_a = 0; + break; + case CCA_MSELECT_ALOCAL: + msel_a = alocal; + break; + case CCA_MSELECT_AOTHER: + msel_a = aother; + break; + case CCA_MSELECT_ALOCAL2: + msel_a = alocal; + break; + case CCA_MSELECT_TEX: + msel_a = state->tex_a[0]; + break; + + default: + fatal("Bad cca_mselect %i\n", cca_mselect); + msel_a = 0; + break; + } + + if (!cc_reverse_blend) + { + msel_r ^= 0xff; + msel_g ^= 0xff; + msel_b ^= 0xff; + } + msel_r++; + msel_g++; + msel_b++; + + if (!cca_reverse_blend) + msel_a ^= 0xff; + msel_a++; + + src_r = (src_r * msel_r) >> 8; + src_g = (src_g * msel_g) >> 8; + src_b = (src_b * msel_b) >> 8; + src_a = (src_a * msel_a) >> 8; + + switch (cc_add) + { + case CC_ADD_CLOCAL: + src_r += clocal_r; + src_g += clocal_g; + src_b += clocal_b; + break; + case CC_ADD_ALOCAL: + src_r += alocal; + src_g += alocal; + src_b += alocal; + break; + case 0: + break; + default: + fatal("Bad cc_add %i\n", cc_add); + } + + if (cca_add) + src_a += alocal; + + src_r = CLAMP(src_r); + src_g = CLAMP(src_g); + src_b = CLAMP(src_b); + src_a = CLAMP(src_a); + + if (cc_invert_output) + { + src_r ^= 0xff; + src_g ^= 0xff; + src_b ^= 0xff; + } + if (cca_invert_output) + src_a ^= 0xff; + + if (params->fogMode & FOG_ENABLE) + APPLY_FOG(src_r, src_g, src_b, state->z, state->ia, state->w); + + if (params->alphaMode & 1) + ALPHA_TEST(src_a); + + if (params->alphaMode & (1 << 4)) + ALPHA_BLEND(src_r, src_g, src_b, src_a); + + if (update) + { + if (dither) + { + if (dither2x2) + { + src_r = dither_rb2x2[src_r][real_y & 1][x & 1]; + src_g = dither_g2x2[src_g][real_y & 1][x & 1]; + src_b = dither_rb2x2[src_b][real_y & 1][x & 1]; + } + else + { + src_r = dither_rb[src_r][real_y & 3][x & 3]; + src_g = dither_g[src_g][real_y & 3][x & 3]; + src_b = dither_rb[src_b][real_y & 3][x & 3]; + } + } + else + { + src_r >>= 3; + src_g >>= 2; + src_b >>= 3; + } + + if (params->fbzMode & FBZ_RGB_WMASK) + { + if (voodoo->params.col_tiled) + fb_mem[x_tiled] = src_b | (src_g << 5) | (src_r << 11); + else + fb_mem[x] = src_b | (src_g << 5) | (src_r << 11); + } + if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) + { + if (voodoo->params.aux_tiled) + aux_mem[x_tiled] = new_depth; + else + aux_mem[x] = new_depth; + } + } + } + voodoo->fbiPixelsOut++; +skip_pixel: + if (state->xdir > 0) + { + state->ir += params->dRdX; + state->ig += params->dGdX; + state->ib += params->dBdX; + state->ia += params->dAdX; + state->z += params->dZdX; + state->tmu0_s += params->tmu[0].dSdX; + state->tmu0_t += params->tmu[0].dTdX; + state->tmu0_w += params->tmu[0].dWdX; + state->tmu1_s += params->tmu[1].dSdX; + state->tmu1_t += params->tmu[1].dTdX; + state->tmu1_w += params->tmu[1].dWdX; + state->w += params->dWdX; + } + else + { + state->ir -= params->dRdX; + state->ig -= params->dGdX; + state->ib -= params->dBdX; + state->ia -= params->dAdX; + state->z -= params->dZdX; + state->tmu0_s -= params->tmu[0].dSdX; + state->tmu0_t -= params->tmu[0].dTdX; + state->tmu0_w -= params->tmu[0].dWdX; + state->tmu1_s -= params->tmu[1].dSdX; + state->tmu1_t -= params->tmu[1].dTdX; + state->tmu1_w -= params->tmu[1].dWdX; + state->w -= params->dWdX; + } + + x += state->xdir; + } while (start_x != x2); + + voodoo->pixel_count[odd_even] += state->pixel_count; + voodoo->texel_count[odd_even] += state->texel_count; + voodoo->fbiPixelsIn += state->pixel_count; + + if (voodoo->params.draw_offset == voodoo->params.front_offset && (real_y >> 1) < 2048) + voodoo->dirty_line[real_y >> 1] = 1; + +next_line: + if (SLI_ENABLED) + { + state->base_r += params->dRdY; + state->base_g += params->dGdY; + state->base_b += params->dBdY; + state->base_a += params->dAdY; + state->base_z += params->dZdY; + state->tmu[0].base_s += params->tmu[0].dSdY; + state->tmu[0].base_t += params->tmu[0].dTdY; + state->tmu[0].base_w += params->tmu[0].dWdY; + state->tmu[1].base_s += params->tmu[1].dSdY; + state->tmu[1].base_t += params->tmu[1].dTdY; + state->tmu[1].base_w += params->tmu[1].dWdY; + state->base_w += params->dWdY; + state->xstart += state->dx1; + state->xend += state->dx2; + } + state->base_r += params->dRdY; + state->base_g += params->dGdY; + state->base_b += params->dBdY; + state->base_a += params->dAdY; + state->base_z += params->dZdY; + state->tmu[0].base_s += params->tmu[0].dSdY; + state->tmu[0].base_t += params->tmu[0].dTdY; + state->tmu[0].base_w += params->tmu[0].dWdY; + state->tmu[1].base_s += params->tmu[1].dSdY; + state->tmu[1].base_t += params->tmu[1].dTdY; + state->tmu[1].base_w += params->tmu[1].dWdY; + state->base_w += params->dWdY; + state->xstart += state->dx1; + state->xend += state->dx2; + } + + voodoo->texture_cache[0][params->tex_entry[0]].refcount_r[odd_even]++; + voodoo->texture_cache[1][params->tex_entry[1]].refcount_r[odd_even]++; +} + +void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_even) +{ + voodoo_state_t state; + int vertexAy_adjusted; + int vertexCy_adjusted; + int dx, dy; + + uint64_t tempdx, tempdy; + uint64_t tempLOD; + int LOD; + int lodbias; + + voodoo->tri_count++; + + dx = 8 - (params->vertexAx & 0xf); + if ((params->vertexAx & 0xf) > 8) + dx += 16; + dy = 8 - (params->vertexAy & 0xf); + if ((params->vertexAy & 0xf) > 8) + dy += 16; + +/* voodoo_render_log("voodoo_triangle %i %i %i : vA %f, %f vB %f, %f vC %f, %f f %i,%i %08x %08x %08x,%08x tex=%i,%i fogMode=%08x\n", odd_even, voodoo->params_read_idx[odd_even], voodoo->params_read_idx[odd_even] & PARAM_MASK, (float)params->vertexAx / 16.0, (float)params->vertexAy / 16.0, + (float)params->vertexBx / 16.0, (float)params->vertexBy / 16.0, + (float)params->vertexCx / 16.0, (float)params->vertexCy / 16.0, + (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[0] : 0, + (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[1] : 0, params->fbzColorPath, params->alphaMode, params->textureMode[0],params->textureMode[1], params->tex_entry[0],params->tex_entry[1], params->fogMode);*/ + + state.base_r = params->startR; + state.base_g = params->startG; + state.base_b = params->startB; + state.base_a = params->startA; + state.base_z = params->startZ; + state.tmu[0].base_s = params->tmu[0].startS; + state.tmu[0].base_t = params->tmu[0].startT; + state.tmu[0].base_w = params->tmu[0].startW; + state.tmu[1].base_s = params->tmu[1].startS; + state.tmu[1].base_t = params->tmu[1].startT; + state.tmu[1].base_w = params->tmu[1].startW; + state.base_w = params->startW; + + if (params->fbzColorPath & FBZ_PARAM_ADJUST) + { + state.base_r += (dx*params->dRdX + dy*params->dRdY) >> 4; + state.base_g += (dx*params->dGdX + dy*params->dGdY) >> 4; + state.base_b += (dx*params->dBdX + dy*params->dBdY) >> 4; + state.base_a += (dx*params->dAdX + dy*params->dAdY) >> 4; + state.base_z += (dx*params->dZdX + dy*params->dZdY) >> 4; + state.tmu[0].base_s += (dx*params->tmu[0].dSdX + dy*params->tmu[0].dSdY) >> 4; + state.tmu[0].base_t += (dx*params->tmu[0].dTdX + dy*params->tmu[0].dTdY) >> 4; + state.tmu[0].base_w += (dx*params->tmu[0].dWdX + dy*params->tmu[0].dWdY) >> 4; + state.tmu[1].base_s += (dx*params->tmu[1].dSdX + dy*params->tmu[1].dSdY) >> 4; + state.tmu[1].base_t += (dx*params->tmu[1].dTdX + dy*params->tmu[1].dTdY) >> 4; + state.tmu[1].base_w += (dx*params->tmu[1].dWdX + dy*params->tmu[1].dWdY) >> 4; + state.base_w += (dx*params->dWdX + dy*params->dWdY) >> 4; + } + + tris++; + + state.vertexAy = params->vertexAy & ~0xffff0000; + if (state.vertexAy & 0x8000) + state.vertexAy |= 0xffff0000; + state.vertexBy = params->vertexBy & ~0xffff0000; + if (state.vertexBy & 0x8000) + state.vertexBy |= 0xffff0000; + state.vertexCy = params->vertexCy & ~0xffff0000; + if (state.vertexCy & 0x8000) + state.vertexCy |= 0xffff0000; + + state.vertexAx = params->vertexAx & ~0xffff0000; + if (state.vertexAx & 0x8000) + state.vertexAx |= 0xffff0000; + state.vertexBx = params->vertexBx & ~0xffff0000; + if (state.vertexBx & 0x8000) + state.vertexBx |= 0xffff0000; + state.vertexCx = params->vertexCx & ~0xffff0000; + if (state.vertexCx & 0x8000) + state.vertexCx |= 0xffff0000; + + vertexAy_adjusted = (state.vertexAy+7) >> 4; + vertexCy_adjusted = (state.vertexCy+7) >> 4; + + if (state.vertexBy - state.vertexAy) + state.dxAB = (int)((((int64_t)state.vertexBx << 12) - ((int64_t)state.vertexAx << 12)) << 4) / (int)(state.vertexBy - state.vertexAy); + else + state.dxAB = 0; + if (state.vertexCy - state.vertexAy) + state.dxAC = (int)((((int64_t)state.vertexCx << 12) - ((int64_t)state.vertexAx << 12)) << 4) / (int)(state.vertexCy - state.vertexAy); + else + state.dxAC = 0; + if (state.vertexCy - state.vertexBy) + state.dxBC = (int)((((int64_t)state.vertexCx << 12) - ((int64_t)state.vertexBx << 12)) << 4) / (int)(state.vertexCy - state.vertexBy); + else + state.dxBC = 0; + + state.lod_min[0] = (params->tLOD[0] & 0x3f) << 6; + state.lod_max[0] = ((params->tLOD[0] >> 6) & 0x3f) << 6; + if (state.lod_max[0] > 0x800) + state.lod_max[0] = 0x800; + state.lod_min[1] = (params->tLOD[1] & 0x3f) << 6; + state.lod_max[1] = ((params->tLOD[1] >> 6) & 0x3f) << 6; + if (state.lod_max[1] > 0x800) + state.lod_max[1] = 0x800; + + state.xstart = state.xend = state.vertexAx << 8; + state.xdir = params->sign ? -1 : 1; + + state.y = (state.vertexAy + 8) >> 4; + state.ydir = 1; + + + tempdx = (params->tmu[0].dSdX >> 14) * (params->tmu[0].dSdX >> 14) + (params->tmu[0].dTdX >> 14) * (params->tmu[0].dTdX >> 14); + tempdy = (params->tmu[0].dSdY >> 14) * (params->tmu[0].dSdY >> 14) + (params->tmu[0].dTdY >> 14) * (params->tmu[0].dTdY >> 14); + + if (tempdx > tempdy) + tempLOD = tempdx; + else + tempLOD = tempdy; + + LOD = (int)(log2((double)tempLOD / (double)(1ULL << 36)) * 256); + LOD >>= 2; + + lodbias = (params->tLOD[0] >> 12) & 0x3f; + if (lodbias & 0x20) + lodbias |= ~0x3f; + state.tmu[0].lod = LOD + (lodbias << 6); + + + tempdx = (params->tmu[1].dSdX >> 14) * (params->tmu[1].dSdX >> 14) + (params->tmu[1].dTdX >> 14) * (params->tmu[1].dTdX >> 14); + tempdy = (params->tmu[1].dSdY >> 14) * (params->tmu[1].dSdY >> 14) + (params->tmu[1].dTdY >> 14) * (params->tmu[1].dTdY >> 14); + + if (tempdx > tempdy) + tempLOD = tempdx; + else + tempLOD = tempdy; + + LOD = (int)(log2((double)tempLOD / (double)(1ULL << 36)) * 256); + LOD >>= 2; + + lodbias = (params->tLOD[1] >> 12) & 0x3f; + if (lodbias & 0x20) + lodbias |= ~0x3f; + state.tmu[1].lod = LOD + (lodbias << 6); + + + voodoo_half_triangle(voodoo, params, &state, vertexAy_adjusted, vertexCy_adjusted, odd_even); +} + + +static void render_thread(void *param, int odd_even) +{ + voodoo_t *voodoo = (voodoo_t *)param; + + while (1) + { + thread_set_event(voodoo->render_not_full_event[odd_even]); + thread_wait_event(voodoo->wake_render_thread[odd_even], -1); + thread_reset_event(voodoo->wake_render_thread[odd_even]); + voodoo->render_voodoo_busy[odd_even] = 1; + + while (!PARAM_EMPTY(odd_even)) + { + uint64_t start_time = plat_timer_read(); + uint64_t end_time; + voodoo_params_t *params = &voodoo->params_buffer[voodoo->params_read_idx[odd_even] & PARAM_MASK]; + + voodoo_triangle(voodoo, params, odd_even); + + voodoo->params_read_idx[odd_even]++; + + if (PARAM_ENTRIES(odd_even) > (PARAM_SIZE - 10)) + thread_set_event(voodoo->render_not_full_event[odd_even]); + + end_time = plat_timer_read(); + voodoo->render_time[odd_even] += end_time - start_time; + } + + voodoo->render_voodoo_busy[odd_even] = 0; + } +} + +void voodoo_render_thread_1(void *param) +{ + render_thread(param, 0); +} +void voodoo_render_thread_2(void *param) +{ + render_thread(param, 1); +} +void voodoo_render_thread_3(void *param) +{ + render_thread(param, 2); +} +void voodoo_render_thread_4(void *param) +{ + render_thread(param, 3); +} + +void voodoo_queue_triangle(voodoo_t *voodoo, voodoo_params_t *params) +{ + voodoo_params_t *params_new = &voodoo->params_buffer[voodoo->params_write_idx & PARAM_MASK]; + + while (PARAM_FULL(0) || (voodoo->render_threads >= 2 && PARAM_FULL(1)) || + (voodoo->render_threads == 4 && (PARAM_FULL(2) || PARAM_FULL(3)))) + { + thread_reset_event(voodoo->render_not_full_event[0]); + if (voodoo->render_threads >= 2) + thread_reset_event(voodoo->render_not_full_event[1]); + if (voodoo->render_threads == 4) + { + thread_reset_event(voodoo->render_not_full_event[2]); + thread_reset_event(voodoo->render_not_full_event[3]); + } + if (PARAM_FULL(0)) + thread_wait_event(voodoo->render_not_full_event[0], -1); /*Wait for room in ringbuffer*/ + if (voodoo->render_threads >= 2 && PARAM_FULL(1)) + thread_wait_event(voodoo->render_not_full_event[1], -1); /*Wait for room in ringbuffer*/ + if (voodoo->render_threads == 4 && PARAM_FULL(2)) + thread_wait_event(voodoo->render_not_full_event[2], -1); /*Wait for room in ringbuffer*/ + if (voodoo->render_threads == 4 && PARAM_FULL(3)) + thread_wait_event(voodoo->render_not_full_event[3], -1); /*Wait for room in ringbuffer*/ + } + + voodoo_use_texture(voodoo, params, 0); + if (voodoo->dual_tmus) + voodoo_use_texture(voodoo, params, 1); + + memcpy(params_new, params, sizeof(voodoo_params_t)); + + voodoo->params_write_idx++; + + if (PARAM_ENTRIES(0) < 4 || (voodoo->render_threads >= 2 && PARAM_ENTRIES(1) < 4) || + (voodoo->render_threads == 4 && (PARAM_ENTRIES(2) < 4 || PARAM_ENTRIES(3) < 4))) + voodoo_wake_render_thread(voodoo); +} diff --git a/src/video/vid_voodoo_setup.c b/src/video/vid_voodoo_setup.c new file mode 100644 index 000000000..283208960 --- /dev/null +++ b/src/video/vid_voodoo_setup.c @@ -0,0 +1,240 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * 3DFX Voodoo emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/mem.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> +#include <86box/vid_voodoo_setup.h> + + +void voodoo_triangle_setup(voodoo_t *voodoo) +{ + float dxAB, dxBC, dyAB, dyBC; + float area; + int va = 0, vb = 1, vc = 2; + vert_t verts[3]; + + verts[0] = voodoo->verts[0]; + verts[1] = voodoo->verts[1]; + verts[2] = voodoo->verts[2]; + + if (verts[0].sVy < verts[1].sVy) + { + if (verts[1].sVy < verts[2].sVy) + { + /* V1>V0, V2>V1, V2>V1>V0*/ + va = 0; /*OK*/ + vb = 1; + vc = 2; + } + else + { + /* V1>V0, V1>V2*/ + if (verts[0].sVy < verts[2].sVy) + { + /* V1>V0, V1>V2, V2>V0, V1>V2>V0*/ + va = 0; + vb = 2; + vc = 1; + } + else + { + /* V1>V0, V1>V2, V0>V2, V1>V0>V2*/ + va = 2; + vb = 0; + vc = 1; + } + } + } + else + { + if (verts[1].sVy < verts[2].sVy) + { + /* V0>V1, V2>V1*/ + if (verts[0].sVy < verts[2].sVy) + { + /* V0>V1, V2>V1, V2>V0, V2>V0>V1*/ + va = 1; + vb = 0; + vc = 2; + } + else + { + /* V0>V1, V2>V1, V0>V2, V0>V2>V1*/ + va = 1; + vb = 2; + vc = 0; + } + } + else + { + /*V0>V1>V2*/ + va = 2; + vb = 1; + vc = 0; + } + } + + dxAB = verts[0].sVx - verts[1].sVx; + dxBC = verts[1].sVx - verts[2].sVx; + dyAB = verts[0].sVy - verts[1].sVy; + dyBC = verts[1].sVy - verts[2].sVy; + + area = dxAB * dyBC - dxBC * dyAB; + + if (area == 0.0) + return; + + if (voodoo->sSetupMode & SETUPMODE_CULLING_ENABLE) + { + int cull_sign = voodoo->sSetupMode & SETUPMODE_CULLING_SIGN; + int sign = (area < 0.0); + + if ((voodoo->sSetupMode & (SETUPMODE_CULLING_ENABLE | SETUPMODE_DISABLE_PINGPONG)) + == SETUPMODE_CULLING_ENABLE && voodoo->cull_pingpong) + cull_sign = !cull_sign; + + if (cull_sign && sign) + return; + if (!cull_sign && !sign) + return; + } + + + dxAB = verts[va].sVx - verts[vb].sVx; + dxBC = verts[vb].sVx - verts[vc].sVx; + dyAB = verts[va].sVy - verts[vb].sVy; + dyBC = verts[vb].sVy - verts[vc].sVy; + + area = dxAB * dyBC - dxBC * dyAB; + + dxAB /= area; + dxBC /= area; + dyAB /= area; + dyBC /= area; + + + + voodoo->params.vertexAx = (int32_t)(int16_t)((int32_t)(verts[va].sVx * 16.0f) & 0xffff); + voodoo->params.vertexAy = (int32_t)(int16_t)((int32_t)(verts[va].sVy * 16.0f) & 0xffff); + voodoo->params.vertexBx = (int32_t)(int16_t)((int32_t)(verts[vb].sVx * 16.0f) & 0xffff); + voodoo->params.vertexBy = (int32_t)(int16_t)((int32_t)(verts[vb].sVy * 16.0f) & 0xffff); + voodoo->params.vertexCx = (int32_t)(int16_t)((int32_t)(verts[vc].sVx * 16.0f) & 0xffff); + voodoo->params.vertexCy = (int32_t)(int16_t)((int32_t)(verts[vc].sVy * 16.0f) & 0xffff); + + if (voodoo->params.vertexAy > voodoo->params.vertexBy || voodoo->params.vertexBy > voodoo->params.vertexCy) + fatal("triangle_setup wrong order %d %d %d\n", voodoo->params.vertexAy, voodoo->params.vertexBy, voodoo->params.vertexCy); + + if (voodoo->sSetupMode & SETUPMODE_RGB) + { + voodoo->params.startR = (int32_t)(verts[va].sRed * 4096.0f); + voodoo->params.dRdX = (int32_t)(((verts[va].sRed - verts[vb].sRed) * dyBC - (verts[vb].sRed - verts[vc].sRed) * dyAB) * 4096.0f); + voodoo->params.dRdY = (int32_t)(((verts[vb].sRed - verts[vc].sRed) * dxAB - (verts[va].sRed - verts[vb].sRed) * dxBC) * 4096.0f); + voodoo->params.startG = (int32_t)(verts[va].sGreen * 4096.0f); + voodoo->params.dGdX = (int32_t)(((verts[va].sGreen - verts[vb].sGreen) * dyBC - (verts[vb].sGreen - verts[vc].sGreen) * dyAB) * 4096.0f); + voodoo->params.dGdY = (int32_t)(((verts[vb].sGreen - verts[vc].sGreen) * dxAB - (verts[va].sGreen - verts[vb].sGreen) * dxBC) * 4096.0f); + voodoo->params.startB = (int32_t)(verts[va].sBlue * 4096.0f); + voodoo->params.dBdX = (int32_t)(((verts[va].sBlue - verts[vb].sBlue) * dyBC - (verts[vb].sBlue - verts[vc].sBlue) * dyAB) * 4096.0f); + voodoo->params.dBdY = (int32_t)(((verts[vb].sBlue - verts[vc].sBlue) * dxAB - (verts[va].sBlue - verts[vb].sBlue) * dxBC) * 4096.0f); + } + if (voodoo->sSetupMode & SETUPMODE_ALPHA) + { + voodoo->params.startA = (int32_t)(verts[va].sAlpha * 4096.0f); + voodoo->params.dAdX = (int32_t)(((verts[va].sAlpha - verts[vb].sAlpha) * dyBC - (verts[vb].sAlpha - verts[vc].sAlpha) * dyAB) * 4096.0f); + voodoo->params.dAdY = (int32_t)(((verts[vb].sAlpha - verts[vc].sAlpha) * dxAB - (verts[va].sAlpha - verts[vb].sAlpha) * dxBC) * 4096.0f); + } + if (voodoo->sSetupMode & SETUPMODE_Z) + { + voodoo->params.startZ = (int32_t)(verts[va].sVz * 4096.0f); + voodoo->params.dZdX = (int32_t)(((verts[va].sVz - verts[vb].sVz) * dyBC - (verts[vb].sVz - verts[vc].sVz) * dyAB) * 4096.0f); + voodoo->params.dZdY = (int32_t)(((verts[vb].sVz - verts[vc].sVz) * dxAB - (verts[va].sVz - verts[vb].sVz) * dxBC) * 4096.0f); + } + if (voodoo->sSetupMode & SETUPMODE_Wb) + { + voodoo->params.startW = (int64_t)(verts[va].sWb * 4294967296.0f); + voodoo->params.dWdX = (int64_t)(((verts[va].sWb - verts[vb].sWb) * dyBC - (verts[vb].sWb - verts[vc].sWb) * dyAB) * 4294967296.0f); + voodoo->params.dWdY = (int64_t)(((verts[vb].sWb - verts[vc].sWb) * dxAB - (verts[va].sWb - verts[vb].sWb) * dxBC) * 4294967296.0f); + voodoo->params.tmu[0].startW = voodoo->params.tmu[1].startW = voodoo->params.startW; + voodoo->params.tmu[0].dWdX = voodoo->params.tmu[1].dWdX = voodoo->params.dWdX; + voodoo->params.tmu[0].dWdY = voodoo->params.tmu[1].dWdY = voodoo->params.dWdY; + } + if (voodoo->sSetupMode & SETUPMODE_W0) + { + voodoo->params.tmu[0].startW = (int64_t)(verts[va].sW0 * 4294967296.0f); + voodoo->params.tmu[0].dWdX = (int64_t)(((verts[va].sW0 - verts[vb].sW0) * dyBC - (verts[vb].sW0 - verts[vc].sW0) * dyAB) * 4294967296.0f); + voodoo->params.tmu[0].dWdY = (int64_t)(((verts[vb].sW0 - verts[vc].sW0) * dxAB - (verts[va].sW0 - verts[vb].sW0) * dxBC) * 4294967296.0f); + voodoo->params.tmu[1].startW = voodoo->params.tmu[0].startW; + voodoo->params.tmu[1].dWdX = voodoo->params.tmu[0].dWdX; + voodoo->params.tmu[1].dWdY = voodoo->params.tmu[0].dWdY; + } + if (voodoo->sSetupMode & SETUPMODE_S0_T0) + { + voodoo->params.tmu[0].startS = (int64_t)(verts[va].sS0 * 4294967296.0f); + voodoo->params.tmu[0].dSdX = (int64_t)(((verts[va].sS0 - verts[vb].sS0) * dyBC - (verts[vb].sS0 - verts[vc].sS0) * dyAB) * 4294967296.0f); + voodoo->params.tmu[0].dSdY = (int64_t)(((verts[vb].sS0 - verts[vc].sS0) * dxAB - (verts[va].sS0 - verts[vb].sS0) * dxBC) * 4294967296.0f); + voodoo->params.tmu[0].startT = (int64_t)(verts[va].sT0 * 4294967296.0f); + voodoo->params.tmu[0].dTdX = (int64_t)(((verts[va].sT0 - verts[vb].sT0) * dyBC - (verts[vb].sT0 - verts[vc].sT0) * dyAB) * 4294967296.0f); + voodoo->params.tmu[0].dTdY = (int64_t)(((verts[vb].sT0 - verts[vc].sT0) * dxAB - (verts[va].sT0 - verts[vb].sT0) * dxBC) * 4294967296.0f); + voodoo->params.tmu[1].startS = voodoo->params.tmu[0].startS; + voodoo->params.tmu[1].dSdX = voodoo->params.tmu[0].dSdX; + voodoo->params.tmu[1].dSdY = voodoo->params.tmu[0].dSdY; + voodoo->params.tmu[1].startT = voodoo->params.tmu[0].startT; + voodoo->params.tmu[1].dTdX = voodoo->params.tmu[0].dTdX; + voodoo->params.tmu[1].dTdY = voodoo->params.tmu[0].dTdY; + } + if (voodoo->sSetupMode & SETUPMODE_W1) + { + voodoo->params.tmu[1].startW = (int64_t)(verts[va].sW1 * 4294967296.0f); + voodoo->params.tmu[1].dWdX = (int64_t)(((verts[va].sW1 - verts[vb].sW1) * dyBC - (verts[vb].sW1 - verts[vc].sW1) * dyAB) * 4294967296.0f); + voodoo->params.tmu[1].dWdY = (int64_t)(((verts[vb].sW1 - verts[vc].sW1) * dxAB - (verts[va].sW1 - verts[vb].sW1) * dxBC) * 4294967296.0f); + } + if (voodoo->sSetupMode & SETUPMODE_S1_T1) + { + voodoo->params.tmu[1].startS = (int64_t)(verts[va].sS1 * 4294967296.0f); + voodoo->params.tmu[1].dSdX = (int64_t)(((verts[va].sS1 - verts[vb].sS1) * dyBC - (verts[vb].sS1 - verts[vc].sS1) * dyAB) * 4294967296.0f); + voodoo->params.tmu[1].dSdY = (int64_t)(((verts[vb].sS1 - verts[vc].sS1) * dxAB - (verts[va].sS1 - verts[vb].sS1) * dxBC) * 4294967296.0f); + voodoo->params.tmu[1].startT = (int64_t)(verts[va].sT1 * 4294967296.0f); + voodoo->params.tmu[1].dTdX = (int64_t)(((verts[va].sT1 - verts[vb].sT1) * dyBC - (verts[vb].sT1 - verts[vc].sT1) * dyAB) * 4294967296.0f); + voodoo->params.tmu[1].dTdY = (int64_t)(((verts[vb].sT1 - verts[vc].sT1) * dxAB - (verts[va].sT1 - verts[vb].sT1) * dxBC) * 4294967296.0f); + } + + voodoo->params.sign = (area < 0.0); + + if (voodoo->ncc_dirty[0]) + voodoo_update_ncc(voodoo, 0); + if (voodoo->ncc_dirty[1]) + voodoo_update_ncc(voodoo, 1); + voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; + + voodoo_queue_triangle(voodoo, &voodoo->params); +} diff --git a/src/video/vid_voodoo_texture.c b/src/video/vid_voodoo_texture.c new file mode 100644 index 000000000..9f6e59b33 --- /dev/null +++ b/src/video/vid_voodoo_texture.c @@ -0,0 +1,627 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * 3DFX Voodoo emulation. + * + * + * + * Authors: Sarah Walker, + * + * Copyright 2008-2020 Sarah Walker. + */ +#include +#include +#include +#include +#include +#include +#include +#include <86box/86box.h> +#include "cpu.h" +#include <86box/machine.h> +#include <86box/device.h> +#include <86box/mem.h> +#include <86box/timer.h> +#include <86box/device.h> +#include <86box/plat.h> +#include <86box/video.h> +#include <86box/vid_svga.h> +#include <86box/vid_voodoo_common.h> +#include <86box/vid_voodoo_dither.h> +#include <86box/vid_voodoo_regs.h> +#include <86box/vid_voodoo_render.h> +#include <86box/vid_voodoo_texture.h> + + +#ifdef ENABLE_VOODOO_TEXTURE_LOG +int voodoo_texture_do_log = ENABLE_VOODOO_TEXTURE_LOG; + +static void +voodoo_texture_log(const char *fmt, ...) +{ + va_list ap; + + if (voodoo_texture_do_log) { + va_start(ap, fmt); + pclog_ex(fmt, ap); + va_end(ap); + } +} +#else +#define voodoo_texture_log(fmt, ...) +#endif + + +void voodoo_recalc_tex(voodoo_t *voodoo, int tmu) +{ + int aspect = (voodoo->params.tLOD[tmu] >> 21) & 3; + int width = 256, height = 256; + int shift = 8; + int lod; + uint32_t base = voodoo->params.texBaseAddr[tmu]; + uint32_t offset = 0; + int tex_lod = 0; + uint32_t offsets[LOD_MAX+3]; + int widths[LOD_MAX+3], heights[LOD_MAX+3], shifts[LOD_MAX+3]; + + if (voodoo->params.tLOD[tmu] & LOD_S_IS_WIDER) + height >>= aspect; + else + { + width >>= aspect; + shift -= aspect; + } + + for (lod = 0; lod <= LOD_MAX + 2; lod++) + { + offsets[lod] = offset; + widths[lod] = width >> lod; + heights[lod] = height >> lod; + shifts[lod] = shift - lod; + + if (!widths[lod]) + widths[lod] = 1; + if (!heights[lod]) + heights[lod] = 1; + if (shifts[lod] < 0) + shifts[lod] = 0; + + if (!(voodoo->params.tLOD[tmu] & LOD_SPLIT) || + ((lod & 1) && (voodoo->params.tLOD[tmu] & LOD_ODD)) || + (!(lod & 1) && !(voodoo->params.tLOD[tmu] & LOD_ODD))) + { + if (voodoo->params.tformat[tmu] & 8) + offset += (width >> lod) * (height >> lod) * 2; + else + offset += (width >> lod) * (height >> lod); + } + } + + + if ((voodoo->params.textureMode[tmu] & TEXTUREMODE_TRILINEAR) && (voodoo->params.tLOD[tmu] & LOD_ODD)) + tex_lod++; /*Skip LOD 0*/ + +// voodoo_texture_log("TMU %i: %08x\n", tmu, voodoo->params.textureMode[tmu]); + for (lod = 0; lod <= LOD_MAX+1; lod++) + { + if (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR) + { + switch (tex_lod) + { + case 0: + base = voodoo->params.texBaseAddr[tmu]; + break; + case 1: + base = voodoo->params.texBaseAddr1[tmu]; + break; + case 2: + base = voodoo->params.texBaseAddr2[tmu]; + break; + default: + base = voodoo->params.texBaseAddr38[tmu]; + break; + } + } + + voodoo->params.tex_base[tmu][lod] = base + offsets[tex_lod]; + if (voodoo->params.tformat[tmu] & 8) + voodoo->params.tex_end[tmu][lod] = base + offsets[tex_lod] + (widths[tex_lod] * heights[tex_lod] * 2); + else + voodoo->params.tex_end[tmu][lod] = base + offsets[tex_lod] + (widths[tex_lod] * heights[tex_lod]); + voodoo->params.tex_w_mask[tmu][lod] = widths[tex_lod] - 1; + voodoo->params.tex_w_nmask[tmu][lod] = ~(widths[tex_lod] - 1); + voodoo->params.tex_h_mask[tmu][lod] = heights[tex_lod] - 1; + voodoo->params.tex_shift[tmu][lod] = shifts[tex_lod]; + voodoo->params.tex_lod[tmu][lod] = tex_lod; + + if (!(voodoo->params.textureMode[tmu] & TEXTUREMODE_TRILINEAR) || + ((lod & 1) && (voodoo->params.tLOD[tmu] & LOD_ODD)) || + (!(lod & 1) && !(voodoo->params.tLOD[tmu] & LOD_ODD))) + { + if (!(voodoo->params.tLOD[tmu] & LOD_ODD) || lod != 0) + { + if (voodoo->params.textureMode[tmu] & TEXTUREMODE_TRILINEAR) + tex_lod += 2; + else + tex_lod++; + } + } + } + + voodoo->params.tex_width[tmu] = width; +} + +#define makergba(r, g, b, a) ((b) | ((g) << 8) | ((r) << 16) | ((a) << 24)) + +void voodoo_use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu) +{ + int c, d; + int lod; + int lod_min, lod_max; + uint32_t addr = 0, addr_end; + uint32_t palette_checksum; + + lod_min = (params->tLOD[tmu] >> 2) & 15; + lod_max = (params->tLOD[tmu] >> 8) & 15; + + if (params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL8 || params->tformat[tmu] == TEX_APAL88) + { + if (voodoo->palette_dirty[tmu]) + { + palette_checksum = 0; + + for (c = 0; c < 256; c++) + palette_checksum ^= voodoo->palette[tmu][c].u; + + voodoo->palette_checksum[tmu] = palette_checksum; + voodoo->palette_dirty[tmu] = 0; + } + else + palette_checksum = voodoo->palette_checksum[tmu]; + } + else + palette_checksum = 0; + + if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD) && (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR)) + addr = params->texBaseAddr1[tmu]; + else + addr = params->texBaseAddr[tmu]; + + /*Try to find texture in cache*/ + for (c = 0; c < TEX_CACHE_MAX; c++) + { + if (voodoo->texture_cache[tmu][c].base == addr && + voodoo->texture_cache[tmu][c].tLOD == (params->tLOD[tmu] & 0xf00fff) && + voodoo->texture_cache[tmu][c].palette_checksum == palette_checksum) + { + params->tex_entry[tmu] = c; + voodoo->texture_cache[tmu][c].refcount++; + return; + } + } + + /*Texture not found, search for unused texture*/ + do + { + for (c = 0; c < TEX_CACHE_MAX; c++) + { + voodoo->texture_last_removed++; + voodoo->texture_last_removed &= (TEX_CACHE_MAX-1); + if (voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[0] && + (voodoo->render_threads == 1 || voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[1])) + break; + } + if (c == TEX_CACHE_MAX) + voodoo_wait_for_render_thread_idle(voodoo); + } while (c == TEX_CACHE_MAX); + if (c == TEX_CACHE_MAX) + fatal("Texture cache full!\n"); + + c = voodoo->texture_last_removed; + + + if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD) && (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR)) + voodoo->texture_cache[tmu][c].base = params->texBaseAddr1[tmu]; + else + voodoo->texture_cache[tmu][c].base = params->texBaseAddr[tmu]; + voodoo->texture_cache[tmu][c].tLOD = params->tLOD[tmu] & 0xf00fff; + + lod_min = (params->tLOD[tmu] >> 2) & 15; + lod_max = (params->tLOD[tmu] >> 8) & 15; +// voodoo_texture_log(" add new texture to %i tformat=%i %08x LOD=%i-%i tmu=%i\n", c, voodoo->params.tformat[tmu], params->texBaseAddr[tmu], lod_min, lod_max, tmu); + lod_min = MIN(lod_min, 8); + lod_max = MIN(lod_max, 8); + for (lod = lod_min; lod <= lod_max; lod++) + { + uint32_t *base = &voodoo->texture_cache[tmu][c].data[texture_offset[lod]]; + uint32_t tex_addr = params->tex_base[tmu][lod] & voodoo->texture_mask; + int x, y; + int shift = 8 - params->tex_lod[tmu][lod]; + rgba_u *pal; + + //voodoo_texture_log(" LOD %i : %08x - %08x %i %i,%i\n", lod, params->tex_base[tmu][lod] & voodoo->texture_mask, addr, voodoo->params.tformat[tmu], voodoo->params.tex_w_mask[tmu][lod],voodoo->params.tex_h_mask[tmu][lod]); + + + switch (params->tformat[tmu]) + { + case TEX_RGB332: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(rgb332[dat].r, rgb332[dat].g, rgb332[dat].b, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_Y4I2Q2: + pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 1 : 0]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_A8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(dat, dat, dat, dat); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_I8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(dat, dat, dat, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_AI8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba((dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0xf0) | ((dat >> 4) & 0x0f)); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_PAL8: + pal = voodoo->palette[tmu]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_APAL8: + pal = voodoo->palette[tmu]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + int r = ((pal[dat].rgba.r & 3) << 6) | ((pal[dat].rgba.g & 0xf0) >> 2) | (pal[dat].rgba.r & 3); + int g = ((pal[dat].rgba.g & 0xf) << 4) | ((pal[dat].rgba.b & 0xc0) >> 4) | ((pal[dat].rgba.g & 0xf) >> 2); + int b = ((pal[dat].rgba.b & 0x3f) << 2) | ((pal[dat].rgba.b & 0x30) >> 4); + int a = (pal[dat].rgba.r & 0xfc) | ((pal[dat].rgba.r & 0xc0) >> 6); + + base[x] = makergba(r, g, b, a); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_ARGB8332: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(rgb332[dat & 0xff].r, rgb332[dat & 0xff].g, rgb332[dat & 0xff].b, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_A8Y4I2Q2: + pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 1 : 0]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_R5G6B5: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(rgb565[dat].r, rgb565[dat].g, rgb565[dat].b, 0xff); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_ARGB1555: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(argb1555[dat].r, argb1555[dat].g, argb1555[dat].b, argb1555[dat].a); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_ARGB4444: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(argb4444[dat].r, argb4444[dat].g, argb4444[dat].b, argb4444[dat].a); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_A8I8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(dat & 0xff, dat & 0xff, dat & 0xff, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_APAL88: + pal = voodoo->palette[tmu]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + default: + fatal("Unknown texture format %i\n", params->tformat[tmu]); + } + } + + voodoo->texture_cache[tmu][c].is16 = voodoo->params.tformat[tmu] & 8; + + if (params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL8 || params->tformat[tmu] == TEX_APAL88) + voodoo->texture_cache[tmu][c].palette_checksum = palette_checksum; + else + voodoo->texture_cache[tmu][c].palette_checksum = 0; + + if (lod_min == 0) + { + voodoo->texture_cache[tmu][c].addr_start[0] = voodoo->params.tex_base[tmu][0]; + voodoo->texture_cache[tmu][c].addr_end[0] = voodoo->params.tex_end[tmu][0]; + } + else + voodoo->texture_cache[tmu][c].addr_start[0] = voodoo->texture_cache[tmu][c].addr_end[0] = 0; + + if (lod_min <= 1 && lod_max >= 1) + { + voodoo->texture_cache[tmu][c].addr_start[1] = voodoo->params.tex_base[tmu][1]; + voodoo->texture_cache[tmu][c].addr_end[1] = voodoo->params.tex_end[tmu][1]; + } + else + voodoo->texture_cache[tmu][c].addr_start[1] = voodoo->texture_cache[tmu][c].addr_end[1] = 0; + + if (lod_min <= 2 && lod_max >= 2) + { + voodoo->texture_cache[tmu][c].addr_start[2] = voodoo->params.tex_base[tmu][2]; + voodoo->texture_cache[tmu][c].addr_end[2] = voodoo->params.tex_end[tmu][2]; + } + else + voodoo->texture_cache[tmu][c].addr_start[2] = voodoo->texture_cache[tmu][c].addr_end[2] = 0; + + if (lod_max >= 3) + { + voodoo->texture_cache[tmu][c].addr_start[3] = voodoo->params.tex_base[tmu][(lod_min > 3) ? lod_min : 3]; + voodoo->texture_cache[tmu][c].addr_end[3] = voodoo->params.tex_end[tmu][(lod_max < 8) ? lod_max : 8]; + } + else + voodoo->texture_cache[tmu][c].addr_start[3] = voodoo->texture_cache[tmu][c].addr_end[3] = 0; + + + for (d = 0; d < 4; d++) + { + addr = voodoo->texture_cache[tmu][c].addr_start[d]; + addr_end = voodoo->texture_cache[tmu][c].addr_end[d]; + + if (addr_end != 0) + { + for (; addr <= addr_end; addr += (1 << TEX_DIRTY_SHIFT)) + voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1; + } + } + + params->tex_entry[tmu] = c; + voodoo->texture_cache[tmu][c].refcount++; +} + +void flush_texture_cache(voodoo_t *voodoo, uint32_t dirty_addr, int tmu) +{ + int wait_for_idle = 0; + int c; + + memset(voodoo->texture_present[tmu], 0, sizeof(voodoo->texture_present[0])); +// voodoo_texture_log("Evict %08x %i\n", dirty_addr, sizeof(voodoo->texture_present)); + for (c = 0; c < TEX_CACHE_MAX; c++) + { + if (voodoo->texture_cache[tmu][c].base != -1) + { + int d; + + for (d = 0; d < 4; d++) + { + int addr_start = voodoo->texture_cache[tmu][c].addr_start[d]; + int addr_end = voodoo->texture_cache[tmu][c].addr_end[d]; + + if (addr_end != 0) + { + int addr_start_masked = addr_start & voodoo->texture_mask & ~0x3ff; + int addr_end_masked = ((addr_end & voodoo->texture_mask) + 0x3ff) & ~0x3ff; + + if (addr_end_masked < addr_start_masked) + addr_end_masked = voodoo->texture_mask+1; + if (dirty_addr >= addr_start_masked && dirty_addr < addr_end_masked) + { +// voodoo_texture_log(" Evict texture %i %08x\n", c, voodoo->texture_cache[tmu][c].base); + + if (voodoo->texture_cache[tmu][c].refcount != voodoo->texture_cache[tmu][c].refcount_r[0] || + (voodoo->render_threads == 2 && voodoo->texture_cache[tmu][c].refcount != voodoo->texture_cache[tmu][c].refcount_r[1])) + wait_for_idle = 1; + + voodoo->texture_cache[tmu][c].base = -1; + } + else + { + for (; addr_start <= addr_end; addr_start += (1 << TEX_DIRTY_SHIFT)) + voodoo->texture_present[tmu][(addr_start & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1; + } + } + } + } + } + if (wait_for_idle) + voodoo_wait_for_render_thread_idle(voodoo); +} + +void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p) +{ + int lod, s, t; + voodoo_t *voodoo = (voodoo_t *)p; + int tmu; + + if (addr & 0x400000) + return; /*TREX != 0*/ + + tmu = (addr & 0x200000) ? 1 : 0; + + if (tmu && !voodoo->dual_tmus) + return; + + if (voodoo->type < VOODOO_BANSHEE) + { + if (!(voodoo->params.tformat[tmu] & 8) && voodoo->type >= VOODOO_BANSHEE) + { + lod = (addr >> 16) & 0xf; + t = (addr >> 8) & 0xff; + } + else + { + lod = (addr >> 17) & 0xf; + t = (addr >> 9) & 0xff; + } + if (voodoo->params.tformat[tmu] & 8) + s = (addr >> 1) & 0xfe; + else + { + if ((voodoo->params.textureMode[tmu] & (1 << 31)) || voodoo->type >= VOODOO_BANSHEE) + s = addr & 0xfc; + else + s = (addr >> 1) & 0xfc; + } + if (lod > LOD_MAX) + return; + +// if (addr >= 0x200000) +// return; + + if (voodoo->params.tformat[tmu] & 8) + addr = voodoo->params.tex_base[tmu][lod] + s*2 + (t << voodoo->params.tex_shift[tmu][lod])*2; + else + addr = voodoo->params.tex_base[tmu][lod] + s + (t << voodoo->params.tex_shift[tmu][lod]); + } + else + addr = (addr & 0x1ffffc) + voodoo->params.tex_base[tmu][0]; + + if (voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) + { +// voodoo_texture_log("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); + flush_texture_cache(voodoo, addr & voodoo->texture_mask, tmu); + } + if (voodoo->type == VOODOO_3 && voodoo->texture_present[tmu^1][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) + { +// voodoo_texture_log("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); + flush_texture_cache(voodoo, addr & voodoo->texture_mask, tmu^1); + } + *(uint32_t *)(&voodoo->tex_mem[tmu][addr & voodoo->texture_mask]) = val; +} diff --git a/src/win/Makefile.mingw b/src/win/Makefile.mingw index ffe6b66bf..d8d7ba9b8 100644 --- a/src/win/Makefile.mingw +++ b/src/win/Makefile.mingw @@ -755,6 +755,7 @@ VIDOBJ := video.o \ vid_wy700.o \ vid_ega.o vid_ega_render.o \ vid_svga.o vid_svga_render.o \ + vid_ddc.o \ vid_vga.o \ vid_ati_eeprom.o \ vid_ati18800.o vid_ati28800.o \ @@ -774,7 +775,13 @@ VIDOBJ := video.o \ vid_att20c49x_ramdac.o \ vid_s3.o vid_s3_virge.o \ vid_sdac_ramdac.o \ - vid_voodoo.o + vid_voodoo.o vid_voodoo_banshee.o \ + vid_voodoo_banshee_blitter.o \ + vid_voodoo_blitter.o \ + vid_voodoo_display.o vid_voodoo_fb.o \ + vid_voodoo_fifo.o vid_voodoo_reg.o \ + vid_voodoo_render.o vid_voodoo_setup.o \ + vid_voodoo_texture.o PLATOBJ := win.o \ win_dynld.o win_thread.o \ diff --git a/src/win/win_settings.c b/src/win/win_settings.c index 3242c59cf..bc334e042 100644 --- a/src/win/win_settings.c +++ b/src/win/win_settings.c @@ -662,7 +662,6 @@ win_settings_machine_recalc_fpu(HWND hdlg) static void win_settings_machine_recalc_cpu(HWND hdlg) { - HWND h; int cpu_type; #ifdef USE_DYNAREC int cpu_flags; @@ -672,7 +671,6 @@ win_settings_machine_recalc_cpu(HWND hdlg) settings_enable_window(hdlg, IDC_COMBO_WS, (cpu_type >= CPU_286) && (cpu_type <= CPU_386DX)); #ifdef USE_DYNAREC - h = GetDlgItem(hdlg, IDC_CHECK_DYNAREC); cpu_flags = temp_cpu_f->cpus[temp_cpu].cpu_flags; if (!(cpu_flags & CPU_SUPPORTS_DYNAREC) && (cpu_flags & CPU_REQUIRES_DYNAREC)) fatal("Attempting to select a CPU that requires the recompiler and does not support it at the same time\n"); @@ -681,10 +679,12 @@ win_settings_machine_recalc_cpu(HWND hdlg) temp_dynarec = 0; if (cpu_flags & CPU_REQUIRES_DYNAREC) temp_dynarec = 1; - SendMessage(h, BM_SETCHECK, temp_dynarec, 0); - EnableWindow(h, FALSE); - } else - EnableWindow(h, TRUE); + settings_set_check(hdlg, IDC_CHECK_DYNAREC, temp_dynarec); + settings_enable_window(hdlg, IDC_CHECK_DYNAREC, FALSE); + } else { + settings_set_check(hdlg, IDC_CHECK_DYNAREC, temp_dynarec); + settings_enable_window(hdlg, IDC_CHECK_DYNAREC, TRUE); + } #endif win_settings_machine_recalc_fpu(hdlg); @@ -3275,6 +3275,7 @@ win_settings_floppy_drives_recalc_list(HWND hdlg) lvI.iSubItem = 1; lvI.pszText = plat_get_string(temp_fdd_turbo[i] ? IDS_2060 : IDS_2061); + lvI.iItem = i; lvI.iImage = 0; if (ListView_SetItem(hwndList, &lvI) == -1) @@ -3282,6 +3283,8 @@ win_settings_floppy_drives_recalc_list(HWND hdlg) lvI.iSubItem = 2; lvI.pszText = plat_get_string(temp_fdd_check_bpb[i] ? IDS_2060 : IDS_2061); + lvI.iItem = i; + lvI.iImage = 0; if (ListView_SetItem(hwndList, &lvI) == -1) return FALSE;