diff --git a/src/Makefile.mingw b/src/Makefile.mingw index c401b170d..a68238a77 100644 --- a/src/Makefile.mingw +++ b/src/Makefile.mingw @@ -5,7 +5,7 @@ WINDRES = windres.exe CFLAGS = -O3 -march=native -mtune=native -fbranch-probabilities -fvpt -funroll-loops -fpeel-loops -ftracer -fomit-frame-pointer -ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse -mstackrealign OBJ = 386.o 386_dynarec.o 386_dynarec_ops.o 808x.o acer386sx.o acerm3a.o ali1429.o amstrad.o cdrom-ioctl.o cdrom-iso.o \ cdrom-null.o codegen.o codegen_ops.o codegen_timing_486.o codegen_timing_686.o codegen_timing_pentium.o codegen_timing_winchip.o codegen_x86.o compaq.o config.o cpu.o dac.o \ - device.o disc.o disc_86f.o disc_fdi.o disc_img_86box.o disc_random.o disc_td0.o dma.o fdc.o fdc37c665.o fdc37c932fr.o fdd.o fdi2raw.o gameport.o headland.o i430hx.o i430lx.o i430fx.o \ + device.o disc.o disc_86f.o disc_fdi.o disc_imd.o disc_img_86box.o disc_random.o disc_td0.o dma.o fdc.o fdc37c665.o fdc37c932fr.o fdd.o fdi2raw.o gameport.o headland.o i430hx.o i430lx.o i430fx.o \ i430nx.o i430vx.o i440fx.o ide.o intel.o intel_flash.o io.o jim.o joystick_ch_flightstick_pro.o joystick_standard.o joystick_sw_pad.o joystick_tm_fcs.o keyboard.o keyboard_amstrad.o keyboard_at.o \ keyboard_olim24.o keyboard_pcjr.o keyboard_xt.o lpt.o mcr.o mem.o memregs.o model.o mouse.o mouse_ps2.o \ mouse_serial.o ne2000.o neat.o nethandler.o nmi.o nvr.o olivetti_m24.o opti.o pc.o pc87306.o pci.o pic.o piix.o pit.o ppi.o ps1.o rom.o rtc.o \ diff --git a/src/Makefile.mingw64 b/src/Makefile.mingw64 index 1e197636a..9979dd3be 100644 --- a/src/Makefile.mingw64 +++ b/src/Makefile.mingw64 @@ -5,7 +5,7 @@ WINDRES = windres.exe CFLAGS = -O3 -march=native -mtune=native -fbranch-probabilities -fvpt -funroll-loops -fpeel-loops -ftracer -fomit-frame-pointer -ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse -mstackrealign OBJ = 386.o 386_dynarec.o 386_dynarec_ops.o 808x.o acer386sx.o acerm3a.o ali1429.o amstrad.o cdrom-ioctl.o cdrom-iso.o \ 
cdrom-null.o codegen.o codegen_ops.o codegen_timing_486.o codegen_timing_686.o codegen_timing_pentium.o codegen_timing_winchip.o codegen_x86-64.o compaq.o config.o cpu.o dac.o \ - device.o disc.o disc_86f.o disc_fdi.o disc_img_86box.o disc_random.o disc_td0.o dma.o fdc.o fdc37c665.o fdc37c932fr.o fdd.o fdi2raw.o gameport.o headland.o i430hx.o i430lx.o i430fx.o \ + device.o disc.o disc_86f.o disc_fdi.o disc_imd.o disc_img_86box.o disc_random.o disc_td0.o dma.o fdc.o fdc37c665.o fdc37c932fr.o fdd.o fdi2raw.o gameport.o headland.o i430hx.o i430lx.o i430fx.o \ i430nx.o i430vx.o i440fx.o ide.o intel.o intel_flash.o io.o jim.o joystick_ch_flightstick_pro.o joystick_standard.o joystick_sw_pad.o joystick_tm_fcs.o keyboard.o keyboard_amstrad.o keyboard_at.o \ keyboard_olim24.o keyboard_pcjr.o keyboard_xt.o lpt.o mcr.o mem.o memregs.o model.o mouse.o mouse_ps2.o \ mouse_serial.o ne2000.o neat.o nethandler.o nmi.o nvr.o olivetti_m24.o opti.o pc.o pc87306.o pci.o pic.o piix.o pit.o ppi.o ps1.o rom.o rtc.o \ diff --git a/src/cpu.c b/src/cpu.c index 5eee1f6d0..dd8974b52 100644 --- a/src/cpu.c +++ b/src/cpu.c @@ -105,7 +105,9 @@ uint64_t ecx8x_msr[4] = {0, 0, 0, 0}; uint64_t ecx116_msr = 0; uint64_t ecx11x_msr[4] = {0, 0, 0, 0}; uint64_t ecx11e_msr = 0; +uint64_t ecx186_msr = 0; uint64_t ecx1e0_msr = 0; +uint64_t ecx570_msr = 0; /* AMD K5 and K6 MSR's. 
*/ uint64_t ecx83_msr = 0; @@ -2000,6 +2002,10 @@ void cpu_RDMSR() if (models[model].cpu[cpu_manufacturer].cpus[cpu].cpu_type == CPU_PENTIUMPRO) goto i686_invalid_rdmsr; EAX = eip_msr; break; + case 0x186: + EAX = ecx186_msr & 0xffffffff; + EDX = ecx186_msr >> 32; + break; case 0x1E0: EAX = ecx1e0_msr & 0xffffffff; EDX = ecx1e0_msr >> 32; @@ -2041,6 +2047,10 @@ void cpu_RDMSR() EAX = mtrr_deftype_msr & 0xffffffff; EDX = mtrr_deftype_msr >> 32; break; + case 0x570: + EAX = ecx570_msr & 0xffffffff; + EDX = ecx570_msr >> 32; + break; default: i686_invalid_rdmsr: #ifndef RELEASE_BUILD @@ -2181,6 +2191,9 @@ void cpu_WRMSR() // pclog("WRMSR SYSENTER_EIP: old=%08X, new=%08X\n", eip_msr, EAX); eip_msr = EAX; break; + case 0x186: + ecx186_msr = EAX | ((uint64_t)EDX << 32); + break; case 0x1E0: ecx1e0_msr = EAX | ((uint64_t)EDX << 32); break; @@ -2209,6 +2222,9 @@ void cpu_WRMSR() case 0x2FF: mtrr_deftype_msr = EAX | ((uint64_t)EDX << 32); break; + case 0x570: + ecx570_msr = EAX | ((uint64_t)EDX << 32); + break; default: i686_invalid_wrmsr: #ifndef RELEASE_BUILD diff --git a/src/disc.c b/src/disc.c index 41ff96b05..4bf434f6c 100644 --- a/src/disc.c +++ b/src/disc.c @@ -9,6 +9,7 @@ #include "disc_img.h" #include "disc_86f.h" #include "disc_td0.h" +#include "disc_imd.h" #include "fdc.h" #include "fdd.h" #include "timer.h" @@ -70,13 +71,14 @@ loaders[]= {"720", img_load, img_close, -1}, {"86F", d86f_load, d86f_close, -1}, {"DSK", img_load, img_close, -1}, + {"FDI", fdi_load, fdi_close, -1}, {"FLP", img_load, img_close, -1}, - {"IMG", img_load, img_close, -1}, {"IMA", img_load, img_close, -1}, + {"IMD", imd_load, imd_close, -1}, + {"IMG", img_load, img_close, -1}, {"TD0", td0_load, td0_close, -1}, {"VFD", img_load, img_close, -1}, {"XDF", img_load, img_close, -1}, - {"FDI", fdi_load, fdi_close, -1}, {0,0,0} }; diff --git a/src/disc.h b/src/disc.h index 7160958ea..a3c8033df 100644 --- a/src/disc.h +++ b/src/disc.h @@ -146,5 +146,25 @@ void null_writeback(int drive); void 
null_poll_write_data(int drive, int side, uint16_t pos, uint8_t data); int null_format_conditions(int drive); void d86f_unregister(int drive); - void d86f_reset_index_hole_pos(int drive, int side); + +extern uint8_t xdf_physical_sectors[2][2]; /* XDF tables are defined once in disc_img_86box.c; extern keeps this header from emitting a definition in every file that includes it. */ +extern uint8_t xdf_gap3_sizes[2][2]; +extern uint16_t xdf_trackx_spos[2][8]; + +typedef struct +{ + uint8_t h; + uint8_t r; +} xdf_id_t; + +typedef union +{ + uint16_t word; + xdf_id_t id; +} xdf_sector_t; + +extern xdf_sector_t xdf_img_layout[2][2][46]; +extern xdf_sector_t xdf_disk_layout[2][2][38]; + +uint32_t td0_get_raw_tsize(int side_flags, int slower_rpm); diff --git a/src/disc_86f.c b/src/disc_86f.c index 5cc459c26..bf6c33ba9 100644 --- a/src/disc_86f.c +++ b/src/disc_86f.c @@ -832,10 +832,13 @@ void d86f_seek(int drive, int track) uint8_t track_id = track; int sides; int side; +#if 0 int full_size, store_size; int flag_bytes = 5; +#endif sides = d86f_get_sides(drive); +#if 0 full_size = d86f_get_array_size(drive); store_size = full_size << 1; if (d86f_is_encoded(drive)) store_size += full_size; @@ -848,6 +851,7 @@ void d86f_seek(int drive, int track) } } if (d86f_get_sides(drive) == 2) flag_bytes += 4; +#endif if (d86f_is_40_track(drive) && fdd_doublestep_40(drive)) track /= 2; @@ -905,7 +909,7 @@ void d86f_seek(int drive, int track) { if (d86f_is_old_style(drive)) { - fread(d86f[drive].track_layout[side], 1, d86f_get_raw_size(drive), d86f[drive].f); + fread(d86f[drive].track_layout[side], 1, d86f_get_array_size(drive), d86f[drive].f); } else { @@ -916,11 +920,11 @@ void d86f_seek(int drive, int track) } if (d86f_is_encoded(drive)) { - fread(d86f[drive].track_encoded_data[side], 1, d86f_get_raw_size(drive) << 1, d86f[drive].f); + fread(d86f[drive].track_encoded_data[side], 1, d86f_get_array_size(drive) << 1, d86f[drive].f); } else { - fread(d86f[drive].track_data[side], 1, d86f_get_raw_size(drive), d86f[drive].f); + fread(d86f[drive].track_data[side], 1, d86f_get_array_size(drive), d86f[drive].f); } } } @@ -930,6 +934,7 @@ void d86f_writeback(int 
drive) int track = d86f[drive].cur_track; uint8_t track_id = track; int side; +#if 0 int full_size, store_size; int flag_bytes = 5; @@ -945,6 +950,7 @@ void d86f_writeback(int drive) } } if (d86f_get_sides(drive) == 2) flag_bytes += 4; +#endif if (!d86f[drive].f) { @@ -977,7 +983,7 @@ void d86f_writeback(int drive) { if (d86f_is_old_style(drive)) { - fwrite(d86f[drive].track_layout[side], 1, d86f_get_raw_size(drive), d86f[drive].f); + fwrite(d86f[drive].track_layout[side], 1, d86f_get_array_size(drive), d86f[drive].f); } else { @@ -988,11 +994,11 @@ void d86f_writeback(int drive) } if (d86f_is_encoded(drive)) { - fwrite(d86f[drive].track_encoded_data[side], 1, d86f_get_raw_size(drive) << 1, d86f[drive].f); + fwrite(d86f[drive].track_encoded_data[side], 1, d86f_get_array_size(drive) << 1, d86f[drive].f); } else { - fwrite(d86f[drive].track_data[side], 1, d86f_get_raw_size(drive), d86f[drive].f); + fwrite(d86f[drive].track_data[side], 1, d86f_get_array_size(drive), d86f[drive].f); } } @@ -1897,7 +1903,7 @@ int d86f_poll_check_notfound(int drive) /* The index hole has been hit twice and we're still in a find state. This means sector finding has failed for whatever reason. Abort with sector not found and set state to idle. 
*/ - pclog("d86f_poll(): Sector not found (%i %i %i %i) (%i, %i)\n", d86f[drive].req_sector.id.c, d86f[drive].req_sector.id.h, d86f[drive].req_sector.id.r, d86f[drive].req_sector.id.n, fdc_get_bitcell_period(), d86f_get_bitcell_period(drive)); + // pclog("d86f_poll(): Sector not found (%i %i %i %i) (%i, %i)\n", d86f[drive].req_sector.id.c, d86f[drive].req_sector.id.h, d86f[drive].req_sector.id.r, d86f[drive].req_sector.id.n, fdc_get_bitcell_period(), d86f_get_bitcell_period(drive)); fdc_notfound(); d86f[drive].state = STATE_IDLE; d86f[drive].index_count = 0; diff --git a/src/disc_imd.c b/src/disc_imd.c new file mode 100644 index 000000000..e468d49ea --- /dev/null +++ b/src/disc_imd.c @@ -0,0 +1,717 @@ +/* Copyright holders: Kiririn + see COPYING for more details +*/ +#include "ibm.h" +#include "fdc.h" +#include "fdd.h" +#include "disc.h" +#include "disc_imd.h" + +#include + +typedef struct +{ + uint8_t is_present; + uint32_t file_offs; + uint8_t params[5]; + uint32_t r_map_offs; + uint32_t c_map_offs; + uint32_t h_map_offs; + uint32_t n_map_offs; + uint32_t data_offs; + uint32_t sector_data_offs[255]; + uint32_t sector_data_size[255]; + uint32_t gap3_len; + uint16_t side_flags; +} imd_track_t; + +static struct +{ + FILE *f; + uint8_t *buffer; + uint32_t start_offs; + int track_count, sides; + int track; + uint16_t disk_flags; + imd_track_t tracks[256][2]; + uint16_t current_side_flags[2]; + uint8_t xdf_ordered_pos[256][2]; + uint8_t *current_data[2]; + uint8_t track_buffer[2][25000]; +} imd[2]; + +void imd_init() +{ + memset(imd, 0, sizeof(imd)); +} + +void d86f_register_imd(int drive); + +void imd_load(int drive, char *fn) +{ + uint32_t magic = 0; + uint32_t fsize = 0; + uint8_t *buffer; + uint8_t *buffer2; + int i = 0; + int has_cyl_map = 0; + int has_head_map = 0; + int has_size_map = 0; + int track_spt = 0; + int sector_size = 0; + int track = 0; + int side = 0; + int extra = 0; + int fm = 0; + uint32_t last_offset = 0; + uint32_t data_size = 512; + uint32_t 
mfm = 0; + uint32_t pre_sector = 0; + uint32_t track_total = 0; + uint32_t raw_tsize = 0; + uint32_t minimum_gap3 = 0; + uint32_t minimum_gap4 = 12; + + d86f_unregister(drive); + + writeprot[drive] = 0; + imd[drive].f = fopen(fn, "rb+"); + if (!imd[drive].f) + { + imd[drive].f = fopen(fn, "rb"); + if (!imd[drive].f) + return; + writeprot[drive] = 1; + } + if (ui_writeprot[drive]) + { + writeprot[drive] = 1; + } + fwriteprot[drive] = writeprot[drive]; + + fseek(imd[drive].f, 0, SEEK_SET); + fread(&magic, 1, 4, imd[drive].f); + if (magic != 0x20444D49) + { + pclog("IMD: Not a valid ImageDisk image\n"); + fclose(imd[drive].f); + return; + } + else + { + pclog("IMD: Valid ImageDisk image\n"); + } + + fseek(imd[drive].f, 0, SEEK_END); + fsize = ftell(imd[drive].f); + + fseek(imd[drive].f, 0, SEEK_SET); + imd[drive].buffer = malloc(fsize); + fread(imd[drive].buffer, 1, fsize, imd[drive].f); + buffer = imd[drive].buffer; + + buffer2 = strchr(buffer, 0x1A); + if (buffer2 == NULL) + { + pclog("IMD: No ASCII EOF character\n"); + fclose(imd[drive].f); + return; + } + else + { + pclog("IMD: ASCII EOF character found at offset %08X\n", buffer2 - buffer); + } + + buffer2++; + if ((buffer2 - buffer) == fsize) + { + pclog("IMD: File ends after ASCII EOF character\n"); + fclose(imd[drive].f); + return; + } + else + { + pclog("IMD: File continues after ASCII EOF character\n"); + } + + imd[drive].start_offs = (buffer2 - buffer); + imd[drive].disk_flags = 0x00; + imd[drive].track_count = 0; + imd[drive].sides = 1; + + while(1) + { + track = buffer2[1]; + side = buffer2[2]; + if (side & 1) imd[drive].sides = 2; + extra = side & 0xC0; + side &= 0x3F; + // pclog("IMD: Loading track %i, side %i\n", track, side); + + imd[drive].tracks[track][side].side_flags = (buffer2[0] % 3); + if (!imd[drive].tracks[track][side].side_flags) imd[drive].disk_flags |= (0x02); + imd[drive].tracks[track][side].side_flags |= (!(buffer2[0] - imd[drive].tracks[track][side].side_flags) ? 
0 : 8); + mfm = imd[drive].tracks[track][side].side_flags & 8; + track_total = mfm ? 146 : 73; + pre_sector = mfm ? 60 : 42; + + track_spt = buffer2[3]; + sector_size = buffer2[4]; + if ((track_spt == 15) && (sector_size == 2)) imd[drive].tracks[track][side].side_flags |= 0x20; + if ((track_spt == 16) && (sector_size == 2)) imd[drive].tracks[track][side].side_flags |= 0x20; + if ((track_spt == 17) && (sector_size == 2)) imd[drive].tracks[track][side].side_flags |= 0x20; + if ((track_spt == 8) && (sector_size == 3)) imd[drive].tracks[track][side].side_flags |= 0x20; + imd[drive].tracks[track][side].is_present = 1; + imd[drive].tracks[track][side].file_offs = (buffer2 - buffer); + memcpy(imd[drive].tracks[track][side].params, buffer2, 5); + imd[drive].tracks[track][side].r_map_offs = imd[drive].tracks[track][side].file_offs + 5; + last_offset = imd[drive].tracks[track][side].r_map_offs + track_spt; + + if (extra & 0x80) + { + imd[drive].tracks[track][side].c_map_offs = last_offset; + last_offset += track_spt; + } + + if (extra & 0x40) + { + imd[drive].tracks[track][side].h_map_offs = last_offset; + last_offset += track_spt; + } + + if (sector_size == 0xFF) + { + imd[drive].tracks[track][side].n_map_offs = last_offset; + buffer2 = buffer + last_offset; + last_offset += track_spt; + + imd[drive].tracks[track][side].data_offs = last_offset; + + for (i = 0; i < track_spt; i++) + { + data_size = buffer2[i]; + data_size = 128 << data_size; + imd[drive].tracks[track][side].sector_data_offs[i] = last_offset; + imd[drive].tracks[track][side].sector_data_size[i] = 1; + if (buffer[imd[drive].tracks[track][side].sector_data_offs[i]] != 0) + { + imd[drive].tracks[track][side].sector_data_size[i] += (buffer[imd[drive].tracks[track][side].sector_data_offs[i]] & 1) ? 
data_size : 1; + } + last_offset += imd[drive].tracks[track][side].sector_data_size[i]; + if (!(buffer[imd[drive].tracks[track][side].sector_data_offs[i]] & 1)) + { + fwriteprot[drive] = writeprot[drive] = 1; + } + track_total += (pre_sector + data_size + 2); + } + } + else + { + imd[drive].tracks[track][side].data_offs = last_offset; + + for (i = 0; i < track_spt; i++) + { + data_size = sector_size; + data_size = 128 << data_size; + imd[drive].tracks[track][side].sector_data_offs[i] = last_offset; + imd[drive].tracks[track][side].sector_data_size[i] = 1; + if (buffer[imd[drive].tracks[track][side].sector_data_offs[i]] != 0) + { + imd[drive].tracks[track][side].sector_data_size[i] += (buffer[imd[drive].tracks[track][side].sector_data_offs[i]] & 1) ? data_size : 1; + } + last_offset += imd[drive].tracks[track][side].sector_data_size[i]; + if (!(buffer[imd[drive].tracks[track][side].sector_data_offs[i]] & 1)) + { + fwriteprot[drive] = writeprot[drive] = 1; + } + track_total += (pre_sector + data_size + 2); + } + } + buffer2 = buffer + last_offset; + + raw_tsize = td0_get_raw_tsize(imd[drive].tracks[track][side].side_flags, 0); + minimum_gap3 = 12 * track_spt; + if ((raw_tsize - track_total) < (minimum_gap3 + minimum_gap4)) + { + /* If we can't fit the sectors with a reasonable minimum gap at perfect RPM, let's try 2% slower. */ + raw_tsize = td0_get_raw_tsize(imd[drive].tracks[track][side].side_flags, 1); + /* Set disk flags so that rotation speed is 2% slower. */ + imd[drive].disk_flags |= (3 << 5); + if ((raw_tsize - track_total) < (minimum_gap3 + minimum_gap4)) + { + /* If we can't fit the sectors with a reasonable minimum gap even at 2% slower RPM, abort. 
*/ + pclog("IMD: Unable to fit the %i sectors in a track\n", track_spt); + fclose(imd[drive].f); + return; + } + } + imd[drive].tracks[track][side].gap3_len = (raw_tsize - track_total - minimum_gap4) / track_spt; + + imd[drive].track_count++; + + if (last_offset >= fsize) + { + break; + } + } + + if (imd[drive].track_count > 43) imd[drive].disk_flags |= 1; /* If the image has more than 43 tracks, then the tracks are thin (96 tpi). */ + if (imd[drive].sides == 2) imd[drive].disk_flags |= 8; /* If the image has 2 sides, mark it as such. */ + + d86f_register_imd(drive); + + drives[drive].seek = imd_seek; + + d86f_common_handlers(drive); +} + +void imd_close(int drive) +{ + int i = 0; + int j = 0; + d86f_unregister(drive); + if (imd[drive].f) + { + free(imd[drive].buffer); + for (i = 0; i < 256; i++) + { + memset(&(imd[drive].tracks[i][0]), 0, sizeof(imd_track_t)); + memset(&(imd[drive].tracks[i][1]), 0, sizeof(imd_track_t)); + } + fclose(imd[drive].f); + } + imd[drive].f = NULL; +} +/* Checks whether the given track and side of the loaded IMD image has an XDF sector layout. Returns 0 if not; 1 or 2 identify the XDF variant (commented below as 5.25" 2HD and 3.5" 2HD). On a match current_side_flags[side] is updated, and the index of every recognised sector ID is recorded in xdf_ordered_pos[ID][side]. */ +int imd_track_is_xdf(int drive, int side, int track) +{ + int i, effective_sectors, xdf_sectors; + int high_sectors, low_sectors; + int max_high_id, expected_high_count, expected_low_count; + uint8_t *r_map; + uint8_t *n_map; + uint8_t *data_base; + uint8_t *cur_data; + + effective_sectors = xdf_sectors = high_sectors = low_sectors = 0; + + for (i = 0; i < 256; i++) imd[drive].xdf_ordered_pos[i][side] = 0; /* The table is indexed [sector ID][side], so clear this side's entry for every ID. */ + + if (imd[drive].tracks[track][side].params[2] & 0xC0) + { + return 0; + } + if ((imd[drive].tracks[track][side].params[3] != 16) && (imd[drive].tracks[track][side].params[3] != 19)) + { + return 0; + } + r_map = imd[drive].buffer + imd[drive].tracks[track][side].r_map_offs; + data_base = imd[drive].buffer + imd[drive].tracks[track][side].data_offs; + + if (!track) + { + if (imd[drive].tracks[track][side].params[4] != 2) + { + return 0; + } + if (!side) + { + max_high_id = (imd[drive].tracks[track][side].params[3] == 19) ? 
0x8B : 0x88; + expected_high_count = (imd[drive].tracks[track][side].params[3] == 19) ? 0x0B : 0x08; + expected_low_count = 8; + } + else + { + max_high_id = (imd[drive].tracks[track][side].params[3] == 19) ? 0x93 : 0x90; + expected_high_count = (imd[drive].tracks[track][side].params[3] == 19) ? 0x13 : 0x10; + expected_low_count = 0; + } + for (i = 0; i < imd[drive].tracks[track][side].params[3]; i++) + { + if ((r_map[i] >= 0x81) && (r_map[i] <= max_high_id)) + { + high_sectors++; + imd[drive].xdf_ordered_pos[r_map[i]][side] = i; + } + if ((r_map[i] >= 0x01) && (r_map[i] <= 0x08)) + { + low_sectors++; + imd[drive].xdf_ordered_pos[r_map[i]][side] = i; + } + if ((high_sectors == expected_high_count) && (low_sectors == expected_low_count)) + { + imd[drive].current_side_flags[side] = (imd[drive].tracks[track][side].params[3] == 19) ? 0x08 : 0x28; + return (imd[drive].tracks[track][side].params[3] == 19) ? 2 : 1; + } + } + return 0; /* Only give up once every sector ID on the track has been examined. */ + } + else + { + if (imd[drive].tracks[track][side].params[4] != 0xFF) + { + return 0; + } + + n_map = imd[drive].buffer + imd[drive].tracks[track][side].n_map_offs; + + cur_data = data_base; + for (i = 0; i < imd[drive].tracks[track][side].params[3]; i++) + { + effective_sectors++; + if (!(r_map[i]) && !(n_map[i])) + { + effective_sectors--; + } + if ((r_map[i] == (n_map[i] | 0x80))) + { + xdf_sectors++; + imd[drive].xdf_ordered_pos[r_map[i]][side] = i; + } + cur_data += (128 << ((uint32_t) n_map[i])); + } + if ((effective_sectors == 3) && (xdf_sectors == 3)) + { + imd[drive].current_side_flags[side] = 0x28; /* Update the IMD loader's own state, as the track 0 branch does. */ + return 1; /* 5.25" 2HD XDF */ + } + if ((effective_sectors == 4) && (xdf_sectors == 4)) + { + imd[drive].current_side_flags[side] = 0x08; + return 2; /* 3.5" 2HD XDF */ + } + return 0; + } +} + +void imd_sector_to_buffer(int drive, int track, int side, uint8_t *buffer, int sector, int len) +{ + int i = 0; + int type = imd[drive].buffer[imd[drive].tracks[track][side].sector_data_offs[sector]]; + if (type == 0) + { + memset(buffer, 0, 
len); + } + else + { + if (type & 1) + { + memcpy(buffer, &(imd[drive].buffer[imd[drive].tracks[track][side].sector_data_offs[sector] + 1]), len); + } + else + { + memset(buffer, imd[drive].buffer[imd[drive].tracks[track][side].sector_data_offs[sector] + 1], len); + } + } +} + +void imd_seek(int drive, int track) +{ + int side; + + uint8_t id[4] = { 0, 0, 0, 0 }; + uint8_t type, deleted, bad_crc; + + int sector, current_pos; + + int c = 0; + int h = 0; + int r = 0; + int n = 0; + int ssize = 512; + + int track_rate = 0; + + int track_gap2 = 22; + int track_gap3 = 12; + + int xdf_type = 0; + + int is_trackx = 0; + + int xdf_spt = 0; + int xdf_sector = 0; + + int ordered_pos = 0; + + uint8_t *c_map; + uint8_t *h_map; + uint8_t *r_map; + uint8_t *n_map; + uint8_t *data; + uint32_t track_buf_pos[2] = { 0, 0 }; + + if (!imd[drive].f) + return; + + // pclog("IMD: Seeking...\n"); + + if (d86f_is_40_track(drive) && fdd_doublestep_40(drive)) + track /= 2; + + is_trackx = (track == 0) ? 0 : 1; + + imd[drive].track = track; + + imd[drive].current_side_flags[0] = imd[drive].tracks[track][0].side_flags; + imd[drive].current_side_flags[1] = imd[drive].tracks[track][1].side_flags; + + d86f_reset_index_hole_pos(drive, 0); + d86f_reset_index_hole_pos(drive, 1); + + for (side = 0; side < imd[drive].sides; side++) + { + track_rate = imd[drive].current_side_flags[side] & 7; + if (!track_rate && (imd[drive].current_side_flags[side] & 0x20)) track_rate = 4; + + r_map = imd[drive].buffer + imd[drive].tracks[track][side].r_map_offs; + h = imd[drive].tracks[track][side].params[2]; + if (h & 0x80) + { + c_map = imd[drive].buffer + imd[drive].tracks[track][side].c_map_offs; + } + else + { + c = imd[drive].tracks[track][side].params[1]; + } + if (h & 0x40) + { + h_map = imd[drive].buffer + imd[drive].tracks[track][side].h_map_offs; + } + n = imd[drive].tracks[track][side].params[4]; + if (n == 0xFF) + { + n_map = imd[drive].buffer + imd[drive].tracks[track][side].n_map_offs; + track_gap3 = 
gap3_sizes[track_rate][n_map[0]][imd[drive].tracks[track][side].params[3]]; + } + else + { + track_gap3 = gap3_sizes[track_rate][n][imd[drive].tracks[track][side].params[3]]; + } + if (!track_gap3) + { + track_gap3 = imd[drive].tracks[track][side].gap3_len; + } + + xdf_type = imd_track_is_xdf(drive, side, track); + + current_pos = d86f_prepare_pretrack(drive, side, 0, 1); + + if (!xdf_type) + { + for (sector = 0; sector < imd[drive].tracks[track][side].params[3]; sector++) + { + id[0] = (h & 0x80) ? c_map[sector] : c; + id[1] = (h & 0x40) ? h_map[sector] : (h & 1); + id[2] = r_map[sector]; + id[3] = (n == 0xFF) ? n_map[sector] : n; + ssize = 128 << ((uint32_t) id[3]); + data = imd[drive].track_buffer[side] + track_buf_pos[side]; + type = imd[drive].buffer[imd[drive].tracks[track][side].sector_data_offs[sector]]; + type = (type >> 1) & 7; + deleted = bad_crc = 0; + if ((type == 2) || (type == 4)) deleted = 1; + if ((type == 3) || (type == 4)) bad_crc = 1; + + // pclog("IMD: (%i %i) %i %i %i %i (%i %i) (GPL=%i)\n", track, side, id[0], id[1], id[2], id[3], deleted, bad_crc, track_gap3); + imd_sector_to_buffer(drive, track, side, data, sector, ssize); + current_pos = d86f_prepare_sector(drive, side, current_pos, id, data, ssize, 1, 22, track_gap3, 0, deleted, bad_crc); + track_buf_pos[side] += ssize; + } + } + else + { + xdf_type--; + xdf_spt = xdf_physical_sectors[xdf_type][is_trackx]; + for (sector = 0; sector < xdf_spt; sector++) + { + xdf_sector = (side * xdf_spt) + sector; + id[0] = track; + id[1] = side; + id[2] = xdf_disk_layout[xdf_type][is_trackx][xdf_sector].id.r; + id[3] = is_trackx ? 
(id[2] & 7) : 2; + ssize = 128 << ((uint32_t) id[3]); + ordered_pos = imd[drive].xdf_ordered_pos[id[2]][side]; + + data = imd[drive].track_buffer[side] + track_buf_pos[side]; + type = imd[drive].buffer[imd[drive].tracks[track][side].sector_data_offs[ordered_pos]]; + type = (type >> 1) & 7; + deleted = bad_crc = 0; + if ((type == 2) || (type == 4)) deleted = 1; + if ((type == 3) || (type == 4)) bad_crc = 1; + imd_sector_to_buffer(drive, track, side, data, ordered_pos, ssize); + + if (is_trackx) + { + current_pos = d86f_prepare_sector(drive, side, xdf_trackx_spos[xdf_type][xdf_sector], id, data, ssize, 1, track_gap2, xdf_gap3_sizes[xdf_type][is_trackx], 0, deleted, bad_crc); + } + else + { + current_pos = d86f_prepare_sector(drive, side, current_pos, id, data, ssize, 1, track_gap2, xdf_gap3_sizes[xdf_type][is_trackx], 0, deleted, bad_crc); + } + + track_buf_pos[side] += ssize; + } + } + } + + // pclog("Seeked to track: %i (%02X, %02X)\n", imd[drive].track, imd[drive].current_side_flags[0], imd[drive].current_side_flags[1]); +} + +uint16_t imd_disk_flags(int drive) +{ + return imd[drive].disk_flags; +} + +uint16_t imd_side_flags(int drive) +{ + int side = 0; + uint16_t sflags = 0; + side = fdd_get_head(drive); + sflags = imd[drive].current_side_flags[side]; + return sflags; +} +/* Points current_data[side] at the stored data record of the sector on the current track whose ID equals (c, h, r, n). Returns without changing anything if c differs from the current track; current_data is left as it was if no sector matches. */ +void imd_set_sector(int drive, int side, uint8_t c, uint8_t h, uint8_t r, uint8_t n) +{ + int i = 0; + int track = imd[drive].track; + int sc = 0; + int sh = 0; + int sn = 0; + uint8_t *c_map, *h_map, *r_map, *n_map; + uint8_t id[4] = { 0, 0, 0, 0 }; + sc = imd[drive].tracks[track][side].params[1]; + sh = imd[drive].tracks[track][side].params[2]; + sn = imd[drive].tracks[track][side].params[4]; + if (sh & 0x80) + { + c_map = imd[drive].buffer + imd[drive].tracks[track][side].c_map_offs; + } + if (sh & 0x40) + { + h_map = imd[drive].buffer + imd[drive].tracks[track][side].h_map_offs; + } + r_map = imd[drive].buffer + imd[drive].tracks[track][side].r_map_offs; + if (sn == 0xFF) + { + n_map = 
imd[drive].buffer + imd[drive].tracks[track][side].n_map_offs; + } + if (c != imd[drive].track) return; + for (i = 0; i < imd[drive].tracks[track][side].params[3]; i++) + { + id[0] = (sh & 0x80) ? c_map[i] : sc; /* Map presence comes from the track's own head byte (sh) and size byte (sn), not from the requested ID. */ + id[1] = (sh & 0x40) ? h_map[i] : (sh & 1); + id[2] = r_map[i]; + id[3] = (sn == 0xFF) ? n_map[i] : sn; + if ((id[0] == c) && + (id[1] == h) && + (id[2] == r) && + (id[3] == n)) + { + imd[drive].current_data[side] = imd[drive].buffer + imd[drive].tracks[track][side].sector_data_offs[i]; + } + } + return; +} +/* Writes the current track back to the image file from the in-memory buffer, unless the drive is write-protected: for each present side, the 5-byte track header, the sector maps and every sector data record, starting at the track's file offset. */ +void imd_writeback(int drive) +{ + int side; + int track = imd[drive].track; + + if (writeprot[drive]) + { + return; + } + + int i = 0; + + uint8_t *r_map; + uint8_t *n_map; + + uint8_t h, n, spt; + uint32_t ssize; + + for (side = 0; side < imd[drive].sides; side++) + { + if (imd[drive].tracks[track][side].is_present) + { + r_map = imd[drive].buffer + imd[drive].tracks[track][side].r_map_offs; + fseek(imd[drive].f, imd[drive].tracks[track][side].file_offs, SEEK_SET); + h = imd[drive].tracks[track][side].params[2]; + spt = imd[drive].tracks[track][side].params[3]; + n = imd[drive].tracks[track][side].params[4]; + fwrite(imd[drive].tracks[track][side].params, 1, 5, imd[drive].f); + fwrite(r_map, 1, spt, imd[drive].f); /* The sector numbering map sits directly after the header (r_map_offs is file_offs + 5), so it must be written before the optional maps. */ + if (h & 0x80) + { + fwrite(imd[drive].buffer + imd[drive].tracks[track][side].c_map_offs, 1, spt, imd[drive].f); + } + if (h & 0x40) + { + fwrite(imd[drive].buffer + imd[drive].tracks[track][side].h_map_offs, 1, spt, imd[drive].f); + } + if (n == 0xFF) + { + n_map = imd[drive].buffer + imd[drive].tracks[track][side].n_map_offs; + fwrite(n_map, 1, spt, imd[drive].f); + } + for (i = 0; i < spt; i++) + { + ssize = imd[drive].tracks[track][side].sector_data_size[i]; /* Stored record length as computed at load time: the type byte plus a full, compressed or absent payload. */ + fwrite(imd[drive].buffer + imd[drive].tracks[track][side].sector_data_offs[i], 1, ssize, imd[drive].f); + } + } + } +} + +uint8_t imd_poll_read_data(int drive, int side, uint16_t pos) +{ + int type = imd[drive].current_data[side][0]; + if (!type) + { + return 0; + } + else + { + return (type & 1) ? 
imd[drive].current_data[side][pos + 1] : imd[drive].current_data[side][1]; + } +} + +void imd_poll_write_data(int drive, int side, uint16_t pos, uint8_t data) +{ + int type = imd[drive].current_data[side][0]; + if (writeprot[drive]) + { + return; + } + if (!(type & 1)) + { + return; /* Should never happen. */ + } + imd[drive].current_data[side][pos + 1] = data; +} + +int imd_format_conditions(int drive) +{ + int track = imd[drive].track; + int side = 0; + int temp = 0; + side = fdd_get_head(drive); + temp = (fdc_get_format_sectors() == imd[drive].tracks[track][side].params[3]); + temp = temp && (fdc_get_format_n() == imd[drive].tracks[track][side].params[4]); + return temp; +} + +void d86f_register_imd(int drive) +{ + d86f_handler[drive].disk_flags = imd_disk_flags; + d86f_handler[drive].side_flags = imd_side_flags; + d86f_handler[drive].writeback = imd_writeback; + d86f_handler[drive].set_sector = imd_set_sector; + d86f_handler[drive].read_data = imd_poll_read_data; + d86f_handler[drive].write_data = imd_poll_write_data; + d86f_handler[drive].format_conditions = imd_format_conditions; + d86f_handler[drive].check_crc = 1; +} diff --git a/src/disc_imd.h b/src/disc_imd.h new file mode 100644 index 000000000..03628b031 --- /dev/null +++ b/src/disc_imd.h @@ -0,0 +1,7 @@ +/* Copyright holders: Kiririn + see COPYING for more details +*/ +void imd_init(); +void imd_load(int drive, char *fn); +void imd_close(int drive); +void imd_seek(int drive, int track); diff --git a/src/disc_img.c b/src/disc_img.c index 79cf1fb75..e6abf55c8 100644 --- a/src/disc_img.c +++ b/src/disc_img.c @@ -68,9 +68,8 @@ static int gap3_sizes[5][8][256] = { [0][1][16] = 0x54, [0][2][19] = 0x48, [0][2][20] = 0x2A, [0][2][21] = 0x0C, - // [0][2][23] = 0x7A, [0][2][23] = 0x01, - // [0][2][24] = 0x38, + [0][3][11] = 0x26, [2][1][10] = 0x32, [2][1][11] = 0x0C, [2][1][15] = 0x36, @@ -84,18 +83,13 @@ static int gap3_sizes[5][8][256] = { [0][1][16] = 0x54, [2][3][5] = 0x74, [3][2][36] = 0x53, [3][2][39] = 
0x20, - // [3][2][46] = 0x0E, [3][2][46] = 0x01, - // [3][2][48] = 0x51, [4][1][32] = 0x36, [4][2][15] = 0x54, [4][2][17] = 0x23, [4][2][18] = 0x02, - // [4][2][19] = 0x29, [4][2][19] = 0x01, - [4][3][8] = 0x74, - [4][3][9] = 0x74, - [4][3][10] = 0x74 + [4][3][8] = 0x74 }; /* Needed for formatting! */ diff --git a/src/disc_img_86box.c b/src/disc_img_86box.c index 0fe201a57..963ff6a04 100644 --- a/src/disc_img_86box.c +++ b/src/disc_img_86box.c @@ -32,21 +32,9 @@ static struct static uint8_t dmf_r[21] = { 12, 2, 13, 3, 14, 4, 15, 5, 16, 6, 17, 7, 18, 8, 19, 9, 20, 10, 21, 11, 1 }; static uint8_t xdf_spt[2] = { 6, 8 }; static uint8_t xdf_logical_sectors[2][2] = { { 38, 6 }, { 46, 8 } }; -static uint8_t xdf_physical_sectors[2][2] = { { 16, 3 }, { 19, 4 } }; -static uint8_t xdf_gap3_sizes[2][2] = { { 60, 69 }, { 60, 50 } }; -static uint16_t xdf_trackx_spos[2][8] = { { 0xA7F, 0xF02, 0x11B7, 0xB66, 0xE1B, 0x129E }, { 0x302, 0x7E2, 0xA52, 0x12DA, 0x572, 0xDFA, 0x106A, 0x154A } }; - -typedef struct -{ - uint8_t h; - uint8_t r; -} xdf_id_t; - -typedef union -{ - uint16_t word; - xdf_id_t id; -} xdf_sector_t; +uint8_t xdf_physical_sectors[2][2] = { { 16, 3 }, { 19, 4 } }; +uint8_t xdf_gap3_sizes[2][2] = { { 60, 69 }, { 60, 50 } }; +uint16_t xdf_trackx_spos[2][8] = { { 0xA7F, 0xF02, 0x11B7, 0xB66, 0xE1B, 0x129E }, { 0x302, 0x7E2, 0xA52, 0x12DA, 0x572, 0xDFA, 0x106A, 0x154A } }; /* XDF: Layout of the sectors in the image. */ xdf_sector_t xdf_img_layout[2][2][46] = { { { 0x8100, 0x8200, 0x8300, 0x8400, 0x8500, 0x8600, 0x8700, 0x8800, @@ -124,11 +112,9 @@ int gap3_sizes[5][8][256] = { [0][1][16] = 0x54, [0][2][18] = 0x6C, [0][2][19] = 0x48, [0][2][20] = 0x2A, - // [0][2][21] = 0x0C, [0][2][21] = 0x08, /* Microsoft DMFWRITE.EXE uses this, 0x0C is used by FDFORMAT. 
*/ - // [0][2][23] = 0x7A, [0][2][23] = 0x01, - // [0][2][24] = 0x38, + [0][3][11] = 0x26, [2][1][10] = 0x32, [2][1][11] = 0x0C, [2][1][15] = 0x36, @@ -142,17 +128,12 @@ int gap3_sizes[5][8][256] = { [0][1][16] = 0x54, [2][3][5] = 0x74, [3][2][36] = 0x53, [3][2][39] = 0x20, - // [3][2][46] = 0x0E, [3][2][46] = 0x01, - // [3][2][48] = 0x51, [4][1][32] = 0x36, [4][2][15] = 0x54, [4][2][17] = 0x23, - // [4][2][19] = 0x29, [4][2][19] = 0x01, - [4][3][8] = 0x74, - [4][3][9] = 0x74, - [4][3][10] = 0x74 + [4][3][8] = 0x74 }; void img_writeback(int drive); @@ -291,6 +272,7 @@ void img_load(int drive, char *fn) else if (size <= (180*1024)) { img[drive].sectors = 9; img[drive].tracks = 40; img[drive].sides = 1; } else if (size <= (320*1024)) { img[drive].sectors = 8; img[drive].tracks = 40; } else if (size <= (360*1024)) { img[drive].sectors = 9; img[drive].tracks = 40; } /*Double density*/ + else if (size <= (400*1024)) { img[drive].sectors = 10; img[drive].tracks = 80; img[drive].sides = 1; } /*DEC RX50*/ else if (size <= (640*1024)) { img[drive].sectors = 8; img[drive].tracks = 80; } /*Double density 640k*/ else if (size < (1024*1024)) { img[drive].sectors = 9; img[drive].tracks = 80; } /*Double density*/ else if (size <= 1228800) { img[drive].sectors = 15; img[drive].tracks = 80; } /*High density 1.2MB*/ diff --git a/src/disc_td0.c b/src/disc_td0.c index dc4e79467..bf64ae7e4 100644 --- a/src/disc_td0.c +++ b/src/disc_td0.c @@ -102,6 +102,7 @@ typedef struct int track; int current_sector_index[2]; uint8_t calculated_gap3_lengths[256][2]; + uint8_t xdf_ordered_pos[256][2]; } td0_t; td0_t td0[2]; @@ -492,7 +493,7 @@ void td0_init() void d86f_register_td0(int drive); -static const int rates[3] = { 2, 1, 0 }; +// static const int rates[3] = { 2, 1, 0, 2, 3 }; /* 0 = 250 kbps, 1 = 300 kbps, 2 = 500 kbps, 3 = unknown, 4 = 1000 kbps */ const int max_size = 4*1024*1024; // 4MB ought to be large enough for any floppy const int max_processed_size = 5*1024*1024; uint8_t 
imagebuf[4*1024*1024]; @@ -587,10 +588,10 @@ void td0_close(int drive) td0[drive].f = NULL; } -uint32_t td0_get_raw_tsize(int drive, int track, int side, int slower_rpm) +uint32_t td0_get_raw_tsize(int side_flags, int slower_rpm) { uint32_t size; - switch(td0[drive].side_flags[track][side] & 0x27) + switch(side_flags & 0x27) { case 0x22: size = slower_rpm ? 5314 : 5208; @@ -636,7 +637,8 @@ int td0_initialize(int drive) int offset = 0; int ret = 0; int gap3_len = 0; - int rate = 0; + // int rate = 0; + int density = 0; int i = 0; int j = 0; int temp_rate = 0; @@ -648,6 +650,7 @@ int td0_initialize(int drive) uint16_t size; uint8_t *dbuf = processed_buf; uint32_t total_size = 0; + uint32_t pre_sector = 0; uint32_t track_size = 0; uint32_t raw_tsize = 0; uint32_t minimum_gap3 = 0; @@ -703,26 +706,40 @@ int td0_initialize(int drive) return 0; } + density = (header[5] >> 1) & 3; + + if (density == 3) + { + pclog("TD0: Unknown density\n"); + return 0; + } + /* We determine RPM from the drive type as well as we possibly can. */ + /* This byte is actually the BIOS floppy drive type read by Teledisk from the CMOS. */ switch(header[6]) { - case 0: /* 5.25" 2DD in 2HD drive: 360 rpm */ - case 2: /* 5.25" 2HD: 360 rpm */ - td0[drive].default_track_flags = ((header[5] & 0x7f) == 2) ? 0x20 : 0x00; + case 0: /* 5.25" 360k in 1.2M drive: 360 rpm + CMOS Drive type: None, value probably reused by Teledisk */ + case 2: /* 5.25" 1.2M 360 rpm */ + case 5: /* 8"/5.25"/3.5" 1.25M 360 rpm */ + td0[drive].default_track_flags = (density == 1) ? 
0x20 : 0x21; break; - case 5: /* 8 " 2?D: 360 rpm */ - td0[drive].default_track_flags = 0x20; + case 1: /* 5.25" 360k: 300 rpm */ + case 3: /* 3.5" 720k: 300 rpm */ + td0[drive].default_track_flags = 0x02; break; - case 1: /* 5.25" 2DD: 300 rpm */ - case 3: /* 3.5 " 2DD: 300 rpm */ - case 4: /* 3.5 " 2HD: 300 rpm */ - case 6: /* 3.5 " 2ED?: 300 rpm */ - td0[drive].default_track_flags = 0x00; + case 4: /* 3.5" 1.44M: 300 rpm */ + td0[drive].default_track_flags = (density == 1) ? 0x00 : 0x02; + break; + case 6: /* 3.5" 2.88M: 300 rpm */ + td0[drive].default_track_flags = (density == 1) ? 0x00 : ((density == 2) ? 0x03 : 0x02); break; } - rate = (header[5] & 0x7f) >= 3 ? 0 : rates[header[5] & 0x7f]; - td0[drive].default_track_flags |= rate; + td0[drive].disk_flags = header[5] & 0x06; + + // rate = (header[5] & 0x7f) >= 3 ? 0 : rates[header[5] & 0x7f]; + // td0[drive].default_track_flags |= rate; for (i = 0; i < 256; i++) { @@ -743,7 +760,9 @@ int td0_initialize(int drive) td0[drive].side_flags[track][head] = td0[drive].default_track_flags | (fm ? 0 : 8); td0[drive].track_in_file[track][head] = 1; offset += 4; - track_size = 146; + track_size = fm ? 73 : 146; + pre_sector = fm ? 
42 : 60; + for(i = 0; i < track_spt; i++) { hs = &imagebuf[offset]; @@ -826,23 +845,26 @@ int td0_initialize(int drive) dbuf += size; total_size += size; - track_size += size; + track_size += (pre_sector + size + 2); } track_count = track; - track_spt = imagebuf[offset]; - if (track_spt != 255) { td0[drive].track_spt[track][head] = track_spt; - raw_tsize = td0_get_raw_tsize(drive, track, head, 0); + if ((td0[drive].track_spt[track][head] == 8) && (td0[drive].sects[track][head][0].size == 3)) + { + td0[drive].side_flags[track][head] |= 0x20; + } + + raw_tsize = td0_get_raw_tsize(td0[drive].side_flags[track][head], 0); minimum_gap3 = 12 * track_spt; if ((raw_tsize - track_size) < (minimum_gap3 + minimum_gap4)) { /* If we can't fit the sectors with a reasonable minimum gap at perfect RPM, let's try 2% slower. */ - raw_tsize = td0_get_raw_tsize(drive, track, head, 1); + raw_tsize = td0_get_raw_tsize(td0[drive].side_flags[track][head], 1); /* Set disk flags so that rotation speed is 2% slower. 
*/ td0[drive].disk_flags |= (3 << 5); if ((raw_tsize - track_size) < (minimum_gap3 + minimum_gap4)) @@ -853,6 +875,8 @@ int td0_initialize(int drive) } } td0[drive].calculated_gap3_lengths[track][head] = (raw_tsize - track_size - minimum_gap4) / track_spt; + + track_spt = imagebuf[offset]; } } @@ -863,7 +887,7 @@ int td0_initialize(int drive) td0[drive].tracks = track_count + 1; - temp_rate = rate; + temp_rate = td0[drive].default_track_flags & 7; if ((td0[drive].default_track_flags & 0x27) == 0x20) temp_rate = 4; td0[drive].gap3_len = gap3_sizes[temp_rate][td0[drive].sects[0][0][0].size][td0[drive].track_spt[0][0]]; // pclog("GAP3 length for %i %i %i is %i\n", temp_rate, td0[drive].sects[0][0][0].size, td0[drive].track_spt[0][0], td0[drive].gap3_len); @@ -876,14 +900,8 @@ int td0_initialize(int drive) if(head_count == 2) { - td0[drive].disk_flags = 8; /* 2 sides */ + td0[drive].disk_flags |= 8; /* 2 sides */ } - else - { - td0[drive].disk_flags = 0; /* 1 side */ - } - - if (td0[drive].tracks > 43) td0[drive].disk_flags |= 1; /* If the image has more than 43 tracks, then the tracks are thin (96 tpi). */ td0[drive].sides = head_count; @@ -895,6 +913,101 @@ int td0_initialize(int drive) return 1; } +int td0_track_is_xdf(int drive, int side, int track) +{ + uint8_t id[4] = { 0, 0, 0, 0 }; + int i, effective_sectors, xdf_sectors; + int high_sectors, low_sectors; + int max_high_id, expected_high_count, expected_low_count; + + effective_sectors = xdf_sectors = high_sectors = low_sectors = 0; + + memset(td0[drive].xdf_ordered_pos[side], 0, 256); + + if (!track) + { + if ((td0[drive].track_spt[track][side] == 16) || (td0[drive].track_spt[track][side] == 19)) + { + if (!side) + { + max_high_id = (td0[drive].track_spt[track][side] == 19) ? 0x8B : 0x88; + expected_high_count = (td0[drive].track_spt[track][side] == 19) ? 0x0B : 0x08; + expected_low_count = 8; + } + else + { + max_high_id = (td0[drive].track_spt[track][side] == 19) ? 
0x93 : 0x90; + expected_high_count = (td0[drive].track_spt[track][side] == 19) ? 0x13 : 0x10; + expected_low_count = 0; + } + for (i = 0; i < td0[drive].track_spt[track][side]; i++) + { + id[0] = td0[drive].sects[track][side][i].track; + id[1] = td0[drive].sects[track][side][i].head; + id[2] = td0[drive].sects[track][side][i].sector; + id[3] = td0[drive].sects[track][side][i].size; + if (!(id[0]) && (id[1] == side) && (id[3] == 2)) + { + if ((id[2] >= 0x81) && (id[2] <= max_high_id)) + { + high_sectors++; + td0[drive].xdf_ordered_pos[id[2]][side] = i; + } + if ((id[2] >= 0x01) && (id[2] <= 0x08)) + { + low_sectors++; + td0[drive].xdf_ordered_pos[id[2]][side] = i; + } + } + } + if ((high_sectors == expected_high_count) && (low_sectors == expected_low_count)) + { + td0[drive].current_side_flags[side] = (td0[drive].track_spt[track][side] == 19) ? 0x08 : 0x28; + return (td0[drive].track_spt[track][side] == 19) ? 2 : 1; + } + // pclog("XDF: %i %i %i %i\n", high_sectors, expected_high_count, low_sectors, expected_low_count); + return 0; + } + else + { + // pclog("XDF: %i sectors per track (%i %i)\n", td0[drive].track_spt[track][side], track, side); + return 0; + } + } + else + { + for (i = 0; i < td0[drive].track_spt[track][side]; i++) + { + id[0] = td0[drive].sects[track][side][i].track; + id[1] = td0[drive].sects[track][side][i].head; + id[2] = td0[drive].sects[track][side][i].sector; + id[3] = td0[drive].sects[track][side][i].size; + effective_sectors++; + if ((id[0] == track) && (id[1] == side) && !(id[2]) && !(id[3])) + { + effective_sectors--; + } + if ((id[0] == track) && (id[1] == side) && (id[2] == (id[3] | 0x80))) + { + xdf_sectors++; + td0[drive].xdf_ordered_pos[id[2]][side] = i; + } + } + // pclog("XDF: %i %i\n", effective_sectors, xdf_sectors); + if ((effective_sectors == 3) && (xdf_sectors == 3)) + { + td0[drive].current_side_flags[side] = 0x28; + return 1; /* 5.25" 2HD XDF */ + } + if ((effective_sectors == 4) && (xdf_sectors == 4)) + { + 
td0[drive].current_side_flags[side] = 0x08; + return 2; /* 3.5" 2HD XDF */ + } + return 0; + } +} + void td0_seek(int drive, int track) { int side; @@ -906,7 +1019,18 @@ void td0_seek(int drive, int track) int ssize = 512; int track_rate = 0; + + int track_gap2 = 22; int track_gap3 = 12; + + int xdf_type = 0; + + int is_trackx = 0; + + int xdf_spt = 0; + int xdf_sector = 0; + + int ordered_pos = 0; if (!td0[drive].f) return; @@ -914,11 +1038,15 @@ void td0_seek(int drive, int track) if (d86f_is_40_track(drive) && fdd_doublestep_40(drive)) track /= 2; + is_trackx = (track == 0) ? 0 : 1; + td0[drive].track = track; td0[drive].current_side_flags[0] = td0[drive].side_flags[track][0]; td0[drive].current_side_flags[1] = td0[drive].side_flags[track][1]; + // pclog("TD0 Seek: %02X %02X (%02X)\n", td0[drive].current_side_flags[0], td0[drive].current_side_flags[1], td0[drive].disk_flags); + d86f_reset_index_hole_pos(drive, 0); d86f_reset_index_hole_pos(drive, 1); @@ -929,20 +1057,51 @@ void td0_seek(int drive, int track) track_gap3 = gap3_sizes[track_rate][td0[drive].sects[track][side][0].size][td0[drive].track_spt[track][side]]; if (!track_gap3) { - td0[drive].calculated_gap3_lengths[track][side]; + track_gap3 = td0[drive].calculated_gap3_lengths[track][side]; } + track_gap2 = ((td0[drive].current_side_flags[side] & 7) >= 3) ? 
41 : 22; + + xdf_type = td0_track_is_xdf(drive, side, track); + current_pos = d86f_prepare_pretrack(drive, side, 0, 1); - for (sector = 0; sector < td0[drive].track_spt[track][side]; sector++) + if (!xdf_type) { - id[0] = td0[drive].sects[track][side][sector].track; - id[1] = td0[drive].sects[track][side][sector].head; - id[2] = td0[drive].sects[track][side][sector].sector; - id[3] = td0[drive].sects[track][side][sector].size; - // pclog("TD0: %i %i %i %i (%i %i) (GPL=%i)\n", id[0], id[1], id[2], id[3], td0[drive].sects[track][side][sector].deleted, td0[drive].sects[track][side][sector].bad_crc, track_gap3); - ssize = 128 << ((uint32_t) td0[drive].sects[track][side][sector].size); - current_pos = d86f_prepare_sector(drive, side, current_pos, id, td0[drive].sects[track][side][sector].data, ssize, 1, 0x22, track_gap3, 0, td0[drive].sects[track][side][sector].deleted, td0[drive].sects[track][side][sector].bad_crc); + for (sector = 0; sector < td0[drive].track_spt[track][side]; sector++) + { + id[0] = td0[drive].sects[track][side][sector].track; + id[1] = td0[drive].sects[track][side][sector].head; + id[2] = td0[drive].sects[track][side][sector].sector; + id[3] = td0[drive].sects[track][side][sector].size; + // pclog("TD0: %i %i %i %i (%i %i) (GPL=%i)\n", id[0], id[1], id[2], id[3], td0[drive].sects[track][side][sector].deleted, td0[drive].sects[track][side][sector].bad_crc, track_gap3); + ssize = 128 << ((uint32_t) td0[drive].sects[track][side][sector].size); + current_pos = d86f_prepare_sector(drive, side, current_pos, id, td0[drive].sects[track][side][sector].data, ssize, 1, track_gap2, track_gap3, 0, td0[drive].sects[track][side][sector].deleted, td0[drive].sects[track][side][sector].bad_crc); + } + } + else + { + xdf_type--; + xdf_spt = xdf_physical_sectors[xdf_type][is_trackx]; + for (sector = 0; sector < xdf_spt; sector++) + { + xdf_sector = (side * xdf_spt) + sector; + id[0] = track; + id[1] = side; + id[2] = 
xdf_disk_layout[xdf_type][is_trackx][xdf_sector].id.r; + id[3] = is_trackx ? (id[2] & 7) : 2; + ssize = 128 << ((uint32_t) id[3]); + ordered_pos = td0[drive].xdf_ordered_pos[id[2]][side]; + // pclog("TD0: XDF: (%i %i) %i %i %i %i (%i %i) (GPL=%i)\n", track, side, id[0], id[1], id[2], id[3], td0[drive].sects[track][side][ordered_pos].deleted, td0[drive].sects[track][side][ordered_pos].bad_crc, track_gap3); + if (is_trackx) + { + current_pos = d86f_prepare_sector(drive, side, xdf_trackx_spos[xdf_type][xdf_sector], id, td0[drive].sects[track][side][ordered_pos].data, ssize, 1, track_gap2, xdf_gap3_sizes[xdf_type][is_trackx], 0, td0[drive].sects[track][side][ordered_pos].deleted, td0[drive].sects[track][side][ordered_pos].bad_crc); + } + else + { + current_pos = d86f_prepare_sector(drive, side, current_pos, id, td0[drive].sects[track][side][ordered_pos].data, ssize, 1, track_gap2, xdf_gap3_sizes[xdf_type][is_trackx], 0, td0[drive].sects[track][side][ordered_pos].deleted, td0[drive].sects[track][side][ordered_pos].bad_crc); + } + } } } @@ -957,8 +1116,10 @@ uint16_t td0_disk_flags(int drive) uint16_t td0_side_flags(int drive) { int side = 0; + uint8_t sflags = 0; side = fdd_get_head(drive); - return td0[drive].current_side_flags[side]; + sflags = td0[drive].current_side_flags[side]; + return sflags; } void td0_set_sector(int drive, int side, uint8_t c, uint8_t h, uint8_t r, uint8_t n) diff --git a/src/pc.c b/src/pc.c index 45717fae4..31b6b02fb 100644 --- a/src/pc.c +++ b/src/pc.c @@ -276,6 +276,8 @@ void initpc(int argc, char *argv[]) fdi_init(); img_init(); d86f_init(); + td0_init(); + imd_init(); vlan_reset(); //NETWORK network_card_init(network_card_current); diff --git a/src/vid_svga.c b/src/vid_svga.c index 54aca73c1..9a41776be 100644 --- a/src/vid_svga.c +++ b/src/vid_svga.c @@ -212,6 +212,7 @@ void svga_out(uint16_t addr, uint8_t val, void *p) break; case 0x3C8: svga->dac_write = val; + svga->dac_read = val - 1; svga->dac_pos = 0; break; case 0x3C9: diff --git 
a/src/vid_voodoo.c b/src/vid_voodoo.c index 9b8157d9f..a53fd56cf 100644 --- a/src/vid_voodoo.c +++ b/src/vid_voodoo.c @@ -18,6 +18,25 @@ #define LOD_MAX 8 +#define TEX_DIRTY_SHIFT 10 + +#define TEX_CACHE_MAX 64 + +static uint32_t texture_offset[LOD_MAX+3] = +{ + 0, + 256*256, + 256*256 + 128*128, + 256*256 + 128*128 + 64*64, + 256*256 + 128*128 + 64*64 + 32*32, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1, + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1 + 1 +}; + static int tris = 0; static uint64_t status_time = 0; @@ -108,7 +127,7 @@ typedef struct voodoo_params_t int64_t startS, startT, startW, p1; int64_t dSdX, dTdX, dWdX, p2; int64_t dSdY, dTdY, dWdY, p3; - } tmu[1]; + } tmu[2]; uint32_t color0, color1; @@ -129,21 +148,24 @@ typedef struct voodoo_params_t int chromaKey_r, chromaKey_g, chromaKey_b; uint32_t chromaKey; - uint32_t textureMode; - uint32_t tLOD; + uint32_t textureMode[2]; + uint32_t tLOD[2]; - uint32_t texBaseAddr, texBaseAddr1, texBaseAddr2, texBaseAddr38; + uint32_t texBaseAddr[2], texBaseAddr1[2], texBaseAddr2[2], texBaseAddr38[2]; - uint32_t tex_base[LOD_MAX+1]; - int tex_width; - int tex_w_mask[LOD_MAX+1]; - int tex_w_nmask[LOD_MAX+1]; - int tex_h_mask[LOD_MAX+1]; - int tex_shift[LOD_MAX+1]; + uint32_t tex_base[2][LOD_MAX+1]; + int tex_width[2]; + int tex_w_mask[2][LOD_MAX+1]; + int tex_w_nmask[2][LOD_MAX+1]; + int tex_h_mask[2][LOD_MAX+1]; + int tex_shift[2][LOD_MAX+1]; + int tex_lod[2][LOD_MAX+1]; + int tex_entry[2]; + int detail_max[2], detail_bias[2], detail_scale[2]; uint32_t draw_offset, aux_offset; - int tformat; + int tformat[2]; int clipLeft, clipRight, clipLowY, clipHighY; @@ -152,10 +174,18 @@ typedef struct voodoo_params_t uint32_t front_offset; uint32_t swapbufferCMD; - - 
rgba_u palette[256]; } voodoo_params_t; +typedef struct texture_t +{ + uint32_t base; + uint32_t tLOD; + volatile int refcount, refcount_r[2]; + int is16; + uint32_t palette_checksum; + uint32_t *data; +} texture_t; + typedef struct voodoo_t { mem_mapping_t mapping; @@ -188,12 +218,12 @@ typedef struct voodoo_t int row_width; - uint8_t *fb_mem, *tex_mem; - uint16_t *tex_mem_w; + uint8_t *fb_mem, *tex_mem[2]; + uint16_t *tex_mem_w[2]; int rgb_sel; - uint32_t trexInit1; + uint32_t trexInit1[2]; int swap_count; @@ -214,12 +244,12 @@ typedef struct voodoo_t struct { uint32_t y[4], i[4], q[4]; - } nccTable[2]; + } nccTable[2][2]; - rgba_u palette[256]; + rgba_u palette[2][256]; - rgba_u ncc_lookup[2][256]; - int ncc_dirty; + rgba_u ncc_lookup[2][2][256]; + int ncc_dirty[2]; thread_t *fifo_thread; thread_t *render_thread[2]; @@ -235,8 +265,8 @@ typedef struct voodoo_t int render_threads; int odd_even_mask; - int pixel_count[2], tri_count, frame_count; - int pixel_count_old[2]; + int pixel_count[2], texel_count[2], tri_count, frame_count; + int pixel_count_old[2], texel_count_old[2]; int wr_count, rd_count, tex_count; int retrace_count; @@ -252,6 +282,8 @@ typedef struct voodoo_t int texture_size; uint32_t texture_mask; + int dual_tmus; + fifo_entry_t fifo[FIFO_SIZE]; volatile int fifo_read_idx, fifo_write_idx; int cmd_read, cmd_written; @@ -287,10 +319,19 @@ typedef struct voodoo_t /* the voodoo adds purple lines for some reason */ uint16_t purpleline[1024]; + texture_t texture_cache[2][TEX_CACHE_MAX]; + uint8_t texture_present[2][4096]; + int texture_last_removed; + + uint32_t palette_checksum[2]; + int palette_dirty[2]; + int use_recompiler; void *codegen_data; } voodoo_t; +static inline void wait_for_render_thread_idle(voodoo_t *voodoo); + enum { SST_status = 0x000, @@ -442,7 +483,7 @@ enum SST_textureMode = 0x300, SST_tLOD = 0x304, - + SST_tDetail = 0x308, SST_texBaseAddr = 0x30c, SST_texBaseAddr1 = 0x310, SST_texBaseAddr2 = 0x314, @@ -610,6 +651,7 @@ enum TEX_I8 = 
0x3, TEX_AI8 = 0x4, TEX_PAL8 = 0x5, + TEX_ARGB8332 = 0x8, TEX_A8Y4I2Q2 = 0x9, TEX_R5G6B5 = 0xa, TEX_ARGB1555 = 0xb, @@ -622,7 +664,8 @@ enum { TEXTUREMODE_NCC_SEL = (1 << 5), TEXTUREMODE_TCLAMPS = (1 << 6), - TEXTUREMODE_TCLAMPT = (1 << 7) + TEXTUREMODE_TCLAMPT = (1 << 7), + TEXTUREMODE_TRILINEAR = (1 << 30) }; enum @@ -684,6 +727,26 @@ enum CCA_MSELECT_TEX = 4 }; +enum +{ + TC_MSELECT_ZERO = 0, + TC_MSELECT_CLOCAL = 1, + TC_MSELECT_AOTHER = 2, + TC_MSELECT_ALOCAL = 3, + TC_MSELECT_DETAIL = 4, + TC_MSELECT_LOD_FRAC = 5 +}; + +enum +{ + TCA_MSELECT_ZERO = 0, + TCA_MSELECT_CLOCAL = 1, + TCA_MSELECT_AOTHER = 2, + TCA_MSELECT_ALOCAL = 3, + TCA_MSELECT_DETAIL = 4, + TCA_MSELECT_LOD_FRAC = 5 +}; + enum { CC_ADD_CLOCAL = 1, @@ -750,6 +813,8 @@ enum enum { + LOD_ODD = (1 << 18), + LOD_SPLIT = (1 << 19), LOD_S_IS_WIDER = (1 << 20) }; enum @@ -765,7 +830,13 @@ enum FBZCP_TEXTURE_ENABLED = (1 << 27) }; -static void voodoo_update_ncc(voodoo_t *voodoo) +#define TEXTUREMODE_MASK 0x3ffff000 +#define TEXTUREMODE_PASSTHROUGH 0 + +#define TEXTUREMODE_LOCAL_MASK 0x00643000 +#define TEXTUREMODE_LOCAL 0x00241000 + +static void voodoo_update_ncc(voodoo_t *voodoo, int tmu) { int tbl; @@ -781,32 +852,32 @@ static void voodoo_update_ncc(voodoo_t *voodoo) int q_r, q_g, q_b; int r, g, b; - y = (voodoo->nccTable[tbl].y[y >> 2] >> ((y & 3) * 8)) & 0xff; + y = (voodoo->nccTable[tmu][tbl].y[y >> 2] >> ((y & 3) * 8)) & 0xff; - i_r = (voodoo->nccTable[tbl].i[i] >> 18) & 0x1ff; + i_r = (voodoo->nccTable[tmu][tbl].i[i] >> 18) & 0x1ff; if (i_r & 0x100) i_r |= 0xfffffe00; - i_g = (voodoo->nccTable[tbl].i[i] >> 9) & 0x1ff; + i_g = (voodoo->nccTable[tmu][tbl].i[i] >> 9) & 0x1ff; if (i_g & 0x100) i_g |= 0xfffffe00; - i_b = voodoo->nccTable[tbl].i[i] & 0x1ff; + i_b = voodoo->nccTable[tmu][tbl].i[i] & 0x1ff; if (i_b & 0x100) i_b |= 0xfffffe00; - q_r = (voodoo->nccTable[tbl].q[q] >> 18) & 0x1ff; + q_r = (voodoo->nccTable[tmu][tbl].q[q] >> 18) & 0x1ff; if (q_r & 0x100) q_r |= 0xfffffe00; - q_g = 
(voodoo->nccTable[tbl].q[q] >> 9) & 0x1ff; + q_g = (voodoo->nccTable[tmu][tbl].q[q] >> 9) & 0x1ff; if (q_g & 0x100) q_g |= 0xfffffe00; - q_b = voodoo->nccTable[tbl].q[q] & 0x1ff; + q_b = voodoo->nccTable[tmu][tbl].q[q] & 0x1ff; if (q_b & 0x100) q_b |= 0xfffffe00; - voodoo->ncc_lookup[tbl][col].rgba.r = CLAMP(y + i_r + q_r); - voodoo->ncc_lookup[tbl][col].rgba.g = CLAMP(y + i_g + q_g); - voodoo->ncc_lookup[tbl][col].rgba.b = CLAMP(y + i_b + q_b); - voodoo->ncc_lookup[tbl][col].rgba.a = 0xff; + voodoo->ncc_lookup[tmu][tbl][col].rgba.r = CLAMP(y + i_r + q_r); + voodoo->ncc_lookup[tmu][tbl][col].rgba.g = CLAMP(y + i_g + q_g); + voodoo->ncc_lookup[tmu][tbl][col].rgba.b = CLAMP(y + i_b + q_b); + voodoo->ncc_lookup[tmu][tbl][col].rgba.a = 0xff; } } } @@ -880,23 +951,32 @@ static void voodoo_recalc(voodoo_t *voodoo) // pclog(" fb_read_offset %08X fb_write_offset %08X row_width %i %08x %08x\n", voodoo->fb_read_offset, voodoo->fb_write_offset, voodoo->row_width, voodoo->lfbMode, voodoo->params.fbzMode); } -static void voodoo_recalc_tex(voodoo_t *voodoo) +static void voodoo_recalc_tex(voodoo_t *voodoo, int tmu) { - int aspect = (voodoo->params.tLOD >> 21) & 3; + int aspect = (voodoo->params.tLOD[tmu] >> 21) & 3; int width = 256, height = 256; int shift = 8; int lod; - int lod_min = (voodoo->params.tLOD >> 2) & 15; - int lod_max = (voodoo->params.tLOD >> 8) & 15; - uint32_t base = voodoo->params.texBaseAddr; + int lod_min = (voodoo->params.tLOD[tmu] >> 2) & 15; + int lod_max = (voodoo->params.tLOD[tmu] >> 8) & 15; + uint32_t base = voodoo->params.texBaseAddr[tmu]; + int tex_lod = 0; - if (voodoo->params.tLOD & LOD_S_IS_WIDER) + if (voodoo->params.tLOD[tmu] & LOD_S_IS_WIDER) height >>= aspect; else { width >>= aspect; shift -= aspect; } + + if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD)) + { + width >>= 1; + height >>= 1; + shift--; + tex_lod++; + } for (lod = 0; lod <= LOD_MAX; lod++) { @@ -906,24 +986,370 @@ static void 
voodoo_recalc_tex(voodoo_t *voodoo) height = 1; if (shift < 0) shift = 0; - voodoo->params.tex_base[lod] = base; - voodoo->params.tex_w_mask[lod] = width - 1; - voodoo->params.tex_w_nmask[lod] = ~(width - 1); - voodoo->params.tex_h_mask[lod] = height - 1; - voodoo->params.tex_shift[lod] = shift; -// pclog("LOD%i base=%08x %i-%i %i,%i wm=%02x hm=%02x sh=%i\n", lod, base, lod_min, lod_max, width, height, voodoo->params.tex_w_mask[lod], voodoo->params.tex_h_mask[lod], voodoo->params.tex_shift[lod]); - - if (voodoo->params.tformat & 8) - base += width * height * 2; - else - base += width * height; + voodoo->params.tex_base[tmu][lod] = base; + voodoo->params.tex_w_mask[tmu][lod] = width - 1; + voodoo->params.tex_w_nmask[tmu][lod] = ~(width - 1); + voodoo->params.tex_h_mask[tmu][lod] = height - 1; + voodoo->params.tex_shift[tmu][lod] = shift; + voodoo->params.tex_lod[tmu][lod] = tex_lod; - width >>= 1; - height >>= 1; - shift--; + if (!(voodoo->params.tLOD[tmu] & LOD_SPLIT) || ((lod & 1) && (voodoo->params.tLOD[tmu] & LOD_ODD)) || (!(lod & 1) && !(voodoo->params.tLOD[tmu] & LOD_ODD))) + { + if (!(voodoo->params.tLOD[tmu] & LOD_ODD) || lod != 0) + { + if (voodoo->params.tformat[tmu] & 8) + base += width * height * 2; + else + base += width * height; + + if (voodoo->params.tLOD[tmu] & LOD_SPLIT) + { + width >>= 2; + height >>= 2; + shift -= 2; + tex_lod += 2; + } + else + { + width >>= 1; + height >>= 1; + shift--; + tex_lod++; + } + } + } } - voodoo->params.tex_width = width; + voodoo->params.tex_width[tmu] = width; +} + +#define makergba(r, g, b, a) ((b) | ((g) << 8) | ((r) << 16) | ((a) << 24)) + +static void use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu) +{ + int c; + int lod; + int lod_min, lod_max; + uint32_t addr = 0, addr_end; + uint32_t palette_checksum; + + lod_min = (params->tLOD[tmu] >> 2) & 15; + lod_max = (params->tLOD[tmu] >> 8) & 15; + +//pclog("use_texture %08x %i %i-%i\n", params->texBaseAddr[tmu], tmu, lod_min,lod_max); + + if 
(params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL88) + { + if (voodoo->palette_dirty[tmu]) + { + palette_checksum = 0; + + for (c = 0; c < 256; c++) + palette_checksum ^= voodoo->palette[tmu][c].u; + + voodoo->palette_checksum[tmu] = palette_checksum; + voodoo->palette_dirty[tmu] = 0; + } + else + palette_checksum = voodoo->palette_checksum[tmu]; + } + else + palette_checksum = 0; + + /*Try to find texture in cache*/ + for (c = 0; c < TEX_CACHE_MAX; c++) + { + if (voodoo->texture_cache[tmu][c].base == params->texBaseAddr[tmu] && + voodoo->texture_cache[tmu][c].tLOD == (params->tLOD[tmu] & 0xf00fff) && + voodoo->texture_cache[tmu][c].palette_checksum == palette_checksum) + { + params->tex_entry[tmu] = c; + voodoo->texture_cache[tmu][c].refcount++; + + return; + } + } + + /*Texture not found, search for unused texture*/ + do + { + for (c = 0; c < TEX_CACHE_MAX; c++) + { + voodoo->texture_last_removed++; + voodoo->texture_last_removed &= (TEX_CACHE_MAX-1); + if (voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[0] && + (voodoo->render_threads == 1 || voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[1])) + break; + } + if (c == TEX_CACHE_MAX) + wait_for_render_thread_idle(voodoo); + } while (c == TEX_CACHE_MAX); + if (c == TEX_CACHE_MAX) + fatal("Texture cache full!\n"); + + c = voodoo->texture_last_removed; + + voodoo->texture_cache[tmu][c].base = params->texBaseAddr[tmu]; + voodoo->texture_cache[tmu][c].tLOD = params->tLOD[tmu] & 0xf00fff; + + lod_min = (params->tLOD[tmu] >> 2) & 15; + lod_max = (params->tLOD[tmu] >> 8) & 15; +// pclog(" add new texture to %i tformat=%i %08x LOD=%i-%i\n", c, voodoo->params.tformat[tmu], params->texBaseAddr[tmu], lod_min, lod_max); + + for (lod = lod_min; lod <= lod_max; lod++) + { + uint32_t *base = 
&voodoo->texture_cache[tmu][c].data[texture_offset[lod]]; + uint32_t tex_addr = params->tex_base[tmu][lod] & voodoo->texture_mask; + int x, y; + int shift = 8 - params->tex_lod[tmu][lod]; + rgba_u *pal; + + //pclog(" LOD %i : %08x - %08x %i %i,%i\n", lod, params->tex_base[tmu][lod] & voodoo->texture_mask, addr, voodoo->params.tformat[tmu], voodoo->params.tex_w_mask[tmu][lod],voodoo->params.tex_h_mask[tmu][lod]); + + switch (params->tformat[tmu]) + { + case TEX_RGB332: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(rgb332[dat].r, rgb332[dat].g, rgb332[dat].b, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_Y4I2Q2: + pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 1 : 0]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_A8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(dat, dat, dat, dat); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_I8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(dat, dat, dat, 0xff); + } + 
tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_AI8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba((dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0xf0) | ((dat >> 4) & 0x0f)); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_PAL8: + pal = voodoo->palette[tmu]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_ARGB8332: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(rgb332[dat & 0xff].r, rgb332[dat & 0xff].g, rgb332[dat & 0xff].b, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_A8Y4I2Q2: + pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 
1 : 0]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_R5G6B5: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(rgb565[dat].r, rgb565[dat].g, rgb565[dat].b, 0xff); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_ARGB1555: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(argb1555[dat].r, argb1555[dat].g, argb1555[dat].b, argb1555[dat].a); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_ARGB4444: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(argb4444[dat].r, argb4444[dat].g, argb4444[dat].b, argb4444[dat].a); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_A8I8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + 
+ base[x] = makergba(dat & 0xff, dat & 0xff, dat & 0xff, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_APAL88: + pal = voodoo->palette[tmu]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + default: + fatal("Unknown texture format %i\n", params->tformat[tmu]); + } + } + + voodoo->texture_cache[tmu][c].is16 = voodoo->params.tformat[tmu] & 8; + + if (params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL88) + voodoo->texture_cache[tmu][c].palette_checksum = palette_checksum; + else + voodoo->texture_cache[tmu][c].palette_checksum = 0; + + addr = voodoo->texture_cache[tmu][c].base + texture_offset[lod_min] * (voodoo->texture_cache[tmu][c].is16 ? 2 : 1); + addr_end = voodoo->texture_cache[tmu][c].base + texture_offset[lod_max+1] * (voodoo->texture_cache[tmu][c].is16 ? 
2 : 1); + for (; addr <= addr_end; addr += (1 << TEX_DIRTY_SHIFT)) + voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1; + + params->tex_entry[tmu] = c; + voodoo->texture_cache[tmu][c].refcount++; +} + +static void flush_texture_cache(voodoo_t *voodoo, uint32_t dirty_addr, int tmu) +{ + int wait_for_idle = 0; + int c; + + memset(voodoo->texture_present[tmu], 0, sizeof(voodoo->texture_present[0])); +// pclog("Evict %08x %i\n", dirty_addr, sizeof(voodoo->texture_present)); + for (c = 0; c < TEX_CACHE_MAX; c++) + { + if (voodoo->texture_cache[tmu][c].base != -1) + { + int lod_min = (voodoo->texture_cache[tmu][c].tLOD >> 2) & 15; + int lod_max = (voodoo->texture_cache[tmu][c].tLOD >> 8) & 15; + int addr_start = voodoo->texture_cache[tmu][c].base + texture_offset[lod_min] * (voodoo->texture_cache[tmu][c].is16 ? 2 : 1); + int addr_end = voodoo->texture_cache[tmu][c].base + texture_offset[lod_max+1] * (voodoo->texture_cache[tmu][c].is16 ? 2 : 1); + + if (dirty_addr >= (addr_start & voodoo->texture_mask & ~0x3ff) && dirty_addr < (((addr_end & voodoo->texture_mask) + 0x3ff) & ~0x3ff)) + { +// pclog(" Evict texture %i %08x\n", c, voodoo->texture_cache[tmu][c].base); + + if (voodoo->texture_cache[tmu][c].refcount != voodoo->texture_cache[tmu][c].refcount_r[0] || + (voodoo->render_threads == 2 && voodoo->texture_cache[tmu][c].refcount != voodoo->texture_cache[tmu][c].refcount_r[1])) + wait_for_idle = 1; + + voodoo->texture_cache[tmu][c].base = -1; + } + else + { + for (; addr_start <= addr_end; addr_start += (1 << TEX_DIRTY_SHIFT)) + voodoo->texture_present[tmu][(addr_start & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1; + } + } + } + if (wait_for_idle) + wait_for_render_thread_idle(voodoo); } typedef struct voodoo_state_t @@ -934,28 +1360,26 @@ typedef struct voodoo_state_t { int64_t base_s, base_t, base_w; int lod; - } tmu[1]; + } tmu[2]; int64_t base_w; int lod; - int lod_min, lod_max; + int lod_min[2], lod_max[2]; int dx1, dx2; int y, yend, 
ydir; int32_t dxAB, dxAC, dxBC; - int tex_b, tex_g, tex_r, tex_a; + int tex_b[2], tex_g[2], tex_r[2], tex_a[2]; int tex_s, tex_t; - int clamp_s, clamp_t; + int clamp_s[2], clamp_t[2]; int32_t vertexAx, vertexAy, vertexBx, vertexBy, vertexCx, vertexCy; - uint8_t *tex[LOD_MAX+1]; - uint16_t *tex_w[LOD_MAX+1]; + uint32_t *tex[2][LOD_MAX+1]; int tformat; - rgba_u *palette; - - int *tex_w_mask; - int *tex_h_mask; - int *tex_shift; + int *tex_w_mask[2]; + int *tex_h_mask[2]; + int *tex_shift[2]; + int *tex_lod[2]; uint16_t *fb_mem, *aux_mem; @@ -966,15 +1390,20 @@ typedef struct voodoo_state_t int64_t tmu0_s, tmu0_t; int64_t tmu0_w; + int64_t tmu1_s, tmu1_t; + int64_t tmu1_w; int64_t w; - int pixel_count; + int pixel_count, texel_count; int x, x2; uint32_t w_depth; float log_temp; uint32_t ebp_store; + uint32_t texBaseAddr; + + int lod_frac[2]; } voodoo_state_t; static int voodoo_output = 0; @@ -1083,13 +1512,13 @@ typedef struct voodoo_texture_state_t int tex_shift; } voodoo_texture_state_t; -static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *texture_state) +static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int tmu) { - uint16_t dat; + uint32_t dat; if (texture_state->s & ~texture_state->w_mask) { - if (state->clamp_s) + if (state->clamp_s[tmu]) { if (texture_state->s < 0) texture_state->s = 0; @@ -1101,7 +1530,7 @@ static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *textu } if (texture_state->t & ~texture_state->h_mask) { - if (state->clamp_t) + if (state->clamp_t[tmu]) { if (texture_state->t < 0) texture_state->t = 0; @@ -1112,99 +1541,20 @@ static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *textu texture_state->t &= texture_state->h_mask; } - if (state->tformat & 8) - dat = state->tex_w[state->lod][texture_state->s + (texture_state->t << texture_state->tex_shift)]; - else - dat = state->tex[state->lod][texture_state->s + (texture_state->t << 
texture_state->tex_shift)]; - - switch (state->tformat) - { - case TEX_RGB332: - state->tex_r = rgb332[dat].r; - state->tex_g = rgb332[dat].g; - state->tex_b = rgb332[dat].b; - state->tex_a = 0xff; - break; - - case TEX_Y4I2Q2: - state->tex_r = state->palette[dat].rgba.r; - state->tex_g = state->palette[dat].rgba.g; - state->tex_b = state->palette[dat].rgba.b; - state->tex_a = 0xff; - break; - - case TEX_A8: - state->tex_r = state->tex_g = state->tex_b = state->tex_a = dat & 0xff; - break; - - case TEX_I8: - state->tex_r = state->tex_g = state->tex_b = dat & 0xff; - state->tex_a = 0xff; - break; - - case TEX_AI8: - state->tex_r = state->tex_g = state->tex_b = (dat & 0x0f) | ((dat << 4) & 0xf0); - state->tex_a = (dat & 0xf0) | ((dat >> 4) & 0x0f); - break; - - case TEX_PAL8: - state->tex_r = state->palette[dat].rgba.r; - state->tex_g = state->palette[dat].rgba.g; - state->tex_b = state->palette[dat].rgba.b; - state->tex_a = 0xff; - break; - - case TEX_A8Y4I2Q2: - state->tex_r = state->palette[dat & 0xff].rgba.r; - state->tex_g = state->palette[dat & 0xff].rgba.g; - state->tex_b = state->palette[dat & 0xff].rgba.b; - state->tex_a = dat >> 8; - break; - - case TEX_R5G6B5: - state->tex_r = rgb565[dat].r; - state->tex_g = rgb565[dat].g; - state->tex_b = rgb565[dat].b; - state->tex_a = 0xff; - break; - - case TEX_ARGB1555: - state->tex_r = argb1555[dat].r; - state->tex_g = argb1555[dat].g; - state->tex_b = argb1555[dat].b; - state->tex_a = argb1555[dat].a; - break; - - case TEX_ARGB4444: - state->tex_r = argb4444[dat].r; - state->tex_g = argb4444[dat].g; - state->tex_b = argb4444[dat].b; - state->tex_a = argb4444[dat].a; - break; - - case TEX_A8I8: - state->tex_r = state->tex_g = state->tex_b = dat & 0xff; - state->tex_a = dat >> 8; - break; - - case TEX_APAL88: - state->tex_r = state->palette[dat & 0xff].rgba.r; - state->tex_g = state->palette[dat & 0xff].rgba.g; - state->tex_b = state->palette[dat & 0xff].rgba.b; - state->tex_a = dat >> 8; - break; - - default: - 
fatal("Unknown texture format %i\n", state->tformat); - } + dat = state->tex[tmu][state->lod][texture_state->s + (texture_state->t << texture_state->tex_shift)]; + + state->tex_b[tmu] = dat & 0xff; + state->tex_g[tmu] = (dat >> 8) & 0xff; + state->tex_r[tmu] = (dat >> 16) & 0xff; + state->tex_a[tmu] = (dat >> 24) & 0xff; } #define LOW4(x) ((x & 0x0f) | ((x & 0x0f) << 4)) #define HIGH4(x) ((x & 0xf0) | ((x & 0xf0) >> 4)) -static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int s, int t, int *d) +static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int s, int t, int *d, int tmu, int x) { - uint16_t dat[4]; + rgba_u dat[4]; if (((s | (s + 1)) & ~texture_state->w_mask) || ((t | (t + 1)) & ~texture_state->h_mask)) { @@ -1216,7 +1566,7 @@ static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *tex if (_s & ~texture_state->w_mask) { - if (state->clamp_s) + if (state->clamp_s[tmu]) { if (_s < 0) _s = 0; @@ -1228,7 +1578,7 @@ static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *tex } if (_t & ~texture_state->h_mask) { - if (state->clamp_t) + if (state->clamp_t[tmu]) { if (_t < 0) _t = 0; @@ -1238,139 +1588,50 @@ static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *tex else _t &= texture_state->h_mask; } - if (state->tformat & 8) - dat[c] = state->tex_w[state->lod][_s + (_t << texture_state->tex_shift)]; - else - dat[c] = state->tex[state->lod][_s + (_t << texture_state->tex_shift)]; + dat[c].u = state->tex[tmu][state->lod][_s + (_t << texture_state->tex_shift)]; } } else { - if (state->tformat & 8) - { - dat[0] = state->tex_w[state->lod][s + (t << texture_state->tex_shift)]; - dat[1] = state->tex_w[state->lod][s + 1 + (t << texture_state->tex_shift)]; - dat[2] = state->tex_w[state->lod][s + ((t + 1) << texture_state->tex_shift)]; - dat[3] = state->tex_w[state->lod][s + 1 + ((t + 1) << texture_state->tex_shift)]; - } - else - { - 
dat[0] = state->tex[state->lod][s + (t << texture_state->tex_shift)]; - dat[1] = state->tex[state->lod][s + 1 + (t << texture_state->tex_shift)]; - dat[2] = state->tex[state->lod][s + ((t + 1) << texture_state->tex_shift)]; - dat[3] = state->tex[state->lod][s + 1 + ((t + 1) << texture_state->tex_shift)]; - } + dat[0].u = state->tex[tmu][state->lod][s + (t << texture_state->tex_shift)]; + dat[1].u = state->tex[tmu][state->lod][s + 1 + (t << texture_state->tex_shift)]; + dat[2].u = state->tex[tmu][state->lod][s + ((t + 1) << texture_state->tex_shift)]; + dat[3].u = state->tex[tmu][state->lod][s + 1 + ((t + 1) << texture_state->tex_shift)]; } - switch (state->tformat) - { - case TEX_RGB332: - state->tex_r = (rgb332[dat[0]].r * d[0] + rgb332[dat[1]].r * d[1] + rgb332[dat[2]].r * d[2] + rgb332[dat[3]].r * d[3]) >> 8; - state->tex_g = (rgb332[dat[0]].g * d[0] + rgb332[dat[1]].g * d[1] + rgb332[dat[2]].g * d[2] + rgb332[dat[3]].g * d[3]) >> 8; - state->tex_b = (rgb332[dat[0]].b * d[0] + rgb332[dat[1]].b * d[1] + rgb332[dat[2]].b * d[2] + rgb332[dat[3]].b * d[3]) >> 8; - state->tex_a = 0xff; - break; - - case TEX_Y4I2Q2: - state->tex_r = (state->palette[dat[0]].rgba.r * d[0] + state->palette[dat[1]].rgba.r * d[1] + state->palette[dat[2]].rgba.r * d[2] + state->palette[dat[3]].rgba.r * d[3]) >> 8; - state->tex_g = (state->palette[dat[0]].rgba.g * d[0] + state->palette[dat[1]].rgba.g * d[1] + state->palette[dat[2]].rgba.g * d[2] + state->palette[dat[3]].rgba.g * d[3]) >> 8; - state->tex_b = (state->palette[dat[0]].rgba.b * d[0] + state->palette[dat[1]].rgba.b * d[1] + state->palette[dat[2]].rgba.b * d[2] + state->palette[dat[3]].rgba.b * d[3]) >> 8; - state->tex_a = 0xff; - break; - - case TEX_A8: - state->tex_r = state->tex_g = state->tex_b = state->tex_a = (dat[0] * d[0] + dat[1] * d[1] + dat[2] * d[2] + dat[3] * d[3]) >> 8; - break; - - case TEX_I8: - state->tex_r = state->tex_g = state->tex_b = (dat[0] * d[0] + dat[1] * d[1] + dat[2] * d[2] + dat[3] * d[3]) >> 8; - 
state->tex_a = 0xff; - break; - - case TEX_AI8: - state->tex_r = state->tex_g = state->tex_b = (LOW4(dat[0]) * d[0] + LOW4(dat[1]) * d[1] + LOW4(dat[2]) * d[2] + LOW4(dat[3]) * d[3]) >> 8; - state->tex_a = (HIGH4(dat[0]) * d[0] + HIGH4(dat[1]) * d[1] + HIGH4(dat[2]) * d[2] + HIGH4(dat[3]) * d[3]) >> 8; - break; - - case TEX_PAL8: - state->tex_r = (state->palette[dat[0]].rgba.r * d[0] + state->palette[dat[1]].rgba.r * d[1] + state->palette[dat[2]].rgba.r * d[2] + state->palette[dat[3]].rgba.r * d[3]) >> 8; - state->tex_g = (state->palette[dat[0]].rgba.g * d[0] + state->palette[dat[1]].rgba.g * d[1] + state->palette[dat[2]].rgba.g * d[2] + state->palette[dat[3]].rgba.g * d[3]) >> 8; - state->tex_b = (state->palette[dat[0]].rgba.b * d[0] + state->palette[dat[1]].rgba.b * d[1] + state->palette[dat[2]].rgba.b * d[2] + state->palette[dat[3]].rgba.b * d[3]) >> 8; - state->tex_a = 0xff; - break; - - case TEX_A8Y4I2Q2: - state->tex_r = (state->palette[dat[0] & 0xff].rgba.r * d[0] + state->palette[dat[1] & 0xff].rgba.r * d[1] + state->palette[dat[2] & 0xff].rgba.r * d[2] + state->palette[dat[3] & 0xff].rgba.r * d[3]) >> 8; - state->tex_g = (state->palette[dat[0] & 0xff].rgba.g * d[0] + state->palette[dat[1] & 0xff].rgba.g * d[1] + state->palette[dat[2] & 0xff].rgba.g * d[2] + state->palette[dat[3] & 0xff].rgba.g * d[3]) >> 8; - state->tex_b = (state->palette[dat[0] & 0xff].rgba.b * d[0] + state->palette[dat[1] & 0xff].rgba.b * d[1] + state->palette[dat[2] & 0xff].rgba.b * d[2] + state->palette[dat[3] & 0xff].rgba.b * d[3]) >> 8; - state->tex_a = ((dat[0] >> 8) * d[0] + (dat[1] >> 8) * d[1] + (dat[2] >> 8) * d[2] + (dat[3] >> 8) * d[3]) >> 8; - break; - - case TEX_R5G6B5: - state->tex_r = (rgb565[dat[0]].r * d[0] + rgb565[dat[1]].r * d[1] + rgb565[dat[2]].r * d[2] + rgb565[dat[3]].r * d[3]) >> 8; - state->tex_g = (rgb565[dat[0]].g * d[0] + rgb565[dat[1]].g * d[1] + rgb565[dat[2]].g * d[2] + rgb565[dat[3]].g * d[3]) >> 8; - state->tex_b = (rgb565[dat[0]].b * d[0] + 
rgb565[dat[1]].b * d[1] + rgb565[dat[2]].b * d[2] + rgb565[dat[3]].b * d[3]) >> 8; - state->tex_a = 0xff; - break; - - case TEX_ARGB1555: - state->tex_r = (argb1555[dat[0]].r * d[0] + argb1555[dat[1]].r * d[1] + argb1555[dat[2]].r * d[2] + argb1555[dat[3]].r * d[3]) >> 8; - state->tex_g = (argb1555[dat[0]].g * d[0] + argb1555[dat[1]].g * d[1] + argb1555[dat[2]].g * d[2] + argb1555[dat[3]].g * d[3]) >> 8; - state->tex_b = (argb1555[dat[0]].b * d[0] + argb1555[dat[1]].b * d[1] + argb1555[dat[2]].b * d[2] + argb1555[dat[3]].b * d[3]) >> 8; - state->tex_a = (argb1555[dat[0]].a * d[0] + argb1555[dat[1]].a * d[1] + argb1555[dat[2]].a * d[2] + argb1555[dat[3]].a * d[3]) >> 8; - break; - - case TEX_ARGB4444: - state->tex_r = (argb4444[dat[0]].r * d[0] + argb4444[dat[1]].r * d[1] + argb4444[dat[2]].r * d[2] + argb4444[dat[3]].r * d[3]) >> 8; - state->tex_g = (argb4444[dat[0]].g * d[0] + argb4444[dat[1]].g * d[1] + argb4444[dat[2]].g * d[2] + argb4444[dat[3]].g * d[3]) >> 8; - state->tex_b = (argb4444[dat[0]].b * d[0] + argb4444[dat[1]].b * d[1] + argb4444[dat[2]].b * d[2] + argb4444[dat[3]].b * d[3]) >> 8; - state->tex_a = (argb4444[dat[0]].a * d[0] + argb4444[dat[1]].a * d[1] + argb4444[dat[2]].a * d[2] + argb4444[dat[3]].a * d[3]) >> 8; - break; - - case TEX_A8I8: - state->tex_r = state->tex_g = state->tex_b = ((dat[0] & 0xff) * d[0] + (dat[1] & 0xff) * d[1] + (dat[2] & 0xff) * d[2] + (dat[3] & 0xff) * d[3]) >> 8; - state->tex_a = ((dat[0] >> 8) * d[0] + (dat[1] >> 8) * d[1] + (dat[2] >> 8) * d[2] + (dat[3] >> 8) * d[3]) >> 8; - break; - - case TEX_APAL88: - state->tex_r = (state->palette[dat[0] & 0xff].rgba.r * d[0] + state->palette[dat[1] & 0xff].rgba.r * d[1] + state->palette[dat[2] & 0xff].rgba.r * d[2] + state->palette[dat[3] & 0xff].rgba.r * d[3]) >> 8; - state->tex_g = (state->palette[dat[0] & 0xff].rgba.g * d[0] + state->palette[dat[1] & 0xff].rgba.g * d[1] + state->palette[dat[2] & 0xff].rgba.g * d[2] + state->palette[dat[3] & 0xff].rgba.g * d[3]) >> 8; - 
state->tex_b = (state->palette[dat[0] & 0xff].rgba.b * d[0] + state->palette[dat[1] & 0xff].rgba.b * d[1] + state->palette[dat[2] & 0xff].rgba.b * d[2] + state->palette[dat[3] & 0xff].rgba.b * d[3]) >> 8; - state->tex_a = ((dat[0] >> 8) * d[0] + (dat[1] >> 8) * d[1] + (dat[2] >> 8) * d[2] + (dat[3] >> 8) * d[3]) >> 8; - break; - - default: - fatal("Unknown texture format %i\n", state->tformat); - } + state->tex_r[tmu] = (dat[0].rgba.r * d[0] + dat[1].rgba.r * d[1] + dat[2].rgba.r * d[2] + dat[3].rgba.r * d[3]) >> 8; + state->tex_g[tmu] = (dat[0].rgba.g * d[0] + dat[1].rgba.g * d[1] + dat[2].rgba.g * d[2] + dat[3].rgba.g * d[3]) >> 8; + state->tex_b[tmu] = (dat[0].rgba.b * d[0] + dat[1].rgba.b * d[1] + dat[2].rgba.b * d[2] + dat[3].rgba.b * d[3]) >> 8; + state->tex_a[tmu] = (dat[0].rgba.a * d[0] + dat[1].rgba.a * d[1] + dat[2].rgba.a * d[2] + dat[3].rgba.a * d[3]) >> 8; } -static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state) +static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) { rgba_u tex_samples[4]; voodoo_texture_state_t texture_state; int d[4]; int s, t; + int tex_lod = state->tex_lod[tmu][state->lod]; - texture_state.w_mask = state->tex_w_mask[state->lod]; - texture_state.h_mask = state->tex_h_mask[state->lod]; - texture_state.tex_shift = state->tex_shift[state->lod]; + texture_state.w_mask = state->tex_w_mask[tmu][state->lod]; + texture_state.h_mask = state->tex_h_mask[tmu][state->lod]; + texture_state.tex_shift = 8 - tex_lod; - if (voodoo->bilinear_enabled && params->textureMode & 6) + if (voodoo->bilinear_enabled && params->textureMode[tmu] & 6) { int _ds, dt; - state->tex_s -= 1 << (3+state->lod); - state->tex_t -= 1 << (3+state->lod); + state->tex_s -= 1 << (3+tex_lod); + state->tex_t -= 1 << (3+tex_lod); - s = state->tex_s >> state->lod; - t = state->tex_t >> state->lod; + s = state->tex_s >> tex_lod; + t = state->tex_t >> tex_lod; _ds = 
s & 0xf; dt = t & 0xf; s >>= 4; t >>= 4; - //if (x == 80) //if (voodoo_output) // pclog("s=%08x t=%08x _ds=%02x _dt=%02x\n", s, t, _ds, dt); @@ -1381,7 +1642,7 @@ static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, // texture_state.s = s; // texture_state.t = t; - tex_read_4(state, &texture_state, s, t, d); + tex_read_4(state, &texture_state, s, t, d, tmu, x); /* state->tex_r = (tex_samples[0].rgba.r * d[0] + tex_samples[1].rgba.r * d[1] + tex_samples[2].rgba.r * d[2] + tex_samples[3].rgba.r * d[3]) >> 8; @@ -1404,12 +1665,12 @@ static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, // state->tex_s -= 1 << (17+state->lod); // state->tex_t -= 1 << (17+state->lod); - s = state->tex_s >> (4+state->lod); - t = state->tex_t >> (4+state->lod); + s = state->tex_s >> (4+tex_lod); + t = state->tex_t >> (4+tex_lod); texture_state.s = s; texture_state.t = t; - tex_read(state, &texture_state); + tex_read(state, &texture_state, tmu); /* state->tex_r = tex_samples[0].rgba.r; state->tex_g = tex_samples[0].rgba.g; @@ -1418,6 +1679,51 @@ static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, } } +/*Derive tex_s/tex_t and the LOD for one TMU from that TMU's own S/T/W iterators, clamp the LOD, then sample the texture*/ static inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x) +{ + if (params->textureMode[tmu] & 1) + { + int64_t _w = tmu ? state->tmu1_w : state->tmu0_w; /*Use the W of the TMU being fetched; a W of zero leaves the reciprocal at zero*/ + if (_w) + _w = (int64_t)((1ULL << 48) / _w); + + if (tmu) + { + state->tex_s = (int32_t)(((state->tmu1_s >> 14) * _w) >> 30); + state->tex_t = (int32_t)(((state->tmu1_t >> 14) * _w) >> 30); + } + else + { + state->tex_s = (int32_t)(((state->tmu0_s >> 14) * _w) >> 30); + state->tex_t = (int32_t)(((state->tmu0_t >> 14) * _w) >> 30); + } + + state->lod = state->tmu[tmu].lod + (fastlog(_w) - (19 << 8)); + } + else + { + if (tmu) + { + state->tex_s = (int32_t)(state->tmu1_s >> (14+14)); + state->tex_t = (int32_t)(state->tmu1_t >> (14+14)); + } + else + { + state->tex_s = (int32_t)(state->tmu0_s >> (14+14)); +
state->tex_t = (int32_t)(state->tmu0_t >> (14+14)); + } + state->lod = state->tmu[tmu].lod; + } + + if (state->lod < state->lod_min[tmu]) + state->lod = state->lod_min[tmu]; + else if (state->lod > state->lod_max[tmu]) + state->lod = state->lod_max[tmu]; + state->lod_frac[tmu] = state->lod & 0xff; + state->lod >>= 8; + + voodoo_get_texture(voodoo, params, state, tmu, x); +} #define DEPTH_TEST(comp_depth) \ do \ @@ -1717,6 +2023,32 @@ static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, #define cca_reverse_blend ( params->fbzColorPath & (1 << 22)) #define cca_add ( (params->fbzColorPath >> 23) & 3) #define cca_invert_output ( params->fbzColorPath & (1 << 25)) +#define tc_zero_other (params->textureMode[0] & (1 << 12)) +#define tc_sub_clocal (params->textureMode[0] & (1 << 13)) +#define tc_mselect ((params->textureMode[0] >> 14) & 7) +#define tc_reverse_blend (params->textureMode[0] & (1 << 17)) +#define tc_add_clocal (params->textureMode[0] & (1 << 18)) +#define tc_add_alocal (params->textureMode[0] & (1 << 19)) +#define tc_invert_output (params->textureMode[0] & (1 << 20)) +#define tca_zero_other (params->textureMode[0] & (1 << 21)) +#define tca_sub_clocal (params->textureMode[0] & (1 << 22)) +#define tca_mselect ((params->textureMode[0] >> 23) & 7) +#define tca_reverse_blend (params->textureMode[0] & (1 << 26)) +#define tca_add_clocal (params->textureMode[0] & (1 << 27)) +#define tca_add_alocal (params->textureMode[0] & (1 << 28)) +#define tca_invert_output (params->textureMode[0] & (1 << 29)) + +#define tc_sub_clocal_1 (params->textureMode[1] & (1 << 13)) +#define tc_mselect_1 ((params->textureMode[1] >> 14) & 7) +#define tc_reverse_blend_1 (params->textureMode[1] & (1 << 17)) +#define tc_add_clocal_1 (params->textureMode[1] & (1 << 18)) +#define tc_add_alocal_1 (params->textureMode[1] & (1 << 19)) +#define tca_sub_clocal_1 (params->textureMode[1] & (1 << 22)) +#define tca_mselect_1 ((params->textureMode[1] >> 23) & 7) +#define 
tca_reverse_blend_1 (params->textureMode[1] & (1 << 26)) +#define tca_add_clocal_1 (params->textureMode[1] & (1 << 27)) +#define tca_add_alocal_1 (params->textureMode[1] & (1 << 28)) + #define src_afunc ( (params->alphaMode >> 8) & 0xf) #define dest_afunc ( (params->alphaMode >> 12) & 0xf) #define alpha_func ( (params->alphaMode >> 1) & 7) @@ -1725,6 +2057,247 @@ static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, #define dither ( params->fbzMode & FBZ_DITHER) #define dither2x2 (params->fbzMode & FBZ_DITHER_2x2) +/*Perform texture fetch and blending for both TMUs. TMU1 is fetched and combined first; TMU0 is then fetched and combined with TMU1's output as its 'other' input. The result is left in state->tex_r/g/b/a[0]*/ +static inline void voodoo_tmu_fetch_and_blend(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int x) +{ + int r,g,b,a; + int c_reverse, a_reverse; + int c_reverse1, a_reverse1; + int factor_r = 0, factor_g = 0, factor_b = 0, factor_a = 0; /*Zeroed so an mselect value with no case below cannot read an indeterminate factor*/ + + voodoo_tmu_fetch(voodoo, params, state, 1, x); + + if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && (state->lod & 1)) + { + c_reverse = tc_reverse_blend; + a_reverse = tca_reverse_blend; + } + else + { + c_reverse = !tc_reverse_blend; + a_reverse = !tca_reverse_blend; + } + c_reverse1 = c_reverse; + a_reverse1 = a_reverse; + if (tc_sub_clocal_1) + { + switch (tc_mselect_1) + { + case TC_MSELECT_ZERO: + factor_r = factor_g = factor_b = 0; + break; + case TC_MSELECT_CLOCAL: + factor_r = state->tex_r[1]; + factor_g = state->tex_g[1]; + factor_b = state->tex_b[1]; + break; + case TC_MSELECT_AOTHER: + factor_r = factor_g = factor_b = 0; + break; + case TC_MSELECT_ALOCAL: + factor_r = factor_g = factor_b = state->tex_a[1]; + break; + case TC_MSELECT_DETAIL: + factor_r = (params->detail_bias[1] - state->lod) << params->detail_scale[1]; + if (factor_r > params->detail_max[1]) + factor_r = params->detail_max[1]; + factor_g = factor_b = factor_r; + break; + case TC_MSELECT_LOD_FRAC: + factor_r = factor_g = factor_b = state->lod_frac[1]; + break; + } + if (!c_reverse) + { + r = (-state->tex_r[1] * (factor_r + 1)) >> 8; + g = (-state->tex_g[1]
* (factor_g + 1)) >> 8; + b = (-state->tex_b[1] * (factor_b + 1)) >> 8; + } + else + { + r = (-state->tex_r[1] * ((factor_r^0xff) + 1)) >> 8; + g = (-state->tex_g[1] * ((factor_g^0xff) + 1)) >> 8; + b = (-state->tex_b[1] * ((factor_b^0xff) + 1)) >> 8; + } + if (tc_add_clocal_1) + { + r += state->tex_r[1]; + g += state->tex_g[1]; + b += state->tex_b[1]; + } + else if (tc_add_alocal_1) + { + r += state->tex_a[1]; + g += state->tex_a[1]; + b += state->tex_a[1]; + } + state->tex_r[1] = CLAMP(r); + state->tex_g[1] = CLAMP(g); + state->tex_b[1] = CLAMP(b); + } + if (tca_sub_clocal_1) + { + switch (tca_mselect_1) + { + case TCA_MSELECT_ZERO: + factor_a = 0; + break; + case TCA_MSELECT_CLOCAL: + factor_a = state->tex_a[1]; + break; + case TCA_MSELECT_AOTHER: + factor_a = 0; + break; + case TCA_MSELECT_ALOCAL: + factor_a = state->tex_a[1]; + break; + case TCA_MSELECT_DETAIL: + factor_a = (params->detail_bias[1] - state->lod) << params->detail_scale[1]; + if (factor_a > params->detail_max[1]) + factor_a = params->detail_max[1]; + break; + case TCA_MSELECT_LOD_FRAC: + factor_a = state->lod_frac[1]; + break; + } + if (!a_reverse) + a = (-state->tex_a[1] * ((factor_a ^ 0xff) + 1)) >> 8; + else + a = (-state->tex_a[1] * (factor_a + 1)) >> 8; + if (tca_add_clocal_1 || tca_add_alocal_1) + a += state->tex_a[1]; + state->tex_a[1] = CLAMP(a); + } + + voodoo_tmu_fetch(voodoo, params, state, 0, x); + + if ((params->textureMode[0] & TEXTUREMODE_TRILINEAR) && (state->lod & 1)) + { + c_reverse = tc_reverse_blend; + a_reverse = tca_reverse_blend; + } + else + { + c_reverse = !tc_reverse_blend; + a_reverse = !tca_reverse_blend; + } + + if (!tc_zero_other) + { + r = state->tex_r[1]; + g = state->tex_g[1]; + b = state->tex_b[1]; + } + else + r = g = b = 0; + if (tc_sub_clocal) + { + r -= state->tex_r[0]; + g -= state->tex_g[0]; + b -= state->tex_b[0]; + } + switch (tc_mselect) + { + case TC_MSELECT_ZERO: + factor_r = factor_g = factor_b = 0; + break; + case TC_MSELECT_CLOCAL: + factor_r = 
state->tex_r[0]; + factor_g = state->tex_g[0]; + factor_b = state->tex_b[0]; + break; + case TC_MSELECT_AOTHER: + factor_r = factor_g = factor_b = state->tex_a[1]; + break; + case TC_MSELECT_ALOCAL: + factor_r = factor_g = factor_b = state->tex_a[0]; + break; + case TC_MSELECT_DETAIL: + factor_r = (params->detail_bias[0] - state->lod) << params->detail_scale[0]; + if (factor_r > params->detail_max[0]) + factor_r = params->detail_max[0]; + factor_g = factor_b = factor_r; + break; + case TC_MSELECT_LOD_FRAC: + factor_r = factor_g = factor_b = state->lod_frac[0]; + break; + } + if (!c_reverse) + { + r = (r * (factor_r + 1)) >> 8; + g = (g * (factor_g + 1)) >> 8; + b = (b * (factor_b + 1)) >> 8; + } + else + { + r = (r * ((factor_r^0xff) + 1)) >> 8; + g = (g * ((factor_g^0xff) + 1)) >> 8; + b = (b * ((factor_b^0xff) + 1)) >> 8; + } + if (tc_add_clocal) + { + r += state->tex_r[0]; + g += state->tex_g[0]; + b += state->tex_b[0]; + } + else if (tc_add_alocal) + { + r += state->tex_a[0]; + g += state->tex_a[0]; + b += state->tex_a[0]; + } + + if (!tca_zero_other) + a = state->tex_a[1]; + else + a = 0; + if (tca_sub_clocal) + a -= state->tex_a[0]; + switch (tca_mselect) + { + case TCA_MSELECT_ZERO: + factor_a = 0; + break; + case TCA_MSELECT_CLOCAL: + factor_a = state->tex_a[0]; + break; + case TCA_MSELECT_AOTHER: + factor_a = state->tex_a[1]; + break; + case TCA_MSELECT_ALOCAL: + factor_a = state->tex_a[0]; + break; + case TCA_MSELECT_DETAIL: + factor_a = (params->detail_bias[0] - state->lod) << params->detail_scale[0]; + if (factor_a > params->detail_max[0]) + factor_a = params->detail_max[0]; + break; + case TCA_MSELECT_LOD_FRAC: + factor_a = state->lod_frac[0]; + break; + } + if (!a_reverse) + a = (a * ((factor_a ^ 0xff) + 1)) >> 8; + else + a = (a * (factor_a + 1)) >> 8; + if (tca_add_clocal || tca_add_alocal) + a += state->tex_a[0]; + + state->tex_r[0] = CLAMP(r); + state->tex_g[0] = CLAMP(g); + state->tex_b[0] = CLAMP(b); + state->tex_a[0] = CLAMP(a); + + if 
(tc_invert_output) + { + state->tex_r[0] ^= 0xff; + state->tex_g[0] ^= 0xff; + state->tex_b[0] ^= 0xff; + } + if (tca_invert_output) + state->tex_a[0] ^= 0xff; +} + #if (defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined WIN32 || defined _WIN32 || defined _WIN32) && !(defined __amd64__) #include "vid_voodoo_codegen_x86.h" #elif (defined __amd64__) @@ -1760,25 +2333,39 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood int a_ref = params->alphaMode >> 24; int depth_op = (params->fbzMode >> 5) & 7; int dither = params->fbzMode & FBZ_DITHER;*/ + int texels; int c; uint8_t (*voodoo_draw)(voodoo_state_t *state, voodoo_params_t *params, int x, int real_y); - - state->clamp_s = params->textureMode & TEXTUREMODE_TCLAMPS; - state->clamp_t = params->textureMode & TEXTUREMODE_TCLAMPT; + + if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH || + (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL) + texels = 1; + else + texels = 2; + + state->clamp_s[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPS; + state->clamp_t[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPT; + state->clamp_s[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPS; + state->clamp_t[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPT; // int last_x; // pclog("voodoo_triangle : bottom-half %X %X %X %X %X %i %i %i %i\n", xstart, xend, dx1, dx2, dx2 * 36, xdir, y, yend, ydir); for (c = 0; c <= LOD_MAX; c++) { - state->tex[c] = &voodoo->tex_mem[params->tex_base[c] & voodoo->texture_mask]; - state->tex_w[c] = (uint16_t *)state->tex[c]; + state->tex[0][c] = &voodoo->texture_cache[0][params->tex_entry[0]].data[texture_offset[c]]; + state->tex[1][c] = &voodoo->texture_cache[1][params->tex_entry[1]].data[texture_offset[c]]; } - state->tformat = params->tformat; + state->tformat = params->tformat[0]; - state->tex_w_mask = params->tex_w_mask; - state->tex_h_mask = params->tex_h_mask; - state->tex_shift = 
params->tex_shift; + state->tex_w_mask[0] = params->tex_w_mask[0]; + state->tex_h_mask[0] = params->tex_h_mask[0]; + state->tex_shift[0] = params->tex_shift[0]; + state->tex_lod[0] = params->tex_lod[0]; + state->tex_w_mask[1] = params->tex_w_mask[1]; + state->tex_h_mask[1] = params->tex_h_mask[1]; + state->tex_shift[1] = params->tex_shift[1]; + state->tex_lod[1] = params->tex_lod[1]; if ((params->fbzMode & 1) && (ystart < params->clipLowY)) { @@ -1792,6 +2379,9 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood state->tmu[0].base_s += params->tmu[0].dSdY*dy; state->tmu[0].base_t += params->tmu[0].dTdY*dy; state->tmu[0].base_w += params->tmu[0].dWdY*dy; + state->tmu[1].base_s += params->tmu[1].dSdY*dy; + state->tmu[1].base_t += params->tmu[1].dTdY*dy; + state->tmu[1].base_w += params->tmu[1].dWdY*dy; state->base_w += params->dWdY*dy; state->xstart += state->dx1*dy; state->xend += state->dx2*dy; @@ -1829,6 +2419,9 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood state->tmu0_s = state->tmu[0].base_s; state->tmu0_t = state->tmu[0].base_t; state->tmu0_w = state->tmu[0].base_w; + state->tmu1_s = state->tmu[1].base_s; + state->tmu1_t = state->tmu[1].base_t; + state->tmu1_w = state->tmu[1].base_w; state->w = state->base_w; x = (state->vertexAx << 12) + ((state->dxAC * (real_y - state->vertexAy)) >> 4); @@ -1868,6 +2461,9 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood state->tmu0_s += (params->tmu[0].dSdX * dx); state->tmu0_t += (params->tmu[0].dTdX * dx); state->tmu0_w += (params->tmu[0].dWdX * dx); + state->tmu1_s += (params->tmu[1].dSdX * dx); + state->tmu1_t += (params->tmu[1].dTdX * dx); + state->tmu1_w += (params->tmu[1].dWdX * dx); state->w += (params->dWdX * dx); if (voodoo_output) @@ -1889,6 +2485,9 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood state->tmu0_s += params->tmu[0].dSdX*dx; state->tmu0_t += params->tmu[0].dTdX*dx; 
state->tmu0_w += params->tmu[0].dWdX*dx; + state->tmu1_s += params->tmu[1].dSdX*dx; + state->tmu1_t += params->tmu[1].dTdX*dx; + state->tmu1_w += params->tmu[1].dWdX*dx; state->w += params->dWdX*dx; x = params->clipLeft; @@ -1910,6 +2509,9 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood state->tmu0_s += params->tmu[0].dSdX*dx; state->tmu0_t += params->tmu[0].dTdX*dx; state->tmu0_w += params->tmu[0].dWdX*dx; + state->tmu1_s += params->tmu[1].dSdX*dx; + state->tmu1_t += params->tmu[1].dTdX*dx; + state->tmu1_w += params->tmu[1].dWdX*dx; state->w += params->dWdX*dx; x = params->clipRight; @@ -1931,6 +2533,7 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood pclog("%03i: x=%08x x2=%08x xstart=%08x xend=%08x dx=%08x start_x2=%08x\n", state->y, x, x2, state->xstart, state->xend, dx, start_x2); state->pixel_count = 0; + state->texel_count = 0; state->x = x; state->x2 = x2; @@ -1945,7 +2548,9 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood { start_x = x; voodoo->pixel_count[odd_even]++; + voodoo->texel_count[odd_even] += texels; voodoo->fbiPixelsIn++; + if (voodoo_output) pclog(" X=%03i T=%08x\n", x, state->tmu0_t); // if (voodoo->fbzMode & FBZ_RGB_WMASK) @@ -1982,8 +2587,8 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood new_depth = CLAMP16(state->z >> 12); if (params->fbzMode & FBZ_DEPTH_BIAS) - new_depth = (new_depth + params->zaColor) & 0xffff; - + new_depth = CLAMP16(new_depth + (int16_t)params->zaColor); + if (params->fbzMode & FBZ_DEPTH_ENABLE) { uint16_t old_depth = aux_mem[x]; @@ -2002,50 +2607,48 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) { - if (params->textureMode & 1) + if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus) { - int64_t _w = 0; - if (state->tmu0_w) - _w = (int64_t)((1ULL << 
48) / state->tmu0_w); - - state->tex_s = (int32_t)(((state->tmu0_s >> 14) * _w) >> 30); - state->tex_t = (int32_t)(((state->tmu0_t >> 14) * _w) >> 30); -// state->lod = state->tmu[0].lod + (int)(log2((double)_w / (double)(1 << 19)) * 256.0); - state->lod = state->tmu[0].lod + (fastlog(_w) - (19 << 8)); + /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/ + voodoo_tmu_fetch(voodoo, params, state, 0, x); + } + else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH) + { + /*TMU0 in pass-through mode, only sample TMU1*/ + voodoo_tmu_fetch(voodoo, params, state, 1, x); + + state->tex_r[0] = state->tex_r[1]; + state->tex_g[0] = state->tex_g[1]; + state->tex_b[0] = state->tex_b[1]; + state->tex_a[0] = state->tex_a[1]; } else { - state->tex_s = (int32_t)(state->tmu0_s >> (14+14)); - state->tex_t = (int32_t)(state->tmu0_t >> (14+14)); - state->lod = state->tmu[0].lod; + voodoo_tmu_fetch_and_blend(voodoo, params, state, x); } - - if (state->lod < state->lod_min) - state->lod = state->lod_min; - else if (state->lod > state->lod_max) - state->lod = state->lod_max; - state->lod >>= 8; - - voodoo_get_texture(voodoo, params, state); - + if ((params->fbzMode & FBZ_CHROMAKEY) && - state->tex_r == params->chromaKey_r && - state->tex_g == params->chromaKey_g && - state->tex_b == params->chromaKey_b) + state->tex_r[0] == params->chromaKey_r && + state->tex_g[0] == params->chromaKey_g && + state->tex_b[0] == params->chromaKey_b) { voodoo->fbiChromaFail++; goto skip_pixel; } } - if (voodoo->trexInit1 & (1 << 18)) + if (voodoo->trexInit1[0] & (1 << 18)) { - state->tex_r = state->tex_g = 0; - state->tex_b = 1; + state->tex_r[0] = state->tex_g[0] = 0; + + if (voodoo->dual_tmus) + state->tex_b[0] = 1 | (3 << 6); + else + state->tex_b[0] = 1; } if (cc_localselect_override) - sel = (state->tex_a & 0x80) ? 1 : 0; + sel = (state->tex_a[0] & 0x80) ? 
1 : 0; else sel = cc_localselect; @@ -2071,9 +2674,9 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood break; case CC_LOCALSELECT_TEX: /*TREX Color Output*/ - cother_r = state->tex_r; - cother_g = state->tex_g; - cother_b = state->tex_b; + cother_r = state->tex_r[0]; + cother_g = state->tex_g[0]; + cother_b = state->tex_b[0]; break; case CC_LOCALSELECT_COLOR1: /*Color1 RGB*/ @@ -2114,7 +2717,7 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood aother = CLAMP(state->ia >> 12); break; case A_SEL_TEX: - aother = state->tex_a; + aother = state->tex_a[0]; break; case A_SEL_COLOR1: aother = (params->color1 >> 24) & 0xff; @@ -2175,9 +2778,9 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood msel_b = alocal; break; case CC_MSELECT_TEX: - msel_r = state->tex_a; - msel_g = state->tex_a; - msel_b = state->tex_a; + msel_r = state->tex_a[0]; + msel_g = state->tex_a[0]; + msel_b = state->tex_a[0]; break; default: @@ -2202,7 +2805,7 @@ static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, vood msel_a = alocal; break; case CCA_MSELECT_TEX: - msel_a = state->tex_a; + msel_a = state->tex_a[0]; break; default: @@ -2312,6 +2915,9 @@ skip_pixel: state->tmu0_s += params->tmu[0].dSdX; state->tmu0_t += params->tmu[0].dTdX; state->tmu0_w += params->tmu[0].dWdX; + state->tmu1_s += params->tmu[1].dSdX; + state->tmu1_t += params->tmu[1].dTdX; + state->tmu1_w += params->tmu[1].dWdX; state->w += params->dWdX; } else @@ -2324,6 +2930,9 @@ skip_pixel: state->tmu0_s -= params->tmu[0].dSdX; state->tmu0_t -= params->tmu[0].dTdX; state->tmu0_w -= params->tmu[0].dWdX; + state->tmu1_s -= params->tmu[1].dSdX; + state->tmu1_t -= params->tmu[1].dTdX; + state->tmu1_w -= params->tmu[1].dWdX; state->w -= params->dWdX; } @@ -2331,6 +2940,7 @@ skip_pixel: } while (start_x != x2); voodoo->pixel_count[odd_even] += state->pixel_count; + voodoo->texel_count[odd_even] += state->texel_count; 
voodoo->fbiPixelsIn += state->pixel_count; if (voodoo->params.draw_offset == voodoo->params.front_offset) @@ -2344,10 +2954,16 @@ next_line: state->tmu[0].base_s += params->tmu[0].dSdY; state->tmu[0].base_t += params->tmu[0].dTdY; state->tmu[0].base_w += params->tmu[0].dWdY; + state->tmu[1].base_s += params->tmu[1].dSdY; + state->tmu[1].base_t += params->tmu[1].dTdY; + state->tmu[1].base_w += params->tmu[1].dWdY; state->base_w += params->dWdY; state->xstart += state->dx1; state->xend += state->dx2; } + + voodoo->texture_cache[0][params->tex_entry[0]].refcount_r[odd_even]++; + voodoo->texture_cache[1][params->tex_entry[1]].refcount_r[odd_even]++; } static void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_even) @@ -2384,6 +3000,9 @@ static void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_e state.tmu[0].base_s = params->tmu[0].startS; state.tmu[0].base_t = params->tmu[0].startT; state.tmu[0].base_w = params->tmu[0].startW; + state.tmu[1].base_s = params->tmu[1].startS; + state.tmu[1].base_t = params->tmu[1].startT; + state.tmu[1].base_w = params->tmu[1].startW; state.base_w = params->startW; if (params->fbzColorPath & FBZ_PARAM_ADJUST) @@ -2396,6 +3015,9 @@ static void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_e state.tmu[0].base_s += (dx*params->tmu[0].dSdX + dy*params->tmu[0].dSdY) >> 4; state.tmu[0].base_t += (dx*params->tmu[0].dTdX + dy*params->tmu[0].dTdY) >> 4; state.tmu[0].base_w += (dx*params->tmu[0].dWdX + dy*params->tmu[0].dWdY) >> 4; + state.tmu[1].base_s += (dx*params->tmu[1].dSdX + dy*params->tmu[1].dSdY) >> 4; + state.tmu[1].base_t += (dx*params->tmu[1].dTdX + dy*params->tmu[1].dTdY) >> 4; + state.tmu[1].base_w += (dx*params->tmu[1].dWdX + dy*params->tmu[1].dWdY) >> 4; state.base_w += (dx*params->dWdX + dy*params->dWdY) >> 4; } @@ -2438,16 +3060,22 @@ static void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_e else state.dxBC = 0; - state.lod_min = (params->tLOD & 
0x3f) << 6; - state.lod_max = ((params->tLOD >> 6) & 0x3f) << 6; - if (state.lod_max > 0x800) - state.lod_max = 0x800; + state.lod_min[0] = (params->tLOD[0] & 0x3f) << 6; + state.lod_max[0] = ((params->tLOD[0] >> 6) & 0x3f) << 6; + if (state.lod_max[0] > 0x800) + state.lod_max[0] = 0x800; + state.lod_min[1] = (params->tLOD[1] & 0x3f) << 6; + state.lod_max[1] = ((params->tLOD[1] >> 6) & 0x3f) << 6; + if (state.lod_max[1] > 0x800) + state.lod_max[1] = 0x800; + state.xstart = state.xend = state.vertexAx << 8; state.xdir = params->sign ? -1 : 1; state.y = (state.vertexAy + 8) >> 4; state.ydir = 1; + tempdx = (params->tmu[0].dSdX >> 14) * (params->tmu[0].dSdX >> 14) + (params->tmu[0].dTdX >> 14) * (params->tmu[0].dTdX >> 14); tempdy = (params->tmu[0].dSdY >> 14) * (params->tmu[0].dSdY >> 14) + (params->tmu[0].dTdY >> 14) * (params->tmu[0].dTdY >> 14); @@ -2459,12 +3087,28 @@ static void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_e LOD = (int)(log2((double)tempLOD / (double)(1ULL << 36)) * 256); LOD >>= 2; - lodbias = (params->tLOD >> 12) & 0x3f; + lodbias = (params->tLOD[0] >> 12) & 0x3f; if (lodbias & 0x20) lodbias |= ~0x3f; state.tmu[0].lod = LOD + (lodbias << 6); - state.palette = params->palette; + + tempdx = (params->tmu[1].dSdX >> 14) * (params->tmu[1].dSdX >> 14) + (params->tmu[1].dTdX >> 14) * (params->tmu[1].dTdX >> 14); + tempdy = (params->tmu[1].dSdY >> 14) * (params->tmu[1].dSdY >> 14) + (params->tmu[1].dTdY >> 14) * (params->tmu[1].dTdY >> 14); + + if (tempdx > tempdy) + tempLOD = tempdx; + else + tempLOD = tempdy; + + LOD = (int)(log2((double)tempLOD / (double)(1ULL << 36)) * 256); + LOD >>= 2; + + lodbias = (params->tLOD[1] >> 12) & 0x3f; + if (lodbias & 0x20) + lodbias |= ~0x3f; + state.tmu[1].lod = LOD + (lodbias << 6); + voodoo_half_triangle(voodoo, params, &state, vertexAy_adjusted, vertexCy_adjusted, odd_even); } @@ -2547,19 +3191,12 @@ static inline void queue_triangle(voodoo_t *voodoo, voodoo_params_t *params) 
thread_wait_event(voodoo->render_not_full_event[1], -1); /*Wait for room in ringbuffer*/ } } + + use_texture(voodoo, params, 0); + if (voodoo->dual_tmus) + use_texture(voodoo, params, 1); - memcpy(params_new, params, sizeof(voodoo_params_t) - sizeof(voodoo->palette)); - - /*Copy palette data if required*/ - switch (params->tformat) - { - case TEX_PAL8: case TEX_APAL88: - memcpy(params_new->palette, voodoo->palette, sizeof(voodoo->palette)); - break; - case TEX_Y4I2Q2: - memcpy(params_new->palette, voodoo->ncc_lookup[(params->textureMode & TEXTUREMODE_NCC_SEL) ? 1 : 0], sizeof(voodoo->palette)); - break; - } + memcpy(params_new, params, sizeof(voodoo_params_t)); voodoo->params_write_idx++; @@ -2646,8 +3283,8 @@ enum { CHIP_FBI = 0x1, CHIP_TREX0 = 0x2, - CHIP_TREX1 = 0x2, - CHIP_TREX2 = 0x2 + CHIP_TREX1 = 0x4, + CHIP_TREX2 = 0x8 }; static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) @@ -2749,16 +3386,22 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) case SST_startS: case SST_remap_startS: if (chip & CHIP_TREX0) voodoo->params.tmu[0].startS = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startS = ((int64_t)(int32_t)val) << 14; break; case SST_startT: case SST_remap_startT: if (chip & CHIP_TREX0) voodoo->params.tmu[0].startT = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startT = ((int64_t)(int32_t)val) << 14; break; case SST_startW: case SST_remap_startW: if (chip & CHIP_FBI) voodoo->params.startW = (int64_t)(int32_t)val << 2; if (chip & CHIP_TREX0) voodoo->params.tmu[0].startW = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startW = (int64_t)(int32_t)val << 2; break; case SST_dRdX: case SST_remap_dRdX: @@ -2779,14 +3422,20 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) case SST_dSdX: case SST_remap_dSdX: if (chip & CHIP_TREX0) voodoo->params.tmu[0].dSdX = ((int64_t)(int32_t)val) << 14; + if (chip & 
CHIP_TREX1) + voodoo->params.tmu[1].dSdX = ((int64_t)(int32_t)val) << 14; break; case SST_dTdX: case SST_remap_dTdX: if (chip & CHIP_TREX0) voodoo->params.tmu[0].dTdX = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdX = ((int64_t)(int32_t)val) << 14; break; case SST_dWdX: case SST_remap_dWdX: if (chip & CHIP_TREX0) voodoo->params.tmu[0].dWdX = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdX = (int64_t)(int32_t)val << 2; if (chip & CHIP_FBI) voodoo->params.dWdX = (int64_t)(int32_t)val << 2; break; @@ -2809,14 +3458,20 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) case SST_dSdY: case SST_remap_dSdY: if (chip & CHIP_TREX0) voodoo->params.tmu[0].dSdY = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdY = ((int64_t)(int32_t)val) << 14; break; case SST_dTdY: case SST_remap_dTdY: if (chip & CHIP_TREX0) voodoo->params.tmu[0].dTdY = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdY = ((int64_t)(int32_t)val) << 14; break; case SST_dWdY: case SST_remap_dWdY: if (chip & CHIP_TREX0) voodoo->params.tmu[0].dWdY = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdY = (int64_t)(int32_t)val << 2; if (chip & CHIP_FBI) voodoo->params.dWdY = (int64_t)(int32_t)val << 2; break; @@ -2824,9 +3479,11 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) case SST_triangleCMD: case SST_remap_triangleCMD: voodoo->params.sign = val & (1 << 31); - if (voodoo->ncc_dirty) - voodoo_update_ncc(voodoo); - voodoo->ncc_dirty = 0; + if (voodoo->ncc_dirty[0]) + voodoo_update_ncc(voodoo, 0); + if (voodoo->ncc_dirty[1]) + voodoo_update_ncc(voodoo, 1); + voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; queue_triangle(voodoo, &voodoo->params); @@ -2882,16 +3539,22 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) tempif.i = val; if (chip & CHIP_TREX0) voodoo->params.tmu[0].startS 
= (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startS = (int64_t)(tempif.f * 4294967296.0f); break; case SST_fstartT: case SST_remap_fstartT: tempif.i = val; if (chip & CHIP_TREX0) voodoo->params.tmu[0].startT = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startT = (int64_t)(tempif.f * 4294967296.0f); break; case SST_fstartW: case SST_remap_fstartW: tempif.i = val; if (chip & CHIP_TREX0) voodoo->params.tmu[0].startW = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startW = (int64_t)(tempif.f * 4294967296.0f); if (chip & CHIP_FBI) voodoo->params.startW = (int64_t)(tempif.f * 4294967296.0f); break; @@ -2920,16 +3583,22 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) tempif.i = val; if (chip & CHIP_TREX0) voodoo->params.tmu[0].dSdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdX = (int64_t)(tempif.f * 4294967296.0f); break; case SST_fdTdX: case SST_remap_fdTdX: tempif.i = val; if (chip & CHIP_TREX0) voodoo->params.tmu[0].dTdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdX = (int64_t)(tempif.f * 4294967296.0f); break; case SST_fdWdX: case SST_remap_fdWdX: tempif.i = val; if (chip & CHIP_TREX0) voodoo->params.tmu[0].dWdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdX = (int64_t)(tempif.f * 4294967296.0f); if (chip & CHIP_FBI) voodoo->params.dWdX = (int64_t)(tempif.f * 4294967296.0f); break; @@ -2958,16 +3627,22 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) tempif.i = val; if (chip & CHIP_TREX0) voodoo->params.tmu[0].dSdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdY = (int64_t)(tempif.f * 4294967296.0f); break; case SST_fdTdY: case SST_remap_fdTdY: tempif.i = val; if (chip & CHIP_TREX0) voodoo->params.tmu[0].dTdY = 
(int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdY = (int64_t)(tempif.f * 4294967296.0f); break; case SST_fdWdY: case SST_remap_fdWdY: tempif.i = val; if (chip & CHIP_TREX0) voodoo->params.tmu[0].dWdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdY = (int64_t)(tempif.f * 4294967296.0f); if (chip & CHIP_FBI) voodoo->params.dWdY = (int64_t)(tempif.f * 4294967296.0f); break; @@ -2975,9 +3650,11 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) case SST_ftriangleCMD: voodoo->params.sign = val & (1 << 31); - if (voodoo->ncc_dirty) - voodoo_update_ncc(voodoo); - voodoo->ncc_dirty = 0; + if (voodoo->ncc_dirty[0]) + voodoo_update_ncc(voodoo, 0); + if (voodoo->ncc_dirty[1]) + voodoo_update_ncc(voodoo, 1); + voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; queue_triangle(voodoo, &voodoo->params); @@ -3066,170 +3743,443 @@ static void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) break; case SST_textureMode: - voodoo->params.textureMode = val; - voodoo->params.tformat = (val >> 8) & 0xf; + if (chip & CHIP_TREX0) + { + voodoo->params.textureMode[0] = val; + voodoo->params.tformat[0] = (val >> 8) & 0xf; + } + if (chip & CHIP_TREX1) + { + voodoo->params.textureMode[1] = val; + voodoo->params.tformat[1] = (val >> 8) & 0xf; + } break; case SST_tLOD: - voodoo->params.tLOD = val; - voodoo_recalc_tex(voodoo); + if (chip & CHIP_TREX0) + { + voodoo->params.tLOD[0] = val; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + voodoo->params.tLOD[1] = val; + voodoo_recalc_tex(voodoo, 1); + } + break; + case SST_tDetail: + if (chip & CHIP_TREX0) + { + voodoo->params.detail_max[0] = val & 0xff; + voodoo->params.detail_bias[0] = (val >> 8) & 0x3f; + voodoo->params.detail_scale[0] = (val >> 14) & 7; + } + if (chip & CHIP_TREX1) + { + voodoo->params.detail_max[1] = val & 0xff; + voodoo->params.detail_bias[1] = (val >> 8) & 0x3f; + voodoo->params.detail_scale[1] = 
(val >> 14) & 7; + } break; - case SST_texBaseAddr: -// pclog("Write texBaseAddr %08x\n", val); - voodoo->params.texBaseAddr = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo); + if (chip & CHIP_TREX0) + { + voodoo->params.texBaseAddr[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + voodoo->params.texBaseAddr[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } break; case SST_texBaseAddr1: - voodoo->params.texBaseAddr1 = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo); + if (chip & CHIP_TREX0) + { + voodoo->params.texBaseAddr1[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + voodoo->params.texBaseAddr1[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } break; case SST_texBaseAddr2: - voodoo->params.texBaseAddr2 = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo); + if (chip & CHIP_TREX0) + { + voodoo->params.texBaseAddr2[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + voodoo->params.texBaseAddr2[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } break; case SST_texBaseAddr38: - voodoo->params.texBaseAddr38 = (val & 0x7ffff) << 3; - voodoo_recalc_tex(voodoo); + if (chip & CHIP_TREX0) + { + voodoo->params.texBaseAddr38[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + voodoo->params.texBaseAddr38[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } break; case SST_trexInit1: - voodoo->trexInit1 = val; + if (chip & CHIP_TREX0) + voodoo->trexInit1[0] = val; + if (chip & CHIP_TREX1) + voodoo->trexInit1[1] = val; break; case SST_nccTable0_Y0: - voodoo->nccTable[0].y[0] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[0] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable0_Y1: - 
voodoo->nccTable[0].y[1] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[1] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable0_Y2: - voodoo->nccTable[0].y[2] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[2] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable0_Y3: - voodoo->nccTable[0].y[3] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[3] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable0_I0: if (!(val & (1 << 31))) { - voodoo->nccTable[0].i[0] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[0] = val; + voodoo->ncc_dirty[1] = 1; + } break; } case SST_nccTable0_I2: if (!(val & (1 << 31))) { - voodoo->nccTable[0].i[2] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[2] = val; + voodoo->ncc_dirty[1] = 1; + } break; } case SST_nccTable0_Q0: if (!(val & (1 << 31))) { - voodoo->nccTable[0].q[0] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].q[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].q[0] = val; + voodoo->ncc_dirty[1] = 1; + } break; } case SST_nccTable0_Q2: if (!(val & (1 << 31))) { - voodoo->nccTable[0].q[2] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].q[2] = val; /*store in q[2], not i[2]*/ + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) 
+ { + voodoo->nccTable[1][0].q[2] = val; /*store in q[2], not i[2]*/ + voodoo->ncc_dirty[1] = 1; + } break; } if (val & (1 << 31)) { int p = (val >> 23) & 0xfe; - voodoo->palette[p].u = val | 0xff000000; + if (chip & CHIP_TREX0) + { + voodoo->palette[0][p].u = val | 0xff000000; + voodoo->palette_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->palette[1][p].u = val | 0xff000000; + voodoo->palette_dirty[1] = 1; + } } break; case SST_nccTable0_I1: if (!(val & (1 << 31))) { - voodoo->nccTable[0].i[1] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[1] = val; + voodoo->ncc_dirty[1] = 1; + } break; } case SST_nccTable0_I3: if (!(val & (1 << 31))) { - voodoo->nccTable[0].i[3] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[3] = val; + voodoo->ncc_dirty[1] = 1; + } break; } case SST_nccTable0_Q1: if (!(val & (1 << 31))) { - voodoo->nccTable[0].q[1] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].q[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].q[1] = val; + voodoo->ncc_dirty[1] = 1; + } break; } case SST_nccTable0_Q3: if (!(val & (1 << 31))) { - voodoo->nccTable[0].q[3] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].q[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].q[3] = val; + voodoo->ncc_dirty[1] = 1; + } break; } if (val & (1 << 31)) { int p = ((val >> 23) & 0xfe) | 0x01; - voodoo->palette[p].u = val | 0xff000000; + if (chip & CHIP_TREX0) + { + voodoo->palette[0][p].u = val | 0xff000000; + voodoo->palette_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->palette[1][p].u = val | 0xff000000; + voodoo->palette_dirty[1] = 1; + } } break; case 
SST_nccTable1_Y0: - voodoo->nccTable[1].y[0] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[0] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_Y1: - voodoo->nccTable[1].y[1] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[1] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_Y2: - voodoo->nccTable[1].y[2] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[2] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_Y3: - voodoo->nccTable[1].y[3] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[3] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_I0: - voodoo->nccTable[1].i[0] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[0] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_I1: - voodoo->nccTable[1].i[1] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[1] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_I2: - voodoo->nccTable[1].i[2] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[2] = val; + voodoo->ncc_dirty[1] = 1; + } break; case 
SST_nccTable1_I3: - voodoo->nccTable[1].i[3] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[3] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_Q0: - voodoo->nccTable[1].q[0] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[0] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_Q1: - voodoo->nccTable[1].q[1] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[1] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_Q2: - voodoo->nccTable[1].q[2] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[2] = val; + voodoo->ncc_dirty[1] = 1; + } break; case SST_nccTable1_Q3: - voodoo->nccTable[1].q[3] = val; - voodoo->ncc_dirty = 1; + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[3] = val; + voodoo->ncc_dirty[1] = 1; + } break; } } @@ -3575,19 +4525,25 @@ static void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p) { int lod, s, t; voodoo_t *voodoo = (voodoo_t *)p; + int tmu; - if (addr & 0x600000) + if (addr & 0x400000) return; /*TREX != 0*/ + + tmu = (addr & 0x200000) ? 
1 : 0; + + if (tmu && !voodoo->dual_tmus) + return; // pclog("voodoo_tex_writel : %08X %08X %i\n", addr, val, voodoo->params.tformat); lod = (addr >> 17) & 0xf; t = (addr >> 9) & 0xff; - if (voodoo->params.tformat & 8) + if (voodoo->params.tformat[tmu] & 8) s = (addr >> 1) & 0xfe; else { - if (voodoo->params.textureMode & (1 << 31)) + if (voodoo->params.textureMode[tmu] & (1 << 31)) s = addr & 0xfc; else s = (addr >> 1) & 0xfc; @@ -3599,11 +4555,16 @@ static void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p) // if (addr >= 0x200000) // return; - if (voodoo->params.tformat & 8) - addr = voodoo->params.tex_base[lod] + s*2 + (t << voodoo->params.tex_shift[lod])*2; + if (voodoo->params.tformat[tmu] & 8) + addr = voodoo->params.tex_base[tmu][lod] + s*2 + (t << voodoo->params.tex_shift[tmu][lod])*2; else - addr = voodoo->params.tex_base[lod] + s + (t << voodoo->params.tex_shift[lod]); - *(uint32_t *)(&voodoo->tex_mem[addr & voodoo->texture_mask]) = val; + addr = voodoo->params.tex_base[tmu][lod] + s + (t << voodoo->params.tex_shift[tmu][lod]); + if (voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) + { +// pclog("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); + flush_texture_cache(voodoo, addr & voodoo->texture_mask, tmu); + } + *(uint32_t *)(&voodoo->tex_mem[tmu][addr & voodoo->texture_mask]) = val; } static inline void wake_fifo_thread(voodoo_t *voodoo) @@ -3668,7 +4629,7 @@ static uint32_t voodoo_readl(uint32_t addr, void *p) addr &= 0xffffff; cycles -= pci_nonburst_time; - + if (addr & 0x800000) /*Texture*/ { } @@ -3982,14 +4943,10 @@ static void fifo_thread(void *param) voodoo_fb_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); break; case FIFO_WRITEL_TEX: - if (!(fifo->addr_type & 0x600000)) - { - wait_for_render_thread_idle(voodoo); + if (!(fifo->addr_type & 0x400000)) voodoo_tex_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); - } break; } - voodoo->fifo_read_idx++; 
fifo->addr_type = FIFO_INVALID; @@ -4205,8 +5162,8 @@ static void voodoo_filterline(voodoo_t *voodoo, uint16_t *fil, int column, uint1 void voodoo_callback(void *p) { voodoo_t *voodoo = (voodoo_t *)p; - int y_add = enable_overscan ? 16 : 0; - int x_add = enable_overscan ? 8 : 0; + int y_add = enable_overscan ? 16 : 0; + int x_add = enable_overscan ? 8 : 0; if (voodoo->fbiInit0 & FBIINIT0_VGA_PASS) { @@ -4301,6 +5258,8 @@ static void voodoo_add_status_info(char *s, int max_len, void *p) char temps[256]; int pixel_count_current[2]; int pixel_count_total; + int texel_count_current[2]; + int texel_count_total; uint64_t new_time = timer_read(); uint64_t status_diff = new_time - status_time; status_time = new_time; @@ -4313,9 +5272,14 @@ static void voodoo_add_status_info(char *s, int max_len, void *p) pixel_count_current[0] = voodoo->pixel_count[0]; pixel_count_current[1] = voodoo->pixel_count[1]; pixel_count_total = (pixel_count_current[0] + pixel_count_current[1]) - (voodoo->pixel_count_old[0] + voodoo->pixel_count_old[1]); - sprintf(temps, "%f Mpixels/sec (%f)\n%f ktris/sec\n%f%% CPU (%f%% real)\n%d frames/sec (%i)\n%f%% CPU (%f%% real)\n%f%% CPU (%f%% real)\n"/*%d reads/sec\n%d write/sec\n%d tex/sec\n*/, + texel_count_current[0] = voodoo->texel_count[0]; + texel_count_current[1] = voodoo->texel_count[1]; + texel_count_total = (texel_count_current[0] + texel_count_current[1]) - (voodoo->texel_count_old[0] + voodoo->texel_count_old[1]); + sprintf(temps, "%f Mpixels/sec (%f)\n%f Mtexels/sec (%f)\n%f ktris/sec\n%f%% CPU (%f%% real)\n%d frames/sec (%i)\n%f%% CPU (%f%% real)\n%f%% CPU (%f%% real)\n"/*%d reads/sec\n%d write/sec\n%d tex/sec\n*/, (double)pixel_count_total/1000000.0, ((double)pixel_count_total/1000000.0) / ((double)voodoo_render_time[0] / status_diff), + (double)texel_count_total/1000000.0, + ((double)texel_count_total/1000000.0) / ((double)voodoo_render_time[0] / status_diff), (double)voodoo->tri_count/1000.0, ((double)voodoo_time * 100.0) / timer_freq, 
((double)voodoo_time * 100.0) / status_diff, voodoo->frame_count, voodoo_recomp, ((double)voodoo_render_time[0] * 100.0) / timer_freq, ((double)voodoo_render_time[0] * 100.0) / status_diff, ((double)voodoo_render_time[1] * 100.0) / timer_freq, ((double)voodoo_render_time[1] * 100.0) / status_diff); @@ -4323,6 +5287,8 @@ static void voodoo_add_status_info(char *s, int max_len, void *p) voodoo->pixel_count_old[0] = pixel_count_current[0]; voodoo->pixel_count_old[1] = pixel_count_current[1]; + voodoo->texel_count_old[0] = texel_count_current[0]; + voodoo->texel_count_old[1] = texel_count_current[1]; voodoo->tri_count = voodoo->frame_count = 0; voodoo->rd_count = voodoo->wr_count = voodoo->tex_count = 0; voodoo_time = 0; @@ -4340,6 +5306,7 @@ static void voodoo_speed_changed(void *p) void *voodoo_init() { int c; + int type; voodoo_t *voodoo = malloc(sizeof(voodoo_t)); memset(voodoo, 0, sizeof(voodoo_t)); @@ -4354,6 +5321,9 @@ void *voodoo_init() #ifndef NO_CODEGEN voodoo->use_recompiler = device_get_config_int("recompiler"); #endif + type = device_get_config_int("type"); + voodoo->dual_tmus = type ? 
1 : 0; + voodoo_generate_filter(voodoo); /*generate filter lookup tables*/ pci_add(voodoo_pci_read, voodoo_pci_write, voodoo); @@ -4361,8 +5331,24 @@ void *voodoo_init() mem_mapping_add(&voodoo->mapping, 0, 0, NULL, voodoo_readw, voodoo_readl, NULL, voodoo_writew, voodoo_writel, NULL, 0, voodoo); voodoo->fb_mem = malloc(4 * 1024 * 1024); - voodoo->tex_mem = malloc(voodoo->texture_size * 1024 * 1024); - voodoo->tex_mem_w = (uint16_t *)voodoo->tex_mem; + voodoo->tex_mem[0] = malloc(voodoo->texture_size * 1024 * 1024); + if (voodoo->dual_tmus) + voodoo->tex_mem[1] = malloc(voodoo->texture_size * 1024 * 1024); + voodoo->tex_mem_w[0] = (uint16_t *)voodoo->tex_mem[0]; + voodoo->tex_mem_w[1] = (uint16_t *)voodoo->tex_mem[1]; + + for (c = 0; c < TEX_CACHE_MAX; c++) + { + voodoo->texture_cache[0][c].data = malloc((256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1) * 4); + voodoo->texture_cache[0][c].base = -1; /*invalid*/ + voodoo->texture_cache[0][c].refcount = 0; + if (voodoo->dual_tmus) + { + voodoo->texture_cache[1][c].data = malloc((256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1) * 4); + voodoo->texture_cache[1][c].base = -1; /*invalid*/ + voodoo->texture_cache[1][c].refcount = 0; + } + } timer_add(voodoo_callback, &voodoo->timer_count, TIMER_ALWAYS_ENABLED, voodoo); @@ -4439,10 +5425,18 @@ void voodoo_close(void *p) { FILE *f; voodoo_t *voodoo = (voodoo_t *)p; + int c; + #ifndef RELEASE_BUILD f = romfopen("texram.dmp", "wb"); - fwrite(voodoo->tex_mem, 2048*1024, 1, f); + fwrite(voodoo->tex_mem[0], voodoo->texture_size*1024*1024, 1, f); fclose(f); + if (voodoo->dual_tmus) + { + f = romfopen("texram2.dmp", "wb"); + fwrite(voodoo->tex_mem[1], voodoo->texture_size*1024*1024, 1, f); + fclose(f); + } #endif thread_kill(voodoo->fifo_thread); @@ -4456,16 +5450,45 @@ void voodoo_close(void *p) thread_destroy_event(voodoo->wake_render_thread[1]); thread_destroy_event(voodoo->render_not_full_event[0]); 
thread_destroy_event(voodoo->render_not_full_event[1]); + + for (c = 0; c < TEX_CACHE_MAX; c++) + { + if (voodoo->dual_tmus) + free(voodoo->texture_cache[1][c].data); + free(voodoo->texture_cache[0][c].data); + } #ifndef NO_CODEGEN voodoo_codegen_close(voodoo); #endif free(voodoo->fb_mem); - free(voodoo->tex_mem); + if (voodoo->dual_tmus) + free(voodoo->tex_mem[1]); + free(voodoo->tex_mem[0]); free(voodoo); } static device_config_t voodoo_config[] = { + { + .name = "type", + .description = "Voodoo type", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "Voodoo Graphics", + .value = 0 + }, + { + .description = "Obsidian SB50 + Amethyst (2 TMUs)", + .value = 1 + }, + { + .description = "" + } + }, + .default_int = 0 + }, { .name = "framebuffer_memory", .description = "Framebuffer memory size", diff --git a/src/vid_voodoo_codegen_x86-64.h b/src/vid_voodoo_codegen_x86-64.h index 551c53e49..06f635cd6 100644 --- a/src/vid_voodoo_codegen_x86-64.h +++ b/src/vid_voodoo_codegen_x86-64.h @@ -71,7 +71,575 @@ static double const_1_48 = (double)(1ull << 4); static __m128i alookup[257], aminuslookup[256]; static __m128i minus_254;// = 0xff02ff02ff02ff02ull; -static __m128i bilinear_lookup[256*4]; +static __m128i bilinear_lookup[256*2]; +static __m128i xmm_00_ff_w[2]; +static uint32_t i_00_ff_w[2] = {0, 0xff}; + +static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu) +{ + if (params->textureMode[tmu] & 1) + { + addbyte(0x48); /*MOV RBX, state->tmu0_s*/ + addbyte(0x8b); + addbyte(0x9f); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s)); + addbyte(0x48); /*MOV RAX, (1 << 48)*/ + addbyte(0xb8); + addquad(1ULL << 48); + addbyte(0x48); /*XOR RDX, RDX*/ + addbyte(0x31); + addbyte(0xd2); + addbyte(0x48); /*MOV RCX, state->tmu0_t*/ + addbyte(0x8b); + addbyte(0x8f); + addlong(tmu ? 
offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t)); + addbyte(0x48); /*CMP state->tmu_w, 0*/ + addbyte(0x83); + addbyte(0xbf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w)); + addbyte(0); + addbyte(0x74); /*JZ +*/ + addbyte(7); + addbyte(0x48); /*IDIV state->tmu_w*/ + addbyte(0xf7); + addbyte(0xbf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w)); + addbyte(0x48); /*SAR RBX, 14*/ + addbyte(0xc1); + addbyte(0xfb); + addbyte(14); + addbyte(0x48); /*SAR RCX, 14*/ + addbyte(0xc1); + addbyte(0xf9); + addbyte(14); + addbyte(0x48); /*IMUL RBX, RAX*/ + addbyte(0x0f); + addbyte(0xaf); + addbyte(0xd8); + addbyte(0x48); /*IMUL RCX, RAX*/ + addbyte(0x0f); + addbyte(0xaf); + addbyte(0xc8); + addbyte(0x48); /*SAR RBX, 30*/ + addbyte(0xc1); + addbyte(0xfb); + addbyte(30); + addbyte(0x48); /*SAR RCX, 30*/ + addbyte(0xc1); + addbyte(0xf9); + addbyte(30); + addbyte(0x48); /*BSR EDX, RAX*/ + addbyte(0x0f); + addbyte(0xbd); + addbyte(0xd0); + addbyte(0x48); /*SHL RAX, 8*/ + addbyte(0xc1); + addbyte(0xe0); + addbyte(8); + addbyte(0x89); /*MOV state->tex_t, ECX*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0x89); /*MOV ECX, EDX*/ + addbyte(0xd1); + addbyte(0x83); /*SUB EDX, 19*/ + addbyte(0xea); + addbyte(19); + addbyte(0x48); /*SHR RAX, CL*/ + addbyte(0xd3); + addbyte(0xe8); + addbyte(0xc1); /*SHL EDX, 8*/ + addbyte(0xe2); + addbyte(8); + addbyte(0x25); /*AND EAX, 0xff*/ + addlong(0xff); + addbyte(0x89); /*MOV state->tex_s, EBX*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x0f); /*MOVZX EAX, logtable[RAX]*/ + addbyte(0xb6); + addbyte(0x80); + addlong((uint32_t)logtable); + addbyte(0x09); /*OR EAX, EDX*/ + addbyte(0xd0); + addbyte(0x03); /*ADD EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tmu[tmu].lod)); + addbyte(0x3b); /*CMP EAX, state->lod_min*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + 
addbyte(0x0f); /*CMOVL EAX, state->lod_min*/ + addbyte(0x4c); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x3b); /*CMP EAX, state->lod_max*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_max[tmu])); + addbyte(0x0f); /*CMOVNL EAX, state->lod_max*/ + addbyte(0x4d); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_max[tmu])); + addbyte(0xc1); /*SHR EAX, 8*/ + addbyte(0xe8); + addbyte(8); + addbyte(0x89); /*MOV state->lod, EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + } + else + { + addbyte(0x48); /*MOV RAX, state->tmu0_s*/ + addbyte(0x8b); + addbyte(0x87); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s)); + addbyte(0x48); /*MOV RCX, state->tmu0_t*/ + addbyte(0x8b); + addbyte(0x8f); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t)); + addbyte(0x48); /*SHR RAX, 28*/ + addbyte(0xc1); + addbyte(0xe8); + addbyte(28); + addbyte(0x8b); /*MOV EBX, state->lod_min*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x48); /*SHR RCX, 28*/ + addbyte(0xc1); + addbyte(0xe9); + addbyte(28); + addbyte(0x48); /*MOV state->tex_s, RAX*/ + addbyte(0x89); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x48); /*MOV state->tex_t, RCX*/ + addbyte(0x89); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0x89); /*MOV state->lod, EBX*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod)); + } + + if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) + { + if (voodoo->bilinear_enabled && (params->textureMode[tmu] & 6)) + { + addbyte(0xb2); /*MOV DL, 8*/ + addbyte(8); + addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xbd); /*MOV EBP, 1*/ + addlong(1); + addbyte(0x28); /*SUB DL, CL*/ + addbyte(0xca); +// addbyte(0x8a); /*MOV DL, 
params->tex_shift[RSI+ECX*4]*/ +// addbyte(0x94); +// addbyte(0x8e); +// addlong(offsetof(voodoo_params_t, tex_shift)); + addbyte(0xd3); /*SHL EBP, CL*/ + addbyte(0xe5); + addbyte(0x8b); /*MOV EAX, state->tex_s[RDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0xc1); /*SHL EBP, 3*/ + addbyte(0xe5); + addbyte(3); + addbyte(0x8b); /*MOV EBX, state->tex_t[RDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0x29); /*SUB EAX, EBP*/ + addbyte(0xe8); + addbyte(0x29); /*SUB EBX, EBP*/ + addbyte(0xeb); + addbyte(0xd3); /*SAR EAX, CL*/ + addbyte(0xf8); + addbyte(0xd3); /*SAR EBX, CL*/ + addbyte(0xfb); + addbyte(0x89); /*MOV EBP, EAX*/ + addbyte(0xc5); + addbyte(0x89); /*MOV ECX, EBX*/ + addbyte(0xd9); + addbyte(0x83); /*AND EBP, 0xf*/ + addbyte(0xe5); + addbyte(0xf); + addbyte(0xc1); /*SHL ECX, 4*/ + addbyte(0xe1); + addbyte(4); + addbyte(0xc1); /*SAR EAX, 4*/ + addbyte(0xf8); + addbyte(4); + addbyte(0x81); /*AND ECX, 0xf0*/ + addbyte(0xe1); + addlong(0xf0); + addbyte(0xc1); /*SAR EBX, 4*/ + addbyte(0xfb); + addbyte(4); + addbyte(0x09); /*OR EBP, ECX*/ + addbyte(0xcd); + addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xc1); /*SHL EBP, 5*/ + addbyte(0xe5); + addbyte(5); + /*EAX = S, EBX = T, ECX = LOD, EDX = tex_shift, ESI=params, EDI=state, EBP = bilinear shift*/ + addbyte(0x48); /*LEA RSI, [RSI+RCX*4]*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x8e); + addbyte(0x89); /*MOV ebp_store, EBP*/ + addbyte(0xaf); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x48); /*MOV RBP, state->tex[RDI+RCX*8]*/ + addbyte(0x8b); + addbyte(0xac); + addbyte(0xcf); + addlong(offsetof(voodoo_state_t, tex[tmu])); + addbyte(0x88); /*MOV CL, DL*/ + addbyte(0xd1); + addbyte(0x89); /*MOV EDX, EBX*/ + addbyte(0xda); + if (!state->clamp_s[tmu]) + { + addbyte(0x23); /*AND EAX, params->tex_w_mask[ESI]*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); 
+ } + addbyte(0x83); /*ADD EDX, 1*/ + addbyte(0xc2); + addbyte(1); + if (state->clamp_t[tmu]) + { + addbyte(0x0f); /*CMOVS EDX, zero*/ + addbyte(0x48); + addbyte(0x14); + addbyte(0x25); + addlong(&zero); + addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/ + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x0f); /*CMOVA EDX, params->tex_h_mask[ESI]*/ + addbyte(0x47); + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x85); /*TEST EBX,EBX*/ + addbyte(0xdb); + addbyte(0x0f); /*CMOVS EBX, zero*/ + addbyte(0x48); + addbyte(0x1c); + addbyte(0x25); + addlong(&zero); + addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x0f); /*CMOVA EBX, params->tex_h_mask[ESI]*/ + addbyte(0x47); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + } + else + { + addbyte(0x23); /*AND EDX, params->tex_h_mask[ESI]*/ + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x23); /*AND EBX, params->tex_h_mask[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + } + /*EAX = S, EBX = T0, EDX = T1*/ + addbyte(0xd3); /*SHL EBX, CL*/ + addbyte(0xe3); + addbyte(0xd3); /*SHL EDX, CL*/ + addbyte(0xe2); + addbyte(0x48); /*LEA RBX,[RBP+RBX*4]*/ + addbyte(0x8d); + addbyte(0x5c); + addbyte(0x9d); + addbyte(0); + addbyte(0x48); /*LEA RDX,[RBP+RDX*4]*/ + addbyte(0x8d); + addbyte(0x54); + addbyte(0x95); + addbyte(0); + if (state->clamp_s[tmu]) + { + addbyte(0x8b); /*MOV EBP, params->tex_w_mask[ESI]*/ + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x8b); /*MOV ebp_store2, RSI*/ + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x0f); /*CMOVS EAX, zero*/ + addbyte(0x48); + addbyte(0x04); + addbyte(0x25); + addlong(&zero); + addbyte(0x78); /*JS + - clamp on 0*/ + addbyte(2+3+2+ 
5+5+2); + addbyte(0x3b); /*CMP EAX, EBP*/ + addbyte(0xc5); + addbyte(0x0f); /*CMOVAE EAX, EBP*/ + addbyte(0x43); + addbyte(0xc5); + addbyte(0x73); /*JAE + - clamp on +*/ + addbyte(5+5+2); + } + else + { + addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI] - is S at texture edge (ie will wrap/clamp)?*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + addbyte(0x8b); /*MOV ebp_store2, ESI*/ + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x74); /*JE +*/ + addbyte(5+5+2); + } + + addbyte(0xf3); /*MOVQ XMM0, [RBX+RAX*4]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x04); + addbyte(0x83); + addbyte(0xf3); /*MOVQ XMM1, [RDX+RAX*4]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x0c); + addbyte(0x82); + + if (state->clamp_s) + { + addbyte(0xeb); /*JMP +*/ + addbyte(5+5+4+4); + + /*S clamped - the two S coordinates are the same*/ + addbyte(0x66); /*MOVD XMM0, [RBX+RAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x04); + addbyte(0x83); + addbyte(0x66); /*MOVD XMM1, [RDX+RAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x0c); + addbyte(0x82); + addbyte(0x66); /*PUNPCKLDQ XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc0); + addbyte(0x66); /*PUNPCKLDQ XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc9); + } + else + { + addbyte(0xeb); /*JMP +*/ + addbyte(5+5+5+5+6+6); + + /*S wrapped - the two S coordinates are not contiguous*/ + addbyte(0x66); /*MOVD XMM0, [RBX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x04); + addbyte(0x83); + addbyte(0x66); /*MOVD XMM1, [RDX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x0c); + addbyte(0x82); + addbyte(0x66); /*PINSRW XMM0, [RBX], 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x03); + addbyte(0x02); + addbyte(0x66); /*PINSRW XMM1, [RDX], 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x0a); + addbyte(0x02); + addbyte(0x66); /*PINSRW XMM0, 2[RBX], 3*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x43); + addbyte(0x02); + addbyte(0x03); + 
addbyte(0x66); /*PINSRW XMM1, 2[RDX], 3*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x4a); + addbyte(0x02); + addbyte(0x03); + } + + addbyte(0x49); /*MOV R8, bilinear_lookup*/ + addbyte(0xb8); + addquad(bilinear_lookup); + + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xca); + + addbyte(0x4c); /*ADD RSI, R8*/ + addbyte(0x01); + addbyte(0xc6); + + addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x06); + addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x4e); + addbyte(0x10); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0 | 0); + addbyte(8); + addbyte(0x66); /*PSRLW XMM1, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0 | 1); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc0 | 1 | (0 << 3)); + addbyte(0x66); /*MOV XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xc0 | 0 | (1 << 3)); + addbyte(0x66); /*PSRLDQ XMM0, 64*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xd8); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc0 | 1 | (0 << 3)); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + + addbyte(0x4c); /*MOV RSI, R15*/ + addbyte(0x89); + addbyte(0xfe); + + addbyte(0x66); /*MOV EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + } + else + { + addbyte(0xb2); /*MOV DL, 8*/ + addbyte(8); + addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0x48); /*MOV RBP, state->tex[RDI+RCX*8]*/ + addbyte(0x8b); + addbyte(0xac); + addbyte(0xcf); + addlong(offsetof(voodoo_state_t, tex[tmu])); + addbyte(0x28); /*SUB DL, CL*/ + addbyte(0xca); + addbyte(0x80); /*ADD CL, 4*/ + addbyte(0xc1); + 
addbyte(4); + addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0xd3); /*SHR EAX, CL*/ + addbyte(0xe8); + addbyte(0xd3); /*SHR EBX, CL*/ + addbyte(0xeb); + if (state->clamp_s) + { + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*CMOVS EAX, zero*/ + addbyte(0x48); + addbyte(0x04); + addbyte(0x25); + addlong(&zero); + addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/ + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + addbyte(0x0f); /*CMOVAE EAX, params->tex_w_mask[ESI+ECX*4]*/ + addbyte(0x43); + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + + } + else + { + addbyte(0x23); /*AND EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/ + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + } + if (state->clamp_t) + { + addbyte(0x85); /*TEST EBX, EBX*/ + addbyte(0xdb); + addbyte(0x0f); /*CMOVS EBX, zero*/ + addbyte(0x48); + addbyte(0x1c); + addbyte(0x25); + addlong(&zero); + addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/ + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + addbyte(0x0f); /*CMOVAE EBX, params->tex_h_mask[ESI+ECX*4]*/ + addbyte(0x43); + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + } + else + { + addbyte(0x23); /*AND EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/ + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + } + addbyte(0x88); /*MOV CL, DL*/ + addbyte(0xd1); + addbyte(0xd3); /*SHL EBX, CL*/ + addbyte(0xe3); + addbyte(0x01); /*ADD EBX, EAX*/ + addbyte(0xc3); + + addbyte(0x8b); /*MOV EAX, [RBP+RBX*4]*/ + addbyte(0x44); + addbyte(0x9d); + addbyte(0); + } + } + + return block_pos; +} 
static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int depthop) { @@ -321,2032 +889,745 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo /*EDI = state, ESI = params*/ - if (params->textureMode & 1) + if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus) { - addbyte(0x48); /*MOV RBX, state->tmu0_s*/ - addbyte(0x8b); - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tmu0_s)); - addbyte(0x48); /*MOV RAX, (1 << 48)*/ - addbyte(0xb8); - addquad(1ULL << 48); - addbyte(0x48); /*XOR RDX, RDX*/ - addbyte(0x31); - addbyte(0xd2); - addbyte(0x48); /*MOV RCX, state->tmu0_t*/ - addbyte(0x8b); - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, tmu0_t)); - addbyte(0x48); /*CMP state->tmu_w, 0*/ - addbyte(0x83); - addbyte(0xbf); - addlong(offsetof(voodoo_state_t, tmu0_w)); - addbyte(0); - addbyte(0x74); /*JZ +*/ - addbyte(7); - addbyte(0x48); /*IDIV state->tmu_w*/ - addbyte(0xf7); - addbyte(0xbf); - addlong(offsetof(voodoo_state_t, tmu0_w)); - addbyte(0x48); /*SAR RBX, 14*/ - addbyte(0xc1); - addbyte(0xfb); - addbyte(14); - addbyte(0x48); /*SAR RCX, 14*/ - addbyte(0xc1); - addbyte(0xf9); - addbyte(14); - addbyte(0x48); /*IMUL RBX, RAX*/ + /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/ + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0); + + addbyte(0x66); /*MOVD XMM0, EAX*/ addbyte(0x0f); - addbyte(0xaf); - addbyte(0xd8); - addbyte(0x48); /*IMUL RCX, RAX*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0xc8); - addbyte(0x48); /*SAR RBX, 30*/ - addbyte(0xc1); - addbyte(0xfb); - addbyte(30); - addbyte(0x48); /*SAR RCX, 30*/ - addbyte(0xc1); - addbyte(0xf9); - addbyte(30); - addbyte(0x48); /*BSR EDX, RAX*/ - addbyte(0x0f); - addbyte(0xbd); - addbyte(0xd0); - addbyte(0x48); /*SHL RAX, 8*/ - addbyte(0xc1); - addbyte(0xe0); - addbyte(8); - addbyte(0x89); /*MOV state->tex_t, ECX*/ - 
addbyte(0x8f); - addlong(offsetof(voodoo_state_t, tex_t)); - addbyte(0x89); /*MOV ECX, EDX*/ - addbyte(0xd1); - addbyte(0x83); /*SUB EDX, 19*/ - addbyte(0xea); - addbyte(19); - addbyte(0x48); /*SHR RAX, CL*/ - addbyte(0xd3); - addbyte(0xe8); - addbyte(0xc1); /*SHL EDX, 8*/ - addbyte(0xe2); - addbyte(8); - addbyte(0x25); /*AND EAX, 0xff*/ - addlong(0xff); - addbyte(0x89); /*MOV state->tex_s, EBX*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tex_s)); - addbyte(0x0f); /*MOVZX EAX, logtable[RAX]*/ - addbyte(0xb6); - addbyte(0x80); - addlong((uint32_t)logtable); - addbyte(0x09); /*OR EAX, EDX*/ - addbyte(0xd0); - addbyte(0x03); /*ADD EAX, state->lod*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, tmu[0].lod)); - addbyte(0x3b); /*CMP EAX, state->lod_min*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod_min)); - addbyte(0x0f); /*CMOVL EAX, state->lod_min*/ - addbyte(0x4c); - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod_min)); - addbyte(0x3b); /*CMP EAX, state->lod_max*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod_max)); - addbyte(0x0f); /*CMOVNL EAX, state->lod_max*/ - addbyte(0x4d); - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod_max)); + addbyte(0x6e); + addbyte(0xc0); addbyte(0xc1); /*SHR EAX, 8*/ addbyte(0xe8); - addbyte(8); - addbyte(0x89); /*MOV state->lod, EAX*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod)); - } - else - { - addbyte(0x48); /*MOV RAX, state->tmu0_s*/ - addbyte(0x8b); - addbyte(0x87); - addlong(offsetof(voodoo_state_t, tmu0_s)); - addbyte(0x48); /*MOV RCX, state->tmu0_t*/ - addbyte(0x8b); - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, tmu0_t)); - addbyte(0x48); /*SHR RAX, 28*/ - addbyte(0xc1); - addbyte(0xe8); - addbyte(28); - addbyte(0x8b); /*MOV EBX, state->lod_min*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, lod_min)); - addbyte(0x48); /*SHR RCX, 28*/ - addbyte(0xc1); - addbyte(0xe9); - addbyte(28); - addbyte(0x48); /*MOV state->tex_s, RAX*/ - addbyte(0x89); - 
addbyte(0x87); - addlong(offsetof(voodoo_state_t, tex_s)); - addbyte(0xc1); /*SHR EBX, 8*/ - addbyte(0xeb); - addbyte(8); - addbyte(0x48); /*MOV state->tex_t, RCX*/ - addbyte(0x89); - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, tex_t)); - addbyte(0x89); /*MOV state->lod, EBX*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, lod)); - } - - - if (voodoo->trexInit1 & (1 << 18)) - { - addbyte(0xb8); /*MOV EAX, 0x000001*/ - addlong(0x000001); - addbyte(0x66); /*MOVD XMM0, EAX*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0xc0); - } - else if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) - { - if (voodoo->bilinear_enabled && (params->textureMode & 6)) - { - addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/ - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, lod)); - addbyte(0xbd); /*MOV EBP, 1*/ - addlong(1); - addbyte(0x8a); /*MOV DL, params->tex_shift[RSI+ECX*4]*/ - addbyte(0x94); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_shift)); - addbyte(0xd3); /*SHL EBP, CL*/ - addbyte(0xe5); - addbyte(0x8b); /*MOV EAX, state->tex_s[RDI]*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, tex_s)); - addbyte(0xc1); /*SHL EBP, 3*/ - addbyte(0xe5); - addbyte(3); - addbyte(0x8b); /*MOV EBX, state->tex_t[RDI]*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tex_t)); - addbyte(0x29); /*SUB EAX, EBP*/ - addbyte(0xe8); - addbyte(0x29); /*SUB EBX, EBP*/ - addbyte(0xeb); - addbyte(0xd3); /*SAR EAX, CL*/ - addbyte(0xf8); - addbyte(0xd3); /*SAR EBX, CL*/ - addbyte(0xfb); - addbyte(0x89); /*MOV EBP, EAX*/ - addbyte(0xc5); - addbyte(0x89); /*MOV ECX, EBX*/ - addbyte(0xd9); - addbyte(0x83); /*AND EBP, 0xf*/ - addbyte(0xe5); - addbyte(0xf); - addbyte(0xc1); /*SHL ECX, 4*/ - addbyte(0xe1); - addbyte(4); - addbyte(0xc1); /*SAR EAX, 4*/ - addbyte(0xf8); - addbyte(4); - addbyte(0x81); /*AND ECX, 0xf0*/ - addbyte(0xe1); - addlong(0xf0); - addbyte(0xc1); /*SAR EBX, 4*/ - addbyte(0xfb); - addbyte(4); - addbyte(0x09); /*OR EBP, ECX*/ - addbyte(0xcd); - addbyte(0x8b); /*MOV ECX, 
state->lod[RDI]*/ - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, lod)); - addbyte(0xc1); /*SHL EBP, 6*/ - addbyte(0xe5); - addbyte(6); - /*EAX = S, EBX = T, ECX = LOD, EDX = tex_shift, ESI=params, EDI=state, EBP = bilinear shift*/ - addbyte(0x48); /*LEA RSI, [RSI+RCX*4]*/ - addbyte(0x8d); - addbyte(0x34); - addbyte(0x8e); - addbyte(0x89); /*MOV ebp_store, EBP*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, ebp_store)); - addbyte(0x48); /*MOV RBP, state->tex[RDI+RCX*8]*/ - addbyte(0x8b); - addbyte(0xac); - addbyte(0xcf); - addlong(offsetof(voodoo_state_t, tex)); - addbyte(0x88); /*MOV CL, DL*/ - addbyte(0xd1); - addbyte(0x89); /*MOV EDX, EBX*/ - addbyte(0xda); - if (!state->clamp_s) - { - addbyte(0x23); /*AND EAX, params->tex_w_mask[ESI]*/ - addbyte(0x86); - addlong(offsetof(voodoo_params_t, tex_w_mask)); - } - addbyte(0x83); /*ADD EDX, 1*/ - addbyte(0xc2); - addbyte(1); - if (state->clamp_t) - { - addbyte(0x0f); /*CMOVS EDX, zero*/ - addbyte(0x48); - addbyte(0x14); - addbyte(0x25); - addlong(&zero); - addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/ - addbyte(0x96); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - addbyte(0x0f); /*CMOVA EDX, params->tex_h_mask[ESI]*/ - addbyte(0x47); - addbyte(0x96); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - addbyte(0x85); /*TEST EBX,EBX*/ - addbyte(0xdb); - addbyte(0x0f); /*CMOVS EBX, zero*/ - addbyte(0x48); - addbyte(0x1c); - addbyte(0x25); - addlong(&zero); - addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/ - addbyte(0x9e); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - addbyte(0x0f); /*CMOVA EBX, params->tex_h_mask[ESI]*/ - addbyte(0x47); - addbyte(0x9e); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - } - else - { - addbyte(0x23); /*AND EDX, params->tex_h_mask[ESI]*/ - addbyte(0x96); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - addbyte(0x23); /*AND EBX, params->tex_h_mask[ESI]*/ - addbyte(0x9e); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - } - /*EAX = S, EBX = T0, EDX = T1*/ 
- addbyte(0xd3); /*SHL EBX, CL*/ - addbyte(0xe3); - addbyte(0xd3); /*SHL EDX, CL*/ - addbyte(0xe2); - if (state->tformat & 8) - { - addbyte(0x48); /*LEA RBX,[RBP+RBX*2]*/ - addbyte(0x8d); - addbyte(0x5c); - addbyte(0x5d); - addbyte(0); - addbyte(0x48); /*LEA RDX,[RBP+RDX*2]*/ - addbyte(0x8d); - addbyte(0x54); - addbyte(0x55); - addbyte(0); - } - else - { - addbyte(0x48); /*ADD RBX, RBP*/ - addbyte(0x01); - addbyte(0xeb); - addbyte(0x48); /*ADD RDX, RBP*/ - addbyte(0x01); - addbyte(0xea); - } - if (state->clamp_s) - { - addbyte(0x8b); /*MOV EBP, params->tex_w_mask[ESI]*/ - addbyte(0xae); - addlong(offsetof(voodoo_params_t, tex_w_mask)); - addbyte(0x85); /*TEST EAX, EAX*/ - addbyte(0xc0); - addbyte(0x8b); /*MOV ebp_store2, RSI*/ - addbyte(0xb7); - addlong(offsetof(voodoo_state_t, ebp_store)); - addbyte(0x0f); /*CMOVS EAX, zero*/ - addbyte(0x48); - addbyte(0x04); - addbyte(0x25); - addlong(&zero); - addbyte(0x78); /*JS + - clamp on 0*/ - addbyte(2+3+2+ ((state->tformat & 8) ? (3+3+2) : (4+4+2))); - addbyte(0x3b); /*CMP EAX, EBP*/ - addbyte(0xc5); - addbyte(0x0f); /*CMOVAE EAX, EBP*/ - addbyte(0x43); - addbyte(0xc5); - addbyte(0x73); /*JAE + - clamp on +*/ - addbyte((state->tformat & 8) ? (3+3+2) : (4+4+2)); - } - else - { - addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI] - is S at texture edge (ie will wrap/clamp)?*/ - addbyte(0x86); - addlong(offsetof(voodoo_params_t, tex_w_mask)); - addbyte(0x8b); /*MOV ebp_store2, ESI*/ - addbyte(0xb7); - addlong(offsetof(voodoo_state_t, ebp_store)); - addbyte(0x74); /*JE +*/ - addbyte((state->tformat & 8) ? 
(3+3+2) : (4+4+2)); - } - - if (state->tformat & 8) - { - addbyte(0x8b); /*MOV EDX,[RDX+RAX*2]*/ - addbyte(0x14); - addbyte(0x42); - addbyte(0x8b); /*MOV EAX,[RBX+RAX*2]*/ - addbyte(0x04); - addbyte(0x43); - } - else - { - addbyte(0x0f); /*MOVZX EDX,W[RDX+RAX]*/ - addbyte(0xb7); - addbyte(0x14); - addbyte(0x02); - addbyte(0x0f); /*MOVZX EAX,W[RBX+RAX]*/ - addbyte(0xb7); - addbyte(0x04); - addbyte(0x03); - } - - if (state->clamp_s) - { - addbyte(0xeb); /*JMP +*/ - addbyte((state->tformat & 8) ? (3+4+3+3+4+3) : (4+4+2+2)); - - /*S clamped - the two S coordinates are the same*/ - if (state->tformat & 8) - { - addbyte(0x8b); /*MOV ECX, [RDX+RAX*2]*/ - addbyte(0x0c); - addbyte(0x42); - addbyte(0x8b); /*MOV EDX, [RDX+RAX*2-2]*/ - addbyte(0x54); - addbyte(0x42); - addbyte(-2); - addbyte(0x66); /*MOV DX, CX*/ - addbyte(0x89); - addbyte(0xca); - addbyte(0x8b); /*MOV ECX, [RBX+RAX*2]*/ - addbyte(0x0c); - addbyte(0x43); - addbyte(0x8b); /*MOV EAX, [RBX+RAX*2-2]*/ - addbyte(0x44); - addbyte(0x43); - addbyte(-2); - addbyte(0x66); /*MOV AX, CX*/ - addbyte(0x89); - addbyte(0xc8); - } - else - { - addbyte(0x0f); /*MOVZX EDX,W[RDX+RAX]*/ - addbyte(0xb7); - addbyte(0x14); - addbyte(0x02); - addbyte(0x0f); /*MOVZX EAX,W[RBX+RAX]*/ - addbyte(0xb7); - addbyte(0x04); - addbyte(0x03); - addbyte(0x88); /*MOV DH, DL*/ - addbyte(0xd6); - addbyte(0x88); /*MOV AH, AL*/ - addbyte(0xc4); - } - } - else - { - addbyte(0xeb); /*JMP +*/ - addbyte((state->tformat & 8) ? 
(3+3+3+3+3+3) : (2+2+4+4+2+2)); - - /*S wrapped - the two S coordinates are not contiguous*/ - if (state->tformat & 8) - { - addbyte(0x8b); /*MOV ECX, [RDX+RAX*2]*/ - addbyte(0x0c); - addbyte(0x42); - addbyte(0x8b); /*MOV EDX, [RDX-2]*/ - addbyte(0x52); - addbyte(-2); - addbyte(0x66); /*MOV DX, CX*/ - addbyte(0x89); - addbyte(0xca); - addbyte(0x8b); /*MOV ECX, [RBX+RAX*2]*/ - addbyte(0x0c); - addbyte(0x43); - addbyte(0x8b); /*MOV EAX, [RBX-2]*/ - addbyte(0x43); - addbyte(-2); - addbyte(0x66); /*MOV AX, CX*/ - addbyte(0x89); - addbyte(0xc8); - } - else - { - addbyte(0x8a); /*MOV CL, [RDX]*/ - addbyte(0x0a); - addbyte(0x8a); /*MOV CH, [RBX]*/ - addbyte(0x2b); - addbyte(0x0f); /*MOVZX EDX,B[RDX+RAX]*/ - addbyte(0xb6); - addbyte(0x14); - addbyte(0x02); - addbyte(0x0f); /*MOVZX EAX,B[RBX+RAX]*/ - addbyte(0xb6); - addbyte(0x04); - addbyte(0x03); - addbyte(0x88); /*MOV DH, CL*/ - addbyte(0xce); - addbyte(0x88); /*MOV AH, CH*/ - addbyte(0xec); - } - } - - addbyte(0x49); /*MOV R8, bilinear_lookup*/ - addbyte(0xb8); - addquad(bilinear_lookup); - addbyte(0x4c); /*ADD RSI, R8*/ - addbyte(0x01); - addbyte(0xc6); - - switch (state->tformat) - { - case TEX_RGB332: - addbyte(0x49); /*MOV R8, rgb332*/ - addbyte(0xb8); - addquad(rgb332); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x88); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x66); /*MOVD XMM1, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - 
addbyte(0x66); /*MOVD XMM3, [R8+ECX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_Y4I2Q2: - addbyte(0x48); /*MOV RBP, state->palette[EDI]*/ - addbyte(0x8b); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x44); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - 
addbyte(0x60); - addbyte(0xc2); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x5c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_A8: - addbyte(0x66); /*MOVZX CX, AH*/ - 
addbyte(0x0f); - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*MOVZX AX, AL*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xc0); - addbyte(0x66); /*IMUL CX, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*IMUL AX, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x06); - addbyte(0x66); /*MOVZX BX, DH*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xde); - addbyte(0x66); /*MOVZX DX, DL*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xd2); - addbyte(0x66); /*ADD AX, CX*/ - addbyte(0x01); - addbyte(0xc8); - addbyte(0x66); /*IMUL BX, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x5e); - addbyte(0x30); - addbyte(0x66); /*IMUL DX, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x56); - addbyte(0x20); - addbyte(0x66); /*ADD AX, BX*/ - addbyte(0x01); - addbyte(0xd8); - addbyte(0x66); /*ADD AX, DX*/ - addbyte(0x01); - addbyte(0xd0); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0x66); /*MOV BX, AX*/ - addbyte(0x89); - addbyte(0xc3); - addbyte(0x0f); /*BSWAP EAX*/ - addbyte(0xc8); - addbyte(0x66); /*MOV AX, BX*/ - addbyte(0x89); - addbyte(0xd8); - break; - - case TEX_I8: - addbyte(0x66); /*MOVZX CX, AH*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*MOVZX AX, AL*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xc0); - addbyte(0x66); /*IMUL CX, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*IMUL AX, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x06); - addbyte(0x66); /*MOVZX BX, DH*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xde); - addbyte(0x66); /*MOVZX DX, DL*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xd2); - addbyte(0x66); /*ADD AX, CX*/ - addbyte(0x01); - addbyte(0xc8); - addbyte(0x66); /*IMUL BX, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x5e); - addbyte(0x30); - 
addbyte(0x66); /*IMUL DX, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x56); - addbyte(0x20); - addbyte(0x66); /*ADD AX, BX*/ - addbyte(0x01); - addbyte(0xd8); - addbyte(0x66); /*ADD AX, DX*/ - addbyte(0x01); - addbyte(0xd0); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0xc1); /*SHL EAX, 8*/ - addbyte(0xe0); - addbyte(8); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_AI8: - addbyte(0x49); /*MOV R8, ai44*/ - addbyte(0xb8); - addquad(ai44); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x88); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x66); /*MOVD XMM1, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, [R8+ECX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM3, 
bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_PAL8: - addbyte(0x48); /*MOV RBP, state->palette[EDI]*/ - addbyte(0x8b); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x44); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x5c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - 
addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_A8Y4I2Q2: - addbyte(0x48); /*MOV RBP, state->palette[EDI]*/ - addbyte(0x8b); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x44); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*PINSRW XMM0, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc1); - addbyte(3); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x0f); /*MOVZX EAX, AH*/ - 
addbyte(0xb6); - addbyte(0xc4); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PINSRW XMM1, EAX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc8); - addbyte(3); - addbyte(0x66); /*MOVD XMM3, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x5c); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PINSRW XMM3, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xd9); - addbyte(3); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PINSR1 XMM1, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc9); - addbyte(3); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); 
- addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_R5G6B5: - addbyte(0x49); /*MOV R8, rgb565*/ - addbyte(0xb8); - addquad(rgb565); - addbyte(0x0f); /*MOVZX ECX, AX*/ - addbyte(0xb7); - addbyte(0xc8); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*MOVD XMM0, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x88); - addbyte(0x66); /*MOVD XMM1, [R8+RAX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x80); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x0f); /*MOVZX ECX, DX*/ - addbyte(0xb7); - addbyte(0xca); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [R8+EDX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x90); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - 
addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_ARGB1555: - addbyte(0x49); /*MOV R8, argb1555*/ - addbyte(0xb8); - addquad(argb1555); - addbyte(0x0f); /*MOVZX ECX, AX*/ - addbyte(0xb7); - addbyte(0xc8); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*MOVD XMM0, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x88); - addbyte(0x66); /*MOVD XMM1, [R8+RAX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x80); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x0f); /*MOVZX ECX, DX*/ - addbyte(0xb7); - addbyte(0xca); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - 
addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [R8+EDX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x90); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_ARGB4444: - addbyte(0x49); /*MOV R8, argb4444*/ - addbyte(0xb8); - addquad(argb4444); - addbyte(0x0f); /*MOVZX ECX, AX*/ - addbyte(0xb7); - addbyte(0xc8); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*MOVD XMM0, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x88); - addbyte(0x66); /*MOVD XMM1, [R8+RAX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x80); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x0f); /*MOVZX ECX, DX*/ - addbyte(0xb7); - addbyte(0xca); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, 
[R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [R8+EDX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x90); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_A8I8: - addbyte(0x49); /*MOV R8, ai88*/ - addbyte(0xb8); - addquad(ai88); - addbyte(0x0f); /*MOVZX ECX, AX*/ - addbyte(0xb7); - addbyte(0xc8); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*MOVD XMM0, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x88); - addbyte(0x66); /*MOVD XMM1, [R8+RAX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x80); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x0f); /*MOVZX ECX, DX*/ - addbyte(0xb7); - addbyte(0xca); - 
addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, [R8+RCX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x88); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [R8+EDX*4]*/ - addbyte(0x41); - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x90); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_APAL88: - addbyte(0x48); /*MOV RBP, state->palette[EDI]*/ - addbyte(0x8b); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x44); - addbyte(0x8d); - addbyte(0); - 
addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*PINSRW XMM0, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc1); - addbyte(3); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x0f); /*MOVZX EAX, AH*/ - addbyte(0xb6); - addbyte(0xc4); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PINSRW XMM1, EAX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc8); - addbyte(3); - addbyte(0x66); /*MOVD XMM3, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x5c); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PINSRW XMM3, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xd9); - addbyte(3); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - 
addbyte(0xca); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PINSR1 XMM1, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - - addbyte(0xc9); - addbyte(3); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - default: - fatal("Unknown texture format %i\n", state->tformat); - } - - addbyte(0x4c); /*MOV RSI, R15*/ - addbyte(0x89); - addbyte(0xfe); - } - else - { - addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/ - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, lod)); - addbyte(0x8a); /*MOV DL, params->tex_shift[RSI+RCX*4]*/ - addbyte(0x94); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_shift)); - addbyte(0x48); /*MOV RBP, state->tex[RDI+RCX*8]*/ - addbyte(0x8b); - addbyte(0xac); - addbyte(0xcf); - addlong(offsetof(voodoo_state_t, tex)); - addbyte(0x80); /*ADD CL, 4*/ - addbyte(0xc1); - addbyte(4); - addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, tex_s)); - addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tex_t)); - addbyte(0xd3); /*SHR EAX, CL*/ - addbyte(0xe8); - addbyte(0xd3); /*SHR EBX, CL*/ - addbyte(0xeb); - if (state->clamp_s) - { - addbyte(0x85); /*TEST EAX, EAX*/ - addbyte(0xc0); - addbyte(0x0f); /*CMOVS EAX, zero*/ - addbyte(0x48); - addbyte(0x04); - addbyte(0x25); - addlong(&zero); - addbyte(0x3b); /*CMP EAX, 
params->tex_w_mask[ESI+ECX*4]*/ - addbyte(0x84); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_w_mask) - 0x10); - addbyte(0x0f); /*CMOVAE EAX, params->tex_w_mask[ESI+ECX*4]*/ - addbyte(0x43); - addbyte(0x84); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_w_mask) - 0x10); - - } - else - { - addbyte(0x23); /*AND EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/ - addbyte(0x84); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_w_mask) - 0x10); - } - if (state->clamp_t) - { - addbyte(0x85); /*TEST EBX, EBX*/ - addbyte(0xdb); - addbyte(0x0f); /*CMOVS EBX, zero*/ - addbyte(0x48); - addbyte(0x1c); - addbyte(0x25); - addlong(&zero); - addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/ - addbyte(0x9c); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_h_mask) - 0x10); - addbyte(0x0f); /*CMOVAE EBX, params->tex_h_mask[ESI+ECX*4]*/ - addbyte(0x43); - addbyte(0x9c); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_h_mask) - 0x10); - } - else - { - addbyte(0x23); /*AND EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/ - addbyte(0x9c); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_h_mask) - 0x10); - } - addbyte(0x88); /*MOV CL, DL*/ - addbyte(0xd1); - addbyte(0xd3); /*SHL EBX, CL*/ - addbyte(0xe3); - addbyte(0x01); /*ADD EBX, EAX*/ - addbyte(0xc3); - - if (state->tformat & 8) - { - addbyte(0x0f); /*MOVZX EAX,W[EBP+EBX*2]*/ - addbyte(0xb7); - addbyte(0x44); - addbyte(0x5d); - addbyte(0); - } - else - { - addbyte(0x0f); /*MOVZX EAX,B[EBP+EBX]*/ - addbyte(0xb6); - addbyte(0x44); - addbyte(0x1d); - addbyte(0); - } - - switch (state->tformat) - { - case TEX_RGB332: - addbyte(0x49); /*MOV R8, rgb332*/ - addbyte(0xb8); - addquad(rgb332); - addbyte(0x41); /*MOV EAX, [R8+EAX*4]*/ - addbyte(0x8b); - addbyte(0x04); - addbyte(0x80); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_Y4I2Q2: - addbyte(0x48); /*MOV RBP, state->palette[EDI]*/ - addbyte(0x8b); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, 
palette)); - addbyte(0x8b); /*MOV EAX, [EBP+EAX*4]*/ - addbyte(0x44); - addbyte(0x85); - addbyte(0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_A8: - addbyte(0x88); /*MOV AH, AL*/ - addbyte(0xc4); - addbyte(0x66); /*MOV BX, AX*/ - addbyte(0x89); - addbyte(0xc3); - addbyte(0x0f); /*BSWAP EAX*/ - addbyte(0xc8); - addbyte(0x66); /*MOV AX, BX*/ - addbyte(0x89); - addbyte(0xd8); - break; - - case TEX_I8: - addbyte(0x88); /*MOV AH, AL*/ - addbyte(0xc4); - addbyte(0xc1); /*SHL EAX, 8*/ - addbyte(0xe0); - addbyte(8); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_AI8: - addbyte(0x89); /*MOV EBX, EAX*/ - addbyte(0xc3); - addbyte(0x83); /*AND EAX, 0x0f*/ - addbyte(0xe0); - addbyte(0x0f); - addbyte(0x81); /*AND EBX, 0xf0*/ - addbyte(0xe3); - addlong(0xf0); - addbyte(0x89); /*MOV ECX, EAX*/ - addbyte(0xc1); - addbyte(0x89); /*MOV EDX, EBX*/ - addbyte(0xda); - addbyte(0xc1); /*SHL ECX, 4*/ - addbyte(0xe1); - addbyte(4); - addbyte(0xc1); /*SHR EDX, 4*/ - addbyte(0xe2); - addbyte(4); - addbyte(0x09); /*OR EAX, ECX*/ - addbyte(0xc8); - addbyte(0x09); /*OR EBX, EDX*/ - addbyte(0xd3); - addbyte(0x88); /*MOV AH, AL*/ - addbyte(0xc4); - addbyte(0xc1); /*SHL EBX, 24*/ - addbyte(0xe3); - addbyte(24); - addbyte(0xc1); /*SHL EAX, 8*/ - addbyte(0xe0); - addbyte(8); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0x09); /*OR EAX, EBX*/ - addbyte(0xd8); - break; - - case TEX_PAL8: - addbyte(0x48); /*MOV RBP, state->palette[EDI]*/ - addbyte(0x8b); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x8b); /*MOV EAX, [EBP+EAX*4]*/ - addbyte(0x44); - addbyte(0x85); - addbyte(0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_A8Y4I2Q2: - addbyte(0x48); /*MOV RBP, state->palette[EDI]*/ - addbyte(0x8b); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x89); /*MOV EBX, EAX*/ - 
addbyte(0xc3); - addbyte(0x25); /*AND EAX, 0x000000ff*/ - addlong(0x000000ff); - addbyte(0x8b); /*MOV EAX, [EBP+EAX*4]*/ - addbyte(0x44); - addbyte(0x85); - addbyte(0); - addbyte(0xc1); /*SHL EBX, 16*/ - addbyte(0xe3); - addbyte(16); - addbyte(0x81); /*AND EBX, 0xff000000*/ - addbyte(0xe3); - addlong(0xff000000); - addbyte(0x25); /*AND EAX, 0x00ffffff*/ - addlong(0x00ffffff); - addbyte(0x09); /*OR EAX, EBX*/ - addbyte(0xd8); - break; - - case TEX_R5G6B5: - addbyte(0x49); /*MOV R8, rgb565*/ - addbyte(0xb8); - addquad(rgb565); - addbyte(0x41); /*MOV EAX, [R8+EAX*4]*/ - addbyte(0x8b); - addbyte(0x04); - addbyte(0x80); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_ARGB1555: - addbyte(0x49); /*MOV R8, argb1555*/ - addbyte(0xb8); - addquad(argb1555); - addbyte(0x41); /*MOV EAX, [R8+EAX*4]*/ - addbyte(0x8b); - addbyte(0x04); - addbyte(0x80); - break; - - case TEX_ARGB4444: - addbyte(0x49); /*MOV R8, argb4444*/ - addbyte(0xb8); - addquad(argb4444); - addbyte(0x41); /*MOV EAX, [R8+EAX*4]*/ - addbyte(0x8b); - addbyte(0x04); - addbyte(0x80); - break; - - case TEX_A8I8: - addbyte(0x89); /*MOV EBX, EAX*/ - addbyte(0xc3); - addbyte(0xc1); /*SHL EAX, 16*/ - addbyte(0xe0); - addbyte(16); - addbyte(0x88); /*MOV AL, BL*/ - addbyte(0xd8); - addbyte(0x88); /*MOV AH, BL*/ - addbyte(0xdc); - break; - - case TEX_APAL88: - addbyte(0x48); /*MOV RBP, state->palette[EDI]*/ - addbyte(0x8b); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x89); /*MOV EBX, EAX*/ - addbyte(0xc3); - addbyte(0x25); /*AND EAX, 0x000000ff*/ - addlong(0x000000ff); - addbyte(0x8b); /*MOV EAX, [EBP+EAX*4]*/ - addbyte(0x44); - addbyte(0x85); - addbyte(0); - addbyte(0xc1); /*SHL EBX, 16*/ - addbyte(0xe3); - addbyte(16); - addbyte(0x81); /*AND EBX, 0xff000000*/ - addbyte(0xe3); - addlong(0xff000000); - addbyte(0x25); /*AND EAX, 0x00ffffff*/ - addlong(0x00ffffff); - addbyte(0x09); /*OR EAX, EBX*/ - addbyte(0xd8); - break; - - default: - fatal("Unknown texture 
format %i\n", state->tformat); - } - } - if ((params->fbzMode & FBZ_CHROMAKEY)) - { - addbyte(0x8b); /*MOV EBX, params->chromaKey[ESI]*/ - addbyte(0x9e); - addlong(offsetof(voodoo_params_t, chromaKey)); - addbyte(0x31); /*XOR EBX, EAX*/ - addbyte(0xc3); - addbyte(0x81); /*AND EBX, 0xffffff*/ - addbyte(0xe3); - addlong(0xffffff); - addbyte(0x0f); /*JE skip*/ - addbyte(0x84); - chroma_skip_pos = block_pos; - addlong(0); - } - addbyte(0x66); /*MOVD XMM0, EAX*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0xc0); - addbyte(0xc1); /*SHR EAX, 24*/ - addbyte(0xe8); addbyte(24); - addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/ + addbyte(0x89); /*MOV state->tex_a[RDI], EAX*/ addbyte(0x87); addlong(offsetof(voodoo_state_t, tex_a)); } + else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH) + { + /*TMU0 in pass-through mode, only sample TMU1*/ + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xc1); /*SHR EAX, 8*/ + addbyte(0xe8); + addbyte(24); + addbyte(0x89); /*MOV state->tex_a[RDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + } + else + { + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1); + addbyte(0x66); /*MOVD XMM3, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xd8); + if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && tc_sub_clocal_1) + { + addbyte(0x8b); /*MOV EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + if (!tc_reverse_blend_1) + { + addbyte(0xbb); /*MOV EBX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + addbyte(0x83); /*AND EAX, 1*/ + addbyte(0xe0); + addbyte(1); + if (!tca_reverse_blend_1) + { + addbyte(0xb9); /*MOV ECX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + 
addbyte(0x31); /*XOR ECX, EAX*/ + addbyte(0xc1); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/ + } + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + if (tc_sub_clocal_1) + { + switch (tc_mselect_1) + { + case TC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case TC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOVQ XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + break; + case TC_MSELECT_AOTHER: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case TC_MSELECT_ALOCAL: + addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc3); + addbyte(0xff); + break; + case TC_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/ + addbyte(0xe0); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0); + addbyte(0); + break; + case TC_MSELECT_LOD_FRAC: + addbyte(0x66); /*MOVD XMM0, state->lod_frac[1]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[1])); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0); + addbyte(0); + break; + } + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x66); /*PXOR XMM0, xmm_00_ff_w[EBX]*/ + addbyte(0x0f); + 
addbyte(0xef); + addbyte(0x83); + addlong((uint32_t)&xmm_00_ff_w[0]); + } + else if (!tc_reverse_blend_1) + { + addbyte(0x66); /*PXOR XMM0, xmm_ff_w*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x04); + addbyte(0x25); + addlong((uint32_t)&xmm_ff_w); + } + addbyte(0x66); /*PADDW XMM0, xmm_01_w*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x04); + addbyte(0x25); + addlong((uint32_t)&xmm_01_w); + addbyte(0xf3); /*MOVQ XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xca); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PMULLW XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc3); + addbyte(0x66); /*PMULHW XMM5, XMM3*/ + addbyte(0x0f); + addbyte(0xe5); + addbyte(0xeb); + addbyte(0x66); /*PUNPCKLWD XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xc5); + addbyte(0x66); /*PSRAD XMM0, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PSUBW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc8); + if (tc_add_clocal_1) + { + addbyte(0x66); /*PADDW XMM1, XMM3*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xcb); + } + else if (tc_add_alocal_1) + { + addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc3); + addbyte(0xff); + addbyte(0x66); /*PADDW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc8); + } + addbyte(0x66); /*PACKUSWB XMM3, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xd9); + if (tca_sub_clocal_1) + { + addbyte(0x66); /*MOVD EBX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdb); + } + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + } + + if (tca_sub_clocal_1) + { + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + switch (tca_mselect_1) + { + case TCA_MSELECT_ZERO: + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + 
break; + case TCA_MSELECT_CLOCAL: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case TCA_MSELECT_AOTHER: + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + break; + case TCA_MSELECT_ALOCAL: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case TCA_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/ + addbyte(0xe0); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + break; + case TCA_MSELECT_LOD_FRAC: + addbyte(0x8b); /*MOV EAX, state->lod_frac[1]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[1])); + break; + } + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x33); /*XOR EAX, i_00_ff_w[ECX*4]*/ + addbyte(0x04); + addbyte(0x8d); + addlong((uint32_t)i_00_ff_w); + } + else if (!tc_reverse_blend_1) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + addbyte(0x8e); /*ADD EAX, 1*/ + addbyte(0xc0); + addbyte(1); + addbyte(0x0f); /*IMUL EAX, EBX*/ + addbyte(0xaf); + addbyte(0xc3); + addbyte(0xb9); /*MOV ECX, 0xff*/ + addlong(0xff); + addbyte(0xf7); /*NEG EAX*/ + addbyte(0xd8); + addbyte(0xc1); /*SAR EAX, 8*/ + addbyte(0xf8); + addbyte(8); + if (tca_add_clocal_1 || tca_add_alocal_1) + { + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + } + addbyte(0x39); /*CMP ECX, EAX*/ + addbyte(0xc1); + addbyte(0x0f); /*CMOVA ECX, EAX*/ + addbyte(0x47); + addbyte(0xc8); + addbyte(0x66); /*PINSRW 3, XMM3, XMM0*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0xd8); + addbyte(3); + } + + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + 
addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x8b); /*MOV EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + if (!tc_reverse_blend) + { + addbyte(0xbb); /*MOV EBX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + addbyte(0x83); /*AND EAX, 1*/ + addbyte(0xe0); + addbyte(1); + if (!tca_reverse_blend) + { + addbyte(0xb9); /*MOV ECX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x31); /*XOR ECX, EAX*/ + addbyte(0xc1); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/ + } + + /*XMM0 = TMU0 output, XMM3 = TMU1 output*/ + + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + if (tc_zero_other) + { + addbyte(0x66); /*PXOR XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc9); + } + else + { + addbyte(0xf3); /*MOV XMM1, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xcb); + } + if (tc_sub_clocal) + { + addbyte(0x66); /*PSUBW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc8); + } + + switch (tc_mselect) + { + case TC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM4, XMM4*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe4); + break; + case TC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOV XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe0); + break; + case TC_MSELECT_AOTHER: + addbyte(0xf2); /*PSHUFLW XMM4, XMM3, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe3); + addbyte(0xff); + break; + case TC_MSELECT_ALOCAL: + addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe0); + addbyte(0xff); + break; + case TC_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[0]*/ + addlong(params->detail_bias[0]); + addbyte(0x2b); /*SUB EAX, state->lod*/ 
+ addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[0]*/ + addlong(params->detail_max[0]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[0]*/ + addbyte(0xe0); + addbyte(params->detail_scale[0]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM4, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xe0); + addbyte(0xf2); /*PSHUFLW XMM4, XMM4, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe4); + addbyte(0); + break; + case TC_MSELECT_LOD_FRAC: + addbyte(0x66); /*MOVD XMM4, state->lod_frac[0] - ModRM 0xa7 selects XMM4, not XMM0*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, lod_frac[0])); + addbyte(0xf2); /*PSHUFLW XMM4, XMM4, 0 - ModRM 0xe4 selects XMM4 for both operands*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe4); + addbyte(0); + break; + } + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x66); /*PXOR XMM4, xmm_00_ff_w[EBX]*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xa3); + addlong((uint32_t)&xmm_00_ff_w[0]); + } + else if (!tc_reverse_blend) + { + addbyte(0x66); /*PXOR XMM4, xmm_ff_w*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x24); + addbyte(0x25); + addlong(&xmm_ff_w); + } + addbyte(0x66); /*PADDW XMM4, xmm_01_w*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x24); + addbyte(0x25); + addlong(&xmm_01_w); + addbyte(0xf3); /*MOVQ XMM5, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe9); + addbyte(0x66); /*PMULLW XMM1, XMM4*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xcc); + + + addbyte(0x66); /*PMULHW XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0xe5); + addbyte(0xec); + addbyte(0x66); /*PUNPCKLWD XMM1, XMM5*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xcd); + addbyte(0x66); /*PSRAD XMM1, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe1); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc9); + + if (tc_add_clocal) + { + addbyte(0x66); /*PADDW XMM1, XMM0*/ + 
addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc8); + } + else if (tc_add_alocal) + { + addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe0); + addbyte(0xff); + addbyte(0x66); /*PADDW XMM1, XMM4 - 0xfd is PADDW; 0xfc would be the byte-wise PADDB*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xcc); + } + if (tc_invert_output) + { + addbyte(0x66); /*PXOR XMM1, xmm_ff_w - absolute disp32 via SIB, same form as the other xmm_ff_w references*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x0c); + addbyte(0x25); + addlong(&xmm_ff_w); + } + + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xdb); + addbyte(0x66); /*PACKUSWB XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc9); + + if (tca_zero_other) + { + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + } + else + { + addbyte(0x66); /*MOVD EAX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xd8); + addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xe8); + addbyte(24); + } + if (tca_sub_clocal) + { + addbyte(0x66); /*MOVD EBX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + addbyte(0x29); /*SUB EAX, EBX*/ + addbyte(0xd8); + } + switch (tca_mselect) + { + case TCA_MSELECT_ZERO: + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + break; + case TCA_MSELECT_CLOCAL: + addbyte(0x66); /*MOVD EBX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + addbyte(0xc1); /*SHR EBX, 24 - decimal 24 (0x24 would shift by 36, masked to 4)*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_AOTHER: + addbyte(0x66); /*MOVD EBX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_ALOCAL: + addbyte(0x66); /*MOVD EBX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_DETAIL: + addbyte(0xbb); /*MOV EBX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + 
addbyte(0x2b); /*SUB EBX, state->lod*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EBX, params->detail_scale[1]*/ + addbyte(0xe3); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EBX, EDX*/ + addbyte(0xd3); + addbyte(0x0f); /*CMOVA EBX, EDX*/ + addbyte(0x47); + addbyte(0xda); + break; + case TCA_MSELECT_LOD_FRAC: + addbyte(0x8b); /*MOV EBX, state->lod_frac[0]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_frac[0])); + break; + } + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x33); /*XOR EBX, i_00_ff_w[ECX*4]*/ + addbyte(0x1c); + addbyte(0x8d); + addlong((uint32_t)i_00_ff_w); + } + else if (!tca_reverse_blend) + { + addbyte(0x81); /*XOR EBX, 0xFF*/ + addbyte(0xf3); + addlong(0xff); + } + + addbyte(0x83); /*ADD EBX, 1*/ + addbyte(0xc3); + addbyte(1); + addbyte(0x0f); /*IMUL EAX, EBX*/ + addbyte(0xaf); + addbyte(0xc3); + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + addbyte(0xc1); /*SHR EAX, 8*/ + addbyte(0xe8); + addbyte(8); + if (tca_add_clocal || tca_add_alocal) + { + addbyte(0x66); /*MOV EBX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + } + addbyte(0x0f); /*CMOVS EAX, EDX*/ + addbyte(0x48); + addbyte(0xc2); + addbyte(0xba); /*MOV EDX, 0xff*/ + addlong(0xff); + addbyte(0x3d); /*CMP EAX, 0xff*/ + addlong(0xff); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + if (tca_invert_output) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + + addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + + addbyte(0xf3); /*MOVQ XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc1); + } + + if ((params->fbzMode & FBZ_CHROMAKEY)) + { + addbyte(0x8b); /*MOV EBX, params->chromaKey[ESI]*/ + 
addbyte(0x9e); + addlong(offsetof(voodoo_params_t, chromaKey)); + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x81); /*AND EBX, 0xffffff*/ + addbyte(0xe3); + addlong(0xffffff); + addbyte(0x0f); /*JE skip*/ + addbyte(0x84); + chroma_skip_pos = block_pos; + addlong(0); + } + + if (voodoo->trexInit1[0] & (1 << 18)) + { + addbyte(0xb8); /*MOV EAX, 0x000001*/ + if (voodoo->dual_tmus) + { + addlong(0x0000c1); + } + else + { + addlong(0x000001); + } + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + } if (params->alphaMode & ((1 << 0) | (1 << 4))) { @@ -3731,6 +3012,20 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addlong(offsetof(voodoo_state_t, z)); } + if (voodoo->dual_tmus) + { + addbyte(0xf3); /*MOVDQU XMM5, params->tmu[1].dSdX[ESI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tmu[1].dSdX)); + addbyte(0xf3); /*MOVQ XMM6, params->tmu[1].dWdX[ESI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xb6); + addlong(offsetof(voodoo_params_t, tmu[1].dWdX)); + } + addbyte(0xf3); /*MOVDQU state->tmu0_s, XMM3*/ addbyte(0x0f); addbyte(0x7f); @@ -3746,12 +3041,79 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0xd6); addbyte(0x87); addlong(offsetof(voodoo_state_t, w)); + + if (voodoo->dual_tmus) + { + addbyte(0xf3); /*MOVDQU XMM3, state->tmu1_s[EDI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu1_s)); + addbyte(0xf3); /*MOVQ XMM4, state->tmu1_w[EDI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu1_w)); + + if (state->xdir > 0) + { + addbyte(0x66); /*PADDQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xdd); + addbyte(0x66); /*PADDQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xe6); + } + else + { + addbyte(0x66); /*PSUBQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xdd); + 
addbyte(0x66); /*PSUBQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xe6); + } + + addbyte(0xf3); /*MOVDQU state->tmu1_s, XMM3*/ + addbyte(0x0f); + addbyte(0x7f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu1_s)); + addbyte(0x66); /*MOVQ state->tmu1_w, XMM4*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu1_w)); + } addbyte(0x83); /*ADD state->pixel_count[EDI], 1*/ addbyte(0x87); addlong(offsetof(voodoo_state_t, pixel_count)); addbyte(1); + if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) + { + if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH || + (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL) + { + addbyte(0x83); /*ADD state->texel_count[EDI], 1*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, texel_count)); + addbyte(1); + } + else + { + addbyte(0x83); /*ADD state->texel_count[EDI], 2*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, texel_count)); + addbyte(2); + } + } + addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ addbyte(0x87); addlong(offsetof(voodoo_state_t, x)); @@ -3806,7 +3168,7 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, params->fbzMode == data->fbzMode && params->fogMode == data->fogMode && params->fbzColorPath == data->fbzColorPath && - (voodoo->trexInit1 & (1 << 18)) == data->trexInit1 && + (voodoo->trexInit1[0] & (1 << 18)) == data->trexInit1 && params->textureMode == data->textureMode) { last_block[odd_even] = b; @@ -3826,7 +3188,7 @@ voodoo_recomp++; data->fbzMode = params->fbzMode; data->fogMode = params->fogMode; data->fbzColorPath = params->fbzColorPath; - data->trexInit1 = voodoo->trexInit1 & (1 << 18); + data->trexInit1 = voodoo->trexInit1[0] & (1 << 18); data->textureMode = params->textureMode; next_block_to_write[odd_even] = (next_block_to_write[odd_even] + 1) & 7; @@ -3874,12 +3236,12 @@ static void voodoo_codegen_init(voodoo_t *voodoo) d[2] = (16 - _ds) * dt; d[3] = _ds * dt; - 
bilinear_lookup[c*4] = _mm_set_epi32(0, 0, d[0] | (d[0] << 16), d[0] | (d[0] << 16)); - bilinear_lookup[c*4 + 1] = _mm_set_epi32(0, 0, d[1] | (d[1] << 16), d[1] | (d[1] << 16)); - bilinear_lookup[c*4 + 2] = _mm_set_epi32(0, 0, d[2] | (d[2] << 16), d[2] | (d[2] << 16)); - bilinear_lookup[c*4 + 3] = _mm_set_epi32(0, 0, d[3] | (d[3] << 16), d[3] | (d[3] << 16)); + bilinear_lookup[c*2] = _mm_set_epi32(d[1] | (d[1] << 16), d[1] | (d[1] << 16), d[0] | (d[0] << 16), d[0] | (d[0] << 16)); + bilinear_lookup[c*2 + 1] = _mm_set_epi32(d[3] | (d[3] << 16), d[3] | (d[3] << 16), d[2] | (d[2] << 16), d[2] | (d[2] << 16)); } alookup[256] = _mm_set_epi32(0, 0, 256 | (256 << 16), 256 | (256 << 16)); + xmm_00_ff_w[0] = _mm_set_epi32(0, 0, 0, 0); + xmm_00_ff_w[1] = _mm_set_epi32(0, 0, 0xff | (0xff << 16), 0xff | (0xff << 16)); } static void voodoo_codegen_close(voodoo_t *voodoo) diff --git a/src/vid_voodoo_codegen_x86.h b/src/vid_voodoo_codegen_x86.h index 5d47445b0..5e0e10032 100644 --- a/src/vid_voodoo_codegen_x86.h +++ b/src/vid_voodoo_codegen_x86.h @@ -69,7 +69,556 @@ static double const_1_48 = (double)(1ull << 4); static __m128i alookup[257], aminuslookup[256]; static __m128i minus_254;// = 0xff02ff02ff02ff02ull; -static __m128i bilinear_lookup[256*4]; +static __m128i bilinear_lookup[256*2]; +static __m128i xmm_00_ff_w[2]; +static uint32_t i_00_ff_w[2] = {0, 0xff}; + +static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu) +{ + if (params->textureMode[tmu] & 1) + { + addbyte(0xdf); /*FILDq state->tmu0_w*/ + addbyte(0xaf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w)); + addbyte(0xdd); /*FLDq const_1_48*/ + addbyte(0x05); + addlong(&const_1_48); + addbyte(0xde); /*FDIV ST(1)*/ + addbyte(0xf1); + addbyte(0xdf); /*FILDq state->tmu0_s*/ + addbyte(0xaf); + addlong(tmu ? 
offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s)); + addbyte(0xdf); /*FILDq state->tmu0_t*/ /*ST(0)=t, ST(1)=s, ST(2)=1/w*/ + addbyte(0xaf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t)); + addbyte(0xd9); /*FXCH ST(1)*/ /*ST(0)=s, ST(1)=t, ST(2)=1/w*/ + addbyte(0xc9); + addbyte(0xd8); /*FMUL ST(2)*/ /*ST(0)=s/w, ST(1)=t, ST(2)=1/w*/ + addbyte(0xca); + addbyte(0xd9); /*FXCH ST(1)*/ /*ST(0)=t, ST(1)=s/w, ST(2)=1/w*/ + addbyte(0xc9); + addbyte(0xd8); /*FMUL ST(2)*/ /*ST(0)=t/w, ST(1)=s/w, ST(2)=1/w*/ + addbyte(0xca); + addbyte(0xd9); /*FXCH ST(2)*/ /*ST(0)=1/w, ST(1)=s/w, ST(2)=t/w*/ + addbyte(0xca); + addbyte(0xd9); /*FSTPs log_temp*/ /*ST(0)=s/w, ST(1)=t/w*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, log_temp)); + addbyte(0xdf); /*FSITPq state->tex_s*/ + addbyte(0xbf); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x8b); /*MOV EAX, log_temp*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, log_temp)); + addbyte(0xdf); /*FSITPq state->tex_t*/ + addbyte(0xbf); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0xc1); /*SHR EAX, 23-8*/ + addbyte(0xe8); + addbyte(15); + addbyte(0x0f); /*MOVZX EBX, AL*/ + addbyte(0xb6); + addbyte(0xd8); + addbyte(0x25); /*AND EAX, 0xff00*/ + addlong(0xff00); + addbyte(0x2d); /*SUB EAX, (127-44)<<8*/ + addlong((127-44+19) << 8); + addbyte(0x0f); /*MOVZX EBX, logtable[EBX]*/ + addbyte(0xb6); + addbyte(0x9b); + addlong(logtable); + addbyte(0x09); /*OR EAX, EBX*/ + addbyte(0xd8); + addbyte(0x03); /*ADD EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tmu[tmu].lod)); + addbyte(0x3b); /*CMP EAX, state->lod_min*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x0f); /*CMOVL EAX, state->lod_min*/ + addbyte(0x4c); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x3b); /*CMP EAX, state->lod_max*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_max[tmu])); + addbyte(0x0f); 
/*CMOVNL EAX, state->lod_max[tmu]*/ + addbyte(0x4d); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_max[tmu])); + addbyte(0x0f); /*MOVZX EBX, AL - low 8 bits become lod_frac*/ + addbyte(0xb6); + addbyte(0xd8); + addbyte(0xc1); /*SHR EAX, 8*/ + addbyte(0xe8); + addbyte(8); + addbyte(0x89); /*MOV state->lod_frac[tmu], EBX*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_frac[tmu])); + addbyte(0x89); /*MOV state->lod, EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + } + else + { + addbyte(0xf3); /*MOVQ XMM4, state->tmu0_s (tmu1_s when tmu != 0)*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xa7); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s)); + addbyte(0xf3); /*MOVQ XMM5, state->tmu0_t (tmu1_t when tmu != 0)*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xaf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t)); + addbyte(0xc7); /*MOV state->lod_frac[tmu], 0 - the store targets lod_frac, not lod; it is overwritten again below*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[tmu])); + addlong(0); + addbyte(0x8b); /*MOV EAX, state->lod_min[tmu]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x66); /*PSRLQ XMM4, 28*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xd4); + addbyte(28); + addbyte(0x66); /*PSRLQ XMM5, 28*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xd5); + addbyte(28); + addbyte(0x0f); /*MOVZX EBX, AL*/ + addbyte(0xb6); + addbyte(0xd8); + addbyte(0xc1); /*SHR EAX, 8*/ + addbyte(0xe8); + addbyte(8); + addbyte(0x66); /*MOVQ state->tex_s, XMM4*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x66); /*MOVQ state->tex_t, XMM5*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xaf); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0x89); /*MOV state->lod_frac[tmu], EBX*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_frac[tmu])); + addbyte(0x89); /*MOV state->lod, EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + } + /*EAX = state->lod*/ + if (params->fbzColorPath & 
FBZCP_TEXTURE_ENABLED) + { + if (voodoo->bilinear_enabled && (params->textureMode[tmu] & 6)) + { + addbyte(0x8b); /*MOV ECX, state->tex_lod[tmu]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, tex_lod[tmu])); + addbyte(0xb2); /*MOV DL, 8*/ + addbyte(8); + addbyte(0x8b); /*MOV ECX, [ECX+EAX*4]*/ + addbyte(0x0c); + addbyte(0x81); + addbyte(0xbd); /*MOV EBP, 1*/ + addlong(1); + addbyte(0x28); /*SUB DL, CL*/ + addbyte(0xca); + addbyte(0xd3); /*SHL EBP, CL*/ + addbyte(0xe5); + addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0xc1); /*SHL EBP, 3*/ + addbyte(0xe5); + addbyte(3); + addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0x29); /*SUB EAX, EBP*/ + addbyte(0xe8); + addbyte(0x29); /*SUB EBX, EBP*/ + addbyte(0xeb); + addbyte(0xd3); /*SAR EAX, CL*/ + addbyte(0xf8); + addbyte(0xd3); /*SAR EBX, CL*/ + addbyte(0xfb); + addbyte(0x89); /*MOV EBP, EAX*/ + addbyte(0xc5); + addbyte(0x89); /*MOV ECX, EBX*/ + addbyte(0xd9); + addbyte(0x83); /*AND EBP, 0xf*/ + addbyte(0xe5); + addbyte(0xf); + addbyte(0xc1); /*SHL ECX, 4*/ + addbyte(0xe1); + addbyte(4); + addbyte(0xc1); /*SAR EAX, 4*/ + addbyte(0xf8); + addbyte(4); + addbyte(0x81); /*AND ECX, 0xf0*/ + addbyte(0xe1); + addlong(0xf0); + addbyte(0xc1); /*SAR EBX, 4*/ + addbyte(0xfb); + addbyte(4); + addbyte(0x09); /*OR EBP, ECX*/ + addbyte(0xcd); + addbyte(0x8b); /*MOV ECX, state->lod[EDI]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xc1); /*SHL EBP, 5*/ + addbyte(0xe5); + addbyte(5); + /*EAX = S, EBX = T, ECX = LOD, EDX = tex_shift, ESI=params, EDI=state, EBP = bilinear shift*/ + addbyte(0x8d); /*LEA ESI, [ESI+ECX*4]*/ + addbyte(0x34); + addbyte(0x8e); + addbyte(0x89); /*MOV ebp_store, EBP*/ + addbyte(0xaf); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x8b); /*MOV EBP, state->tex[EDI+ECX*4]*/ + addbyte(0xac); + addbyte(0x8f); + 
addlong(offsetof(voodoo_state_t, tex[tmu])); + addbyte(0x88); /*MOV CL, DL*/ + addbyte(0xd1); + addbyte(0x89); /*MOV EDX, EBX*/ + addbyte(0xda); + if (!state->clamp_s[tmu]) + { + addbyte(0x23); /*AND EAX, params->tex_w_mask[ESI]*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + } + addbyte(0x83); /*ADD EDX, 1*/ + addbyte(0xc2); + addbyte(1); + if (state->clamp_t[tmu]) + { + addbyte(0x0f); /*CMOVS EDX, zero*/ + addbyte(0x48); + addbyte(0x15); + addlong(&zero); + addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/ + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x0f); /*CMOVA EDX, params->tex_h_mask[ESI]*/ + addbyte(0x47); + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x85); /*TEST EBX,EBX*/ + addbyte(0xdb); + addbyte(0x0f); /*CMOVS EBX, zero*/ + addbyte(0x48); + addbyte(0x1d); + addlong(&zero); + addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x0f); /*CMOVA EBX, params->tex_h_mask[ESI]*/ + addbyte(0x47); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + } + else + { + addbyte(0x23); /*AND EDX, params->tex_h_mask[ESI]*/ + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x23); /*AND EBX, params->tex_h_mask[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + } + /*EAX = S, EBX = T0, EDX = T1*/ + addbyte(0xd3); /*SHL EBX, CL*/ + addbyte(0xe3); + addbyte(0xd3); /*SHL EDX, CL*/ + addbyte(0xe2); + addbyte(0x8d); /*LEA EBX,[EBP+EBX*2]*/ + addbyte(0x5c); + addbyte(0x9d); + addbyte(0); + addbyte(0x8d); /*LEA EDX,[EBP+EDX*2]*/ + addbyte(0x54); + addbyte(0x95); + addbyte(0); + if (state->clamp_s[tmu]) + { + addbyte(0x8b); /*MOV EBP, params->tex_w_mask[ESI]*/ + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x8b); /*MOV ESI, 
ebp_store*/ + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x0f); /*CMOVS EAX, zero*/ + addbyte(0x48); + addbyte(0x05); + addlong(&zero); + addbyte(0x78); /*JS + - clamp on 0*/ + addbyte(2+3+2+ 5+5+2); + addbyte(0x3b); /*CMP EAX, EBP*/ + addbyte(0xc5); + addbyte(0x0f); /*CMOVAE EAX, EBP*/ + addbyte(0x43); + addbyte(0xc5); + addbyte(0x73); /*JAE + - clamp on +*/ + addbyte(5+5+2); + } + else + { + addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI] - is S at texture edge (ie will wrap/clamp)?*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + addbyte(0x8b); /*MOV ESI, ebp_store*/ + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x74); /*JE + - skip the two MOVQs and the JMP below, landing on the edge-case sequence*/ + addbyte(5+5+2); + } + + addbyte(0xf3); /*MOVQ XMM0, [EBX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x04); + addbyte(0x83); + addbyte(0xf3); /*MOVQ XMM1, [EDX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x0c); + addbyte(0x82); + + if (state->clamp_s[tmu]) /*must index by tmu: the bare array is always true and would drop the wrap path*/ + { + addbyte(0xeb); /*JMP + - skip the clamped-edge sequence*/ + addbyte(5+5+4+4); + + /*S clamped - the two S coordinates are the same*/ + addbyte(0x66); /*MOVD XMM0, [EBX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x04); + addbyte(0x83); + addbyte(0x66); /*MOVD XMM1, [EDX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x0c); + addbyte(0x82); + addbyte(0x66); /*PUNPCKLDQ XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc0); + addbyte(0x66); /*PUNPCKLDQ XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc9); + } + else + { + addbyte(0xeb); /*JMP + - skip the wrapped-edge sequence*/ + addbyte(5+5+5+5+6+6); + + /*S wrapped - the two S coordinates are not contiguous*/ + addbyte(0x66); /*MOVD XMM0, [EBX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x04); + addbyte(0x83); + addbyte(0x66); /*MOVD XMM1, [EDX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x0c); + addbyte(0x82); + addbyte(0x66); /*PINSRW XMM0, [EBX], 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x03); + addbyte(0x02); + addbyte(0x66); 
/*PINSRW XMM1, [EDX], 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x0a); + addbyte(0x02); + addbyte(0x66); /*PINSRW XMM0, 2[EBX], 3*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x43); + addbyte(0x02); + addbyte(0x03); + addbyte(0x66); /*PINSRW XMM1, 2[EDX], 3*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x4a); + addbyte(0x02); + addbyte(0x03); + } + + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xca); + + addbyte(0x81); /*ADD ESI, bilinear_lookup*/ + addbyte(0xc6); + addlong(bilinear_lookup); + + addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x06); + addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x4e); + addbyte(0x10); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0 | 0); + addbyte(8); + addbyte(0x66); /*PSRLW XMM1, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0 | 1); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc0 | 1 | (0 << 3)); + addbyte(0x66); /*MOV XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xc0 | 0 | (1 << 3)); + addbyte(0x66); /*PSRLDQ XMM0, 64*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xd8); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc0 | 1 | (0 << 3)); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + + addbyte(0x8b); /*MOV ESI, [ESP+8]*/ + addbyte(0x74); + addbyte(0x24); + addbyte(8+16); /*CHECK!*/ + + addbyte(0x66); /*MOV EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + } + else + { + addbyte(0x8b); /*MOV ECX, state->tex_lod[tmu]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, tex_lod[tmu])); + addbyte(0xb2); /*MOV DL, 8*/ + addbyte(8); + addbyte(0x8b); /*MOV ECX, [ECX+EAX*4]*/ + 
addbyte(0x0c); + addbyte(0x81); + addbyte(0x8b); /*MOV EBP, state->tex[EDI+ECX*4]*/ + addbyte(0xac); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, tex[tmu])); + addbyte(0x28); /*SUB DL, CL*/ + addbyte(0xca); + addbyte(0x80); /*ADD CL, 4*/ + addbyte(0xc1); + addbyte(4); + addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0xd3); /*SHR EAX, CL*/ + addbyte(0xe8); + addbyte(0xd3); /*SHR EBX, CL*/ + addbyte(0xeb); + if (state->clamp_s[tmu]) + { + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*CMOVS EAX, zero*/ + addbyte(0x48); + addbyte(0x05); + addlong(&zero); + addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/ + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + addbyte(0x0f); /*CMOVAE EAX, params->tex_w_mask[ESI+ECX*4]*/ + addbyte(0x43); + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + + } + else + { + addbyte(0x23); /*AND EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/ + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + } + if (state->clamp_t[tmu]) + { + addbyte(0x85); /*TEST EBX, EBX*/ + addbyte(0xdb); + addbyte(0x0f); /*CMOVS EBX, zero*/ + addbyte(0x48); + addbyte(0x1d); + addlong(&zero); + addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/ + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + addbyte(0x0f); /*CMOVAE EBX, params->tex_h_mask[ESI+ECX*4]*/ + addbyte(0x43); + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + } + else + { + addbyte(0x23); /*AND EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/ + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + } + addbyte(0x88); /*MOV CL, DL*/ + 
addbyte(0xd1); + addbyte(0xd3); /*SHL EBX, CL*/ + addbyte(0xe3); + addbyte(0x01); /*ADD EBX, EAX*/ + addbyte(0xc3); + + addbyte(0x8b); /*MOV EAX,[EBP+EBX*4]*/ + addbyte(0x44); + addbyte(0x9d); + addbyte(0); + } + } + + return block_pos; +} static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int depthop) { @@ -213,11 +762,27 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo if (params->fbzMode & FBZ_DEPTH_BIAS) { - addbyte(0x03); /*ADD EAX, params->zaColor[ESI]*/ - addbyte(0x86); + addbyte(0x0f); /*MOVSX EDX, params->zaColor[ESI]*/ + addbyte(0xbf); + addbyte(0x96); addlong(offsetof(voodoo_params_t, zaColor)); - addbyte(0x25); /*AND EAX, 0xffff*/ - addlong(0xffff); + if (params->fbzMode & FBZ_W_BUFFER) + { + addbyte(0xbb); /*MOV EBX, 0xffff*/ + addlong(0xffff); + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x01); /*ADD EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVS EAX, ECX*/ + addbyte(0x48); + addbyte(0xc1); + addbyte(0x39); /*CMP EAX, EBX*/ + addbyte(0xd8); + addbyte(0x0f); /*CMOVA EAX, EBX*/ + addbyte(0x47); + addbyte(0xc3); } addbyte(0x89); /*MOV state->new_depth[EDI], EAX*/ @@ -309,2034 +874,745 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo /*EDI = state, ESI = params*/ - if (params->textureMode & 1) + if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus) { - /*CVTSI2SSq XMM6, state->w*/ - /*MOVSS XMM7, const_1_48*/ - /*DIVSS XMM7, XMM6*/ - /*CVTSI2SSq XMM4, state->tmu0_s*/ - /*MULSS XMM7, const_1_44*/ - /*CVTSI2SSq XMM5, state->tmu0_t*/ - /*MULSS XMM4, XMM7*/ - /*CVTSS2SIq state->tex_s, XMM4*/ - /*MULSS XMM5, XMM7*/ - /*CVTSS2SIq state->tex_t, XMM5*/ + /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/ + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0); - addbyte(0xdf); /*FILDq state->tmu0_w*/ - 
addbyte(0xaf); - addlong(offsetof(voodoo_state_t, tmu0_w)); - addbyte(0xdd); /*FLDq const_1_48*/ - addbyte(0x05); - addlong(&const_1_48); - addbyte(0xde); /*FDIV ST(1)*/ - addbyte(0xf1); - addbyte(0xdf); /*FILDq state->tmu0_s*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, tmu0_s)); - addbyte(0xdf); /*FILDq state->tmu0_t*/ /*ST(0)=t, ST(1)=s, ST(2)=1/w*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, tmu0_t)); - addbyte(0xd9); /*FXCH ST(1)*/ /*ST(0)=s, ST(1)=t, ST(2)=1/w*/ - addbyte(0xc9); - addbyte(0xd8); /*FMUL ST(2)*/ /*ST(0)=s/w, ST(1)=t, ST(2)=1/w*/ - addbyte(0xca); - addbyte(0xd9); /*FXCH ST(1)*/ /*ST(0)=t, ST(1)=s/w, ST(2)=1/w*/ - addbyte(0xc9); - addbyte(0xd8); /*FMUL ST(2)*/ /*ST(0)=t/w, ST(1)=s/w, ST(2)=1/w*/ - addbyte(0xca); - addbyte(0xd9); /*FXCH ST(2)*/ /*ST(0)=1/w, ST(1)=s/w, ST(2)=t/w*/ - addbyte(0xca); - addbyte(0xd9); /*FSTPs log_temp*/ /*ST(0)=s/w, ST(1)=t/w*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, log_temp)); - addbyte(0xdf); /*FSITPq state->tex_s*/ - addbyte(0xbf); - addlong(offsetof(voodoo_state_t, tex_s)); - addbyte(0x8b); /*MOV EAX, log_temp*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, log_temp)); - addbyte(0xdf); /*FSITPq state->tex_t*/ - addbyte(0xbf); - addlong(offsetof(voodoo_state_t, tex_t)); - addbyte(0xc1); /*SHR EAX, 23-8*/ - addbyte(0xe8); - addbyte(15); - addbyte(0x0f); /*MOVZX EBX, AL*/ - addbyte(0xb6); - addbyte(0xd8); - addbyte(0x25); /*AND EAX, 0xff00*/ - addlong(0xff00); - addbyte(0x2d); /*SUB EAX, (127-44)<<8*/ - addlong((127-44+19) << 8); - addbyte(0x0f); /*MOVZX EBX, logtable[EBX]*/ - addbyte(0xb6); - addbyte(0x9b); - addlong(logtable); - addbyte(0x09); /*OR EAX, EBX*/ - addbyte(0xd8); -// addbyte(0x89); /*MOV state->lod_raw, EAX*/ -// addbyte(0x87); -// addlong(offsetof(voodoo_state_t, lod_raw)); - addbyte(0x03); /*ADD EAX, state->lod*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, tmu[0].lod)); -/*HACK*/ -#if 0 - addbyte(0x8b); /*MOV EAX, state->lod_min*/ - addbyte(0x87); - 
addlong(offsetof(voodoo_state_t, lod_min)); -#endif - addbyte(0x3b); /*CMP EAX, state->lod_min*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod_min)); - addbyte(0x0f); /*CMOVL EAX, state->lod_min*/ - addbyte(0x4c); - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod_min)); - addbyte(0x3b); /*CMP EAX, state->lod_max*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod_max)); - addbyte(0x0f); /*CMOVNL EAX, state->lod_max*/ - addbyte(0x4d); - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod_max)); - addbyte(0xc1); /*SHR EAX, 8*/ - addbyte(0xe8); - addbyte(8); - addbyte(0x89); /*MOV state->lod, EAX*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod)); - } - else - { - addbyte(0xf3); /*MOVQ XMM4, state->tmu0_s*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xa7); - addlong(offsetof(voodoo_state_t, tmu0_s)); - addbyte(0xf3); /*MOVQ XMM5, state->tmu0_t*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, tmu0_t)); - addbyte(0x66); /*SHRQ XMM4, 28*/ - addbyte(0x0f); - addbyte(0x73); - addbyte(0xd4); - addbyte(28); - addbyte(0x66); /*SHRQ XMM5, 28*/ - addbyte(0x0f); - addbyte(0x73); - addbyte(0xd5); - addbyte(28); - addbyte(0x66); /*MOVQ state->tex_s, XMM4*/ - addbyte(0x0f); - addbyte(0xd6); - addbyte(0xa7); - addlong(offsetof(voodoo_state_t, tex_s)); - addbyte(0x66); /*MOVQ state->tex_t, XMM5*/ - addbyte(0x0f); - addbyte(0xd6); - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, tex_t)); - addbyte(0x8b); /*MOV EAX, state->lod_min*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod_min)); - addbyte(0xc1); /*SHR EAX, 8*/ - addbyte(0xe8); - addbyte(8); - addbyte(0x89); /*MOV state->lod, EAX*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, lod)); - } - - - if (voodoo->trexInit1 & (1 << 18)) - { -#if 0 - addbyte(0xc7); /*MOV state->tex_r, 0*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, tex_r)); - addlong(0); - addbyte(0xc7); /*MOV state->tex_g, 0*/ - addbyte(0x87); - 
addlong(offsetof(voodoo_state_t, tex_g)); - addlong(0); - addbyte(0xc7); /*MOV state->tex_b, 1*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, tex_b)); - addlong(1); -#endif - addbyte(0xb8); /*MOV EAX, 0x000001*/ - addlong(0x000001); addbyte(0x66); /*MOVD XMM0, EAX*/ addbyte(0x0f); addbyte(0x6e); addbyte(0xc0); - } - else if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) - { - if (voodoo->bilinear_enabled && (params->textureMode & 6)) - { - addbyte(0x8b); /*MOV ECX, state->lod[EDI]*/ - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, lod)); - addbyte(0xbd); /*MOV EBP, 1*/ - addlong(1); - addbyte(0x8a); /*MOV DL, params->tex_shift[ESI+ECX*4]*/ - addbyte(0x94); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_shift)); - addbyte(0xd3); /*SHL EBP, CL*/ - addbyte(0xe5); - addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, tex_s)); - addbyte(0xc1); /*SHL EBP, 3*/ - addbyte(0xe5); - addbyte(3); - addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tex_t)); - addbyte(0x29); /*SUB EAX, EBP*/ - addbyte(0xe8); - addbyte(0x29); /*SUB EBX, EBP*/ - addbyte(0xeb); - addbyte(0xd3); /*SAR EAX, CL*/ - addbyte(0xf8); - addbyte(0xd3); /*SAR EBX, CL*/ - addbyte(0xfb); - addbyte(0x89); /*MOV EBP, EAX*/ - addbyte(0xc5); - addbyte(0x89); /*MOV ECX, EBX*/ - addbyte(0xd9); - addbyte(0x83); /*AND EBP, 0xf*/ - addbyte(0xe5); - addbyte(0xf); - addbyte(0xc1); /*SHL ECX, 4*/ - addbyte(0xe1); - addbyte(4); - addbyte(0xc1); /*SAR EAX, 4*/ - addbyte(0xf8); - addbyte(4); - addbyte(0x81); /*AND ECX, 0xf0*/ - addbyte(0xe1); - addlong(0xf0); - addbyte(0xc1); /*SAR EBX, 4*/ - addbyte(0xfb); - addbyte(4); - addbyte(0x09); /*OR EBP, ECX*/ - addbyte(0xcd); - addbyte(0x8b); /*MOV ECX, state->lod[EDI]*/ - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, lod)); - addbyte(0xc1); /*SHL EBP, 6*/ - addbyte(0xe5); - addbyte(6); - /*EAX = S, EBX = T, ECX = LOD, EDX = tex_shift, ESI=params, EDI=state, EBP = 
bilinear shift*/ - addbyte(0x8d); /*LEA ESI, [ESI+ECX*4]*/ - addbyte(0x34); - addbyte(0x8e); - addbyte(0x89); /*MOV ebp_store, EBP*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, ebp_store)); - addbyte(0x8b); /*MOV EBP, state->tex[EDI+ECX*4]*/ - addbyte(0xac); - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, tex)); - addbyte(0x88); /*MOV CL, DL*/ - addbyte(0xd1); - addbyte(0x89); /*MOV EDX, EBX*/ - addbyte(0xda); - if (!state->clamp_s) - { - addbyte(0x23); /*AND EAX, params->tex_w_mask[ESI]*/ - addbyte(0x86); - addlong(offsetof(voodoo_params_t, tex_w_mask)); - } - addbyte(0x83); /*ADD EDX, 1*/ - addbyte(0xc2); - addbyte(1); - if (state->clamp_t) - { - addbyte(0x0f); /*CMOVS EDX, zero*/ - addbyte(0x48); - addbyte(0x15); - addlong(&zero); - addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/ - addbyte(0x96); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - addbyte(0x0f); /*CMOVA EDX, params->tex_h_mask[ESI]*/ - addbyte(0x47); - addbyte(0x96); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - addbyte(0x85); /*TEST EBX,EBX*/ - addbyte(0xdb); - addbyte(0x0f); /*CMOVS EBX, zero*/ - addbyte(0x48); - addbyte(0x1d); - addlong(&zero); - addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/ - addbyte(0x9e); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - addbyte(0x0f); /*CMOVA EBX, params->tex_h_mask[ESI]*/ - addbyte(0x47); - addbyte(0x9e); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - } - else - { - addbyte(0x23); /*AND EDX, params->tex_h_mask[ESI]*/ - addbyte(0x96); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - addbyte(0x23); /*AND EBX, params->tex_h_mask[ESI]*/ - addbyte(0x9e); - addlong(offsetof(voodoo_params_t, tex_h_mask)); - } - /*EAX = S, EBX = T0, EDX = T1*/ - addbyte(0xd3); /*SHL EBX, CL*/ - addbyte(0xe3); - addbyte(0xd3); /*SHL EDX, CL*/ - addbyte(0xe2); - if (state->tformat & 8) - { - addbyte(0x8d); /*LEA EBX,[EBP+EBX*2]*/ - addbyte(0x5c); - addbyte(0x5d); - addbyte(0); - addbyte(0x8d); /*LEA EDX,[EBP+EDX*2]*/ - addbyte(0x54); - 
addbyte(0x55); - addbyte(0); - } - else - { - addbyte(0x01); /*ADD EBX, EBP*/ - addbyte(0xeb); - addbyte(0x01); /*ADD EDX, EBP*/ - addbyte(0xea); - } - if (state->clamp_s) - { - addbyte(0x8b); /*MOV EBP, params->tex_w_mask[ESI]*/ - addbyte(0xae); - addlong(offsetof(voodoo_params_t, tex_w_mask)); - addbyte(0x85); /*TEST EAX, EAX*/ - addbyte(0xc0); - addbyte(0x8b); /*MOV ebp_store2, ESI*/ - addbyte(0xb7); - addlong(offsetof(voodoo_state_t, ebp_store)); - addbyte(0x0f); /*CMOVS EAX, zero*/ - addbyte(0x48); - addbyte(0x05); - addlong(&zero); - addbyte(0x78); /*JS + - clamp on 0*/ - addbyte(2+3+2+ ((state->tformat & 8) ? (3+3+2) : (4+4+2))); - addbyte(0x3b); /*CMP EAX, EBP*/ - addbyte(0xc5); - addbyte(0x0f); /*CMOVAE EAX, EBP*/ - addbyte(0x43); - addbyte(0xc5); - addbyte(0x73); /*JAE + - clamp on +*/ - addbyte((state->tformat & 8) ? (3+3+2) : (4+4+2)); - } - else - { - addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI] - is S at texture edge (ie will wrap/clamp)?*/ - addbyte(0x86); - addlong(offsetof(voodoo_params_t, tex_w_mask)); - addbyte(0x8b); /*MOV ebp_store2, ESI*/ - addbyte(0xb7); - addlong(offsetof(voodoo_state_t, ebp_store)); - addbyte(0x74); /*JE +*/ - addbyte((state->tformat & 8) ? (3+3+2) : (4+4+2)); - } - - if (state->tformat & 8) - { - addbyte(0x8b); /*MOV EDX,[EDX+EAX*2]*/ - addbyte(0x14); - addbyte(0x42); - addbyte(0x8b); /*MOV EAX,[EBX+EAX*2]*/ - addbyte(0x04); - addbyte(0x43); - } - else - { - addbyte(0x0f); /*MOVZX EDX,W[EDX+EAX]*/ - addbyte(0xb7); - addbyte(0x14); - addbyte(0x02); - addbyte(0x0f); /*MOVZX EAX,W[EBX+EAX]*/ - addbyte(0xb7); - addbyte(0x04); - addbyte(0x03); - } - - if (state->clamp_s) - { - addbyte(0xeb); /*JMP +*/ - addbyte((state->tformat & 8) ? 
(3+4+3+3+4+3) : (4+4+2+2)); - - /*S clamped - the two S coordinates are the same*/ - if (state->tformat & 8) - { - addbyte(0x8b); /*MOV ECX, [EDX+EAX*2]*/ - addbyte(0x0c); - addbyte(0x42); - addbyte(0x8b); /*MOV EDX, [EDX+EAX*2-2]*/ - addbyte(0x54); - addbyte(0x42); - addbyte(-2); - addbyte(0x66); /*MOV DX, CX*/ - addbyte(0x89); - addbyte(0xca); - addbyte(0x8b); /*MOV ECX, [EBX+EAX*2]*/ - addbyte(0x0c); - addbyte(0x43); - addbyte(0x8b); /*MOV EAX, [EBX+EAX*2-2]*/ - addbyte(0x44); - addbyte(0x43); - addbyte(-2); - addbyte(0x66); /*MOV AX, CX*/ - addbyte(0x89); - addbyte(0xc8); - } - else - { - addbyte(0x0f); /*MOVZX EDX,W[EDX+EAX]*/ - addbyte(0xb7); - addbyte(0x14); - addbyte(0x02); - addbyte(0x0f); /*MOVZX EAX,W[EBX+EAX]*/ - addbyte(0xb7); - addbyte(0x04); - addbyte(0x03); - addbyte(0x88); /*MOV DH, DL*/ - addbyte(0xd6); - addbyte(0x88); /*MOV AH, AL*/ - addbyte(0xc4); - } - } - else - { - addbyte(0xeb); /*JMP +*/ - addbyte((state->tformat & 8) ? (3+3+3+3+3+3) : (2+2+4+4+2+2)); - - /*S wrapped - the two S coordinates are not contiguous*/ - if (state->tformat & 8) - { - addbyte(0x8b); /*MOV ECX, [EDX+EAX*2]*/ - addbyte(0x0c); - addbyte(0x42); - addbyte(0x8b); /*MOV EDX, [EDX-2]*/ - addbyte(0x52); - addbyte(-2); - addbyte(0x66); /*MOV DX, CX*/ - addbyte(0x89); - addbyte(0xca); - addbyte(0x8b); /*MOV ECX, [EBX+EAX*2]*/ - addbyte(0x0c); - addbyte(0x43); - addbyte(0x8b); /*MOV EAX, [EBX-2]*/ - addbyte(0x43); - addbyte(-2); - addbyte(0x66); /*MOV AX, CX*/ - addbyte(0x89); - addbyte(0xc8); - } - else - { - addbyte(0x8a); /*MOV CL, [EDX]*/ - addbyte(0x0a); - addbyte(0x8a); /*MOV CH, [EBX]*/ - addbyte(0x2b); - addbyte(0x0f); /*MOVZX EDX,B[EDX+EAX]*/ - addbyte(0xb6); - addbyte(0x14); - addbyte(0x02); - addbyte(0x0f); /*MOVZX EAX,B[EBX+EAX]*/ - addbyte(0xb6); - addbyte(0x04); - addbyte(0x03); - addbyte(0x88); /*MOV DH, CL*/ - addbyte(0xce); - addbyte(0x88); /*MOV AH, CH*/ - addbyte(0xec); - } - } - - addbyte(0x81); /*ADD ESI, bilinear_lookup*/ - addbyte(0xc6); - 
addlong(bilinear_lookup); - - switch (state->tformat) - { - case TEX_RGB332: - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, rgb332[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x8d); - addlong(rgb332); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x66); /*MOVD XMM1, rgb332[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x8d); - addlong(rgb332); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, rgb332[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x8d); - addlong(rgb332); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, rgb332[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x8d); - addlong(rgb332); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - 
addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_Y4I2Q2: - addbyte(0x8b); /*MOV EBP, state->palette[EDI]*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x44); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x5c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - 
addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_A8: - addbyte(0x66); /*MOVZX CX, AH*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*MOVZX AX, AL*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xc0); - addbyte(0x66); /*IMUL CX, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*IMUL AX, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x06); - addbyte(0x66); /*MOVZX BX, DH*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xde); - addbyte(0x66); /*MOVZX DX, DL*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xd2); - addbyte(0x66); /*ADD AX, CX*/ - addbyte(0x01); - addbyte(0xc8); - addbyte(0x66); /*IMUL BX, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x5e); - addbyte(0x30); - addbyte(0x66); /*IMUL DX, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x56); - addbyte(0x20); - addbyte(0x66); /*ADD AX, BX*/ - addbyte(0x01); - addbyte(0xd8); - addbyte(0x66); /*ADD AX, DX*/ - addbyte(0x01); - addbyte(0xd0); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0x66); /*MOV BX, AX*/ - addbyte(0x89); - addbyte(0xc3); - addbyte(0x0f); /*BSWAP EAX*/ - addbyte(0xc8); - addbyte(0x66); /*MOV AX, BX*/ - 
addbyte(0x89); - addbyte(0xd8); - break; - - case TEX_I8: - addbyte(0x66); /*MOVZX CX, AH*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*MOVZX AX, AL*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xc0); - addbyte(0x66); /*IMUL CX, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*IMUL AX, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x06); - addbyte(0x66); /*MOVZX BX, DH*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xde); - addbyte(0x66); /*MOVZX DX, DL*/ - addbyte(0x0f); - addbyte(0xb6); - addbyte(0xd2); - addbyte(0x66); /*ADD AX, CX*/ - addbyte(0x01); - addbyte(0xc8); - addbyte(0x66); /*IMUL BX, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x5e); - addbyte(0x30); - addbyte(0x66); /*IMUL DX, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xaf); - addbyte(0x56); - addbyte(0x20); - addbyte(0x66); /*ADD AX, BX*/ - addbyte(0x01); - addbyte(0xd8); - addbyte(0x66); /*ADD AX, DX*/ - addbyte(0x01); - addbyte(0xd0); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0xc1); /*SHL EAX, 8*/ - addbyte(0xe0); - addbyte(8); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_AI8: - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, ai44[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x8d); - addlong(ai44); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x66); /*MOVD XMM1, ai44[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x8d); - addlong(ai44); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - 
addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, ai44[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x8d); - addlong(ai44); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, ai44[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x8d); - addlong(ai44); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_PAL8: - addbyte(0x8b); /*MOV EBP, state->palette[EDI]*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x44); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - 
addbyte(0x60); - addbyte(0xc2); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x5c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_A8Y4I2Q2: - addbyte(0x8b); /*MOV EBP, 
state->palette[EDI]*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x44); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*PINSRW XMM0, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc1); - addbyte(3); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x0f); /*MOVZX EAX, AH*/ - addbyte(0xb6); - addbyte(0xc4); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PINSRW XMM1, EAX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc8); - addbyte(3); - addbyte(0x66); /*MOVD XMM3, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x5c); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PINSRW XMM3, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xd9); - addbyte(3); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - 
addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PINSR1 XMM1, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc9); - addbyte(3); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_R5G6B5: - addbyte(0x0f); /*MOVZX ECX, AX*/ - addbyte(0xb7); - addbyte(0xc8); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*MOVD XMM0, rgb565[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x8d); - addlong(rgb565); - addbyte(0x66); /*MOVD XMM1, rgb565[EAX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x85); - addlong(rgb565); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x0f); /*MOVZX ECX, DX*/ - addbyte(0xb7); - addbyte(0xca); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, 
rgb565[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x8d); - addlong(rgb565); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, rgb565[EDX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x95); - addlong(rgb565); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_ARGB1555: - addbyte(0x0f); /*MOVZX ECX, AX*/ - addbyte(0xb7); - addbyte(0xc8); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*MOVD XMM0, argb1555[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x8d); - addlong(argb1555); - addbyte(0x66); /*MOVD XMM1, argb1555[EAX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x85); - addlong(argb1555); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x0f); /*MOVZX ECX, DX*/ - addbyte(0xb7); 
- addbyte(0xca); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, argb1555[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x8d); - addlong(argb1555); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, argb1555[EDX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x95); - addlong(argb1555); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_ARGB4444: - addbyte(0x0f); /*MOVZX ECX, AX*/ - addbyte(0xb7); - addbyte(0xc8); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*MOVD XMM0, argb4444[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x8d); - addlong(argb4444); - addbyte(0x66); /*MOVD 
XMM1, argb4444[EAX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x85); - addlong(argb4444); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x0f); /*MOVZX ECX, DX*/ - addbyte(0xb7); - addbyte(0xca); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, argb4444[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x8d); - addlong(argb4444); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, argb4444[EDX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x95); - addlong(argb4444); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_A8I8: - addbyte(0x0f); /*MOVZX ECX, AX*/ - 
addbyte(0xb7); - addbyte(0xc8); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*MOVD XMM0, ai88[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x04); - addbyte(0x8d); - addlong(ai88); - addbyte(0x66); /*MOVD XMM1, ai88[EAX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x85); - addlong(ai88); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0x0f); /*MOVZX ECX, DX*/ - addbyte(0xb7); - addbyte(0xca); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*MOVD XMM3, ai88[ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x1c); - addbyte(0x8d); - addlong(ai88); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, ai88[EDX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x0c); - addbyte(0x95); - addlong(ai88); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); 
/*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - case TEX_APAL88: - addbyte(0x8b); /*MOV EBP, state->palette[EDI]*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x66); /*MOVD XMM0, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x44); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, AH*/ - addbyte(0xb6); - addbyte(0xcc); - addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xc2); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x66); /*PINSRW XMM0, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc1); - addbyte(3); - addbyte(0x0f); /*MOVZX ECX, AL*/ - addbyte(0xb6); - addbyte(0xc8); - addbyte(0x0f); /*MOVZX EAX, AH*/ - addbyte(0xb6); - addbyte(0xc4); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x06); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PINSRW XMM1, EAX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc8); - addbyte(3); - addbyte(0x66); /*MOVD XMM3, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x5c); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0xc1); /*SHR EDX, 16*/ - addbyte(0xea); - addbyte(16); - addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xda); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x10); - addbyte(0x66); /*PINSRW XMM3, ECX, 3*/ - 
addbyte(0x0f); - addbyte(0xc4); - addbyte(0xd9); - addbyte(3); - addbyte(0x0f); /*MOVZX ECX, DL*/ - addbyte(0xb6); - addbyte(0xca); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*MOVD XMM1, [EBP+ECX*4]*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0x4c); - addbyte(0x8d); - addbyte(0); - addbyte(0x0f); /*MOVZX ECX, DH*/ - addbyte(0xb6); - addbyte(0xce); - addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ - addbyte(0x0f); - addbyte(0x60); - addbyte(0xca); - addbyte(0x66); /*PMULLW XMM3, bilinear_lookup[ESI]+0x20*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x5e); - addbyte(0x20); - addbyte(0x66); /*PINSR1 XMM1, ECX, 3*/ - addbyte(0x0f); - addbyte(0xc4); - addbyte(0xc9); - addbyte(3); - addbyte(0x66); /*PADDW XMM0, XMM3*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc3); - addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x30*/ - addbyte(0x0f); - addbyte(0xd5); - addbyte(0x4e); - addbyte(0x30); - addbyte(0x66); /*PADDW XMM0, XMM1*/ - addbyte(0x0f); - addbyte(0xfd); - addbyte(0xc1); - addbyte(0x66); /*PSRLW XMM0, 8*/ - addbyte(0x0f); - addbyte(0x71); - addbyte(0xd0); - addbyte(8); - addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ - addbyte(0x0f); - addbyte(0x67); - addbyte(0xc0); - addbyte(0x66); /*MOV EAX, XMM0*/ - addbyte(0x0f); - addbyte(0x7e); - addbyte(0xc0); - break; - - default: - fatal("Unknown texture format %i\n", state->tformat); - } - - addbyte(0x8b); /*MOV ESI, [ESP+8]*/ - addbyte(0x74); - addbyte(0x24); - addbyte(8+16); /*CHECK!*/ - } - else - { - addbyte(0x8b); /*MOV ECX, state->lod[EDI]*/ - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, lod)); - addbyte(0x8a); /*MOV DL, params->tex_shift[ESI+ECX*4]*/ - addbyte(0x94); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_shift)); - addbyte(0x8b); /*MOV EBP, state->tex[EDI+ECX*4]*/ - addbyte(0xac); - addbyte(0x8f); - addlong(offsetof(voodoo_state_t, tex)); - addbyte(0x80); /*ADD CL, 4*/ - addbyte(0xc1); - addbyte(4); - addbyte(0x8b); /*MOV EAX, 
state->tex_s[EDI]*/ - addbyte(0x87); - addlong(offsetof(voodoo_state_t, tex_s)); - addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tex_t)); - addbyte(0xd3); /*SHR EAX, CL*/ - addbyte(0xe8); - addbyte(0xd3); /*SHR EBX, CL*/ - addbyte(0xeb); - if (state->clamp_s) - { - addbyte(0x85); /*TEST EAX, EAX*/ - addbyte(0xc0); - addbyte(0x0f); /*CMOVS EAX, zero*/ - addbyte(0x48); - addbyte(0x05); - addlong(&zero); - addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/ - addbyte(0x84); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_w_mask) - 0x10); - addbyte(0x0f); /*CMOVAE EAX, params->tex_w_mask[ESI+ECX*4]*/ - addbyte(0x43); - addbyte(0x84); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_w_mask) - 0x10); - - } - else - { - addbyte(0x23); /*AND EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/ - addbyte(0x84); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_w_mask) - 0x10); - } - if (state->clamp_t) - { - addbyte(0x85); /*TEST EBX, EBX*/ - addbyte(0xdb); - addbyte(0x0f); /*CMOVS EBX, zero*/ - addbyte(0x48); - addbyte(0x1d); - addlong(&zero); - addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/ - addbyte(0x9c); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_h_mask) - 0x10); - addbyte(0x0f); /*CMOVAE EBX, params->tex_h_mask[ESI+ECX*4]*/ - addbyte(0x43); - addbyte(0x9c); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_h_mask) - 0x10); - } - else - { - addbyte(0x23); /*AND EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/ - addbyte(0x9c); - addbyte(0x8e); - addlong(offsetof(voodoo_params_t, tex_h_mask) - 0x10); - } - addbyte(0x88); /*MOV CL, DL*/ - addbyte(0xd1); - addbyte(0xd3); /*SHL EBX, CL*/ - addbyte(0xe3); - addbyte(0x01); /*ADD EBX, EAX*/ - addbyte(0xc3); - - if (state->tformat & 8) - { - addbyte(0x0f); /*MOVZX EAX,W[EBP+EBX*2]*/ - addbyte(0xb7); - addbyte(0x44); - addbyte(0x5d); - addbyte(0); - } - else - { - addbyte(0x0f); /*MOVZX EAX,B[EBP+EBX]*/ - addbyte(0xb6); - 
addbyte(0x44); - addbyte(0x1d); - addbyte(0); - } - - switch (state->tformat) - { - case TEX_RGB332: - addbyte(0x8b); /*MOV EAX, rgb332[EAX*4]*/ - addbyte(0x04); - addbyte(0x85); - addlong(rgb332); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_Y4I2Q2: - addbyte(0x8b); /*MOV EBP, state->palette[EDI]*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x8b); /*MOV EAX, [EBP+EAX*4]*/ - addbyte(0x44); - addbyte(0x85); - addbyte(0); -// addbyte(0x0f); /*BSWAP EAX*/ -// addbyte(0xc8); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_A8: - addbyte(0x88); /*MOV AH, AL*/ - addbyte(0xc4); - addbyte(0x66); /*MOV BX, AX*/ - addbyte(0x89); - addbyte(0xc3); - addbyte(0x0f); /*BSWAP EAX*/ - addbyte(0xc8); - addbyte(0x66); /*MOV AX, BX*/ - addbyte(0x89); - addbyte(0xd8); - break; - - case TEX_I8: - addbyte(0x88); /*MOV AH, AL*/ - addbyte(0xc4); - addbyte(0xc1); /*SHL EAX, 8*/ - addbyte(0xe0); - addbyte(8); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); -// addbyte(0x25); /*AND EAX, 0x00ffffff*/ -// addlong(0x00000000); - break; - - case TEX_AI8: - addbyte(0x89); /*MOV EBX, EAX*/ - addbyte(0xc3); - addbyte(0x83); /*AND EAX, 0x0f*/ - addbyte(0xe0); - addbyte(0x0f); - addbyte(0x81); /*AND EBX, 0xf0*/ - addbyte(0xe3); - addlong(0xf0); - addbyte(0x89); /*MOV ECX, EAX*/ - addbyte(0xc1); - addbyte(0x89); /*MOV EDX, EBX*/ - addbyte(0xda); - addbyte(0xc1); /*SHL ECX, 4*/ - addbyte(0xe1); - addbyte(4); - addbyte(0xc1); /*SHR EDX, 4*/ - addbyte(0xe2); - addbyte(4); - addbyte(0x09); /*OR EAX, ECX*/ - addbyte(0xc8); - addbyte(0x09); /*OR EBX, EDX*/ - addbyte(0xd3); - addbyte(0x88); /*MOV AH, AL*/ - addbyte(0xc4); - addbyte(0xc1); /*SHL EBX, 24*/ - addbyte(0xe3); - addbyte(24); - addbyte(0xc1); /*SHL EAX, 8*/ - addbyte(0xe0); - addbyte(8); - addbyte(0x88); /*MOV AL, AH*/ - addbyte(0xe0); - addbyte(0x09); /*OR EAX, EBX*/ - 
addbyte(0xd8); - break; - - case TEX_PAL8: - addbyte(0x8b); /*MOV EBP, state->palette[EDI]*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x8b); /*MOV EAX, [EBP+EAX*4]*/ - addbyte(0x44); - addbyte(0x85); - addbyte(0); -// addbyte(0x0f); /*BSWAP EAX*/ -// addbyte(0xc8); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_A8Y4I2Q2: - addbyte(0x8b); /*MOV EBP, state->palette[EDI]*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x89); /*MOV EBX, EAX*/ - addbyte(0xc3); - addbyte(0x25); /*AND EAX, 0x000000ff*/ - addlong(0x000000ff); - addbyte(0x8b); /*MOV EAX, [EBP+EAX*4]*/ - addbyte(0x44); - addbyte(0x85); - addbyte(0); - addbyte(0xc1); /*SHL EBX, 16*/ - addbyte(0xe3); - addbyte(16); - addbyte(0x81); /*AND EBX, 0xff000000*/ - addbyte(0xe3); - addlong(0xff000000); - addbyte(0x25); /*AND EAX, 0x00ffffff*/ - addlong(0x00ffffff); - addbyte(0x09); /*OR EAX, EBX*/ - addbyte(0xd8); - break; - - case TEX_R5G6B5: - addbyte(0x8b); /*MOV EAX, rgb565[EAX*4]*/ - addbyte(0x04); - addbyte(0x85); - addlong(rgb565); - addbyte(0x0d); /*OR EAX, 0xff000000*/ - addlong(0xff000000); - break; - - case TEX_ARGB1555: - addbyte(0x8b); /*MOV EAX, argb1555[EAX*4]*/ - addbyte(0x04); - addbyte(0x85); - addlong(argb1555); - break; - - case TEX_ARGB4444: - addbyte(0x8b); /*MOV EAX, argb4444[EAX*4]*/ - addbyte(0x04); - addbyte(0x85); - addlong(argb4444); - break; - - case TEX_A8I8: - addbyte(0x89); /*MOV EBX, EAX*/ - addbyte(0xc3); - addbyte(0xc1); /*SHL EAX, 16*/ - addbyte(0xe0); - addbyte(16); - addbyte(0x88); /*MOV AL, BL*/ - addbyte(0xd8); - addbyte(0x88); /*MOV AH, BL*/ - addbyte(0xdc); - break; - - case TEX_APAL88: - addbyte(0x8b); /*MOV EBP, state->palette[EDI]*/ - addbyte(0xaf); - addlong(offsetof(voodoo_state_t, palette)); - addbyte(0x89); /*MOV EBX, EAX*/ - addbyte(0xc3); - addbyte(0x25); /*AND EAX, 0x000000ff*/ - addlong(0x000000ff); - addbyte(0x8b); /*MOV EAX, [EBP+EAX*4]*/ - addbyte(0x44); - 
addbyte(0x85); - addbyte(0); - addbyte(0xc1); /*SHL EBX, 16*/ - addbyte(0xe3); - addbyte(16); -// addbyte(0x0f); /*BSWAP EAX*/ -// addbyte(0xc8); - addbyte(0x81); /*AND EBX, 0xff000000*/ - addbyte(0xe3); - addlong(0xff000000); - addbyte(0x25); /*AND EAX, 0x00ffffff*/ - addlong(0x00ffffff); - addbyte(0x09); /*OR EAX, EBX*/ - addbyte(0xd8); - -// addbyte(0x25); /*AND EAX, 0x00ffffff*/ -// addlong(0x00000000); - break; - - default: - fatal("Unknown texture format %i\n", state->tformat); - } - } - if ((params->fbzMode & FBZ_CHROMAKEY)) - { - addbyte(0x8b); /*MOV EBX, params->chromaKey[ESI]*/ - addbyte(0x9e); - addlong(offsetof(voodoo_params_t, chromaKey)); - addbyte(0x31); /*XOR EBX, EAX*/ - addbyte(0xc3); - addbyte(0x81); /*AND EBX, 0xffffff*/ - addbyte(0xe3); - addlong(0xffffff); - addbyte(0x0f); /*JE skip*/ - addbyte(0x84); - chroma_skip_pos = block_pos; - addlong(0); - } -#if 0 - addbyte(0x0f); /*MOVZX EBX, AL*/ - addbyte(0xb6); - addbyte(0xd8); - addbyte(0x89); /*MOV state->tex_b[EDI], EBX*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tex_b)); - addbyte(0x0f); /*MOVZX EBX, AH*/ - addbyte(0xb6); - addbyte(0xdc); - addbyte(0xc1); /*SHR EAX, 16*/ - addbyte(0xe8); - addbyte(16); - addbyte(0x89); /*MOV state->tex_g[EDI], EBX*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tex_g)); - addbyte(0x0f); /*MOVZX EBX, AL*/ - addbyte(0xb6); - addbyte(0xd8); - addbyte(0x89); /*MOV state->tex_r[EDI], EBX*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tex_r)); - addbyte(0x0f); /*MOVZX EBX, AH*/ - addbyte(0xb6); - addbyte(0xdc); - addbyte(0x89); /*MOV state->tex_a[EDI], EBX*/ - addbyte(0x9f); - addlong(offsetof(voodoo_state_t, tex_a)); -#endif -//#if 0 -// addbyte(0x89); /*MOV state->tex_out[EDI], EAX*/ -// addbyte(0x87); -// addlong(offsetof(voodoo_state_t, tex_out)); - addbyte(0x66); /*MOVD XMM0, EAX*/ - addbyte(0x0f); - addbyte(0x6e); - addbyte(0xc0); - addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xc1); /*SHR EAX, 8*/ addbyte(0xe8); addbyte(24); addbyte(0x89); 
/*MOV state->tex_a[EDI], EAX*/ addbyte(0x87); addlong(offsetof(voodoo_state_t, tex_a)); -//#endif + } + else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH) + { + /*TMU0 in pass-through mode, only sample TMU1*/ + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xe8); + addbyte(24); + addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + } + else + { + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1); + + addbyte(0x66); /*MOVD XMM3, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xd8); + if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && tc_sub_clocal_1) + { + addbyte(0x8b); /*MOV EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + if (!tc_reverse_blend_1) + { + addbyte(0xbb); /*MOV EBX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + addbyte(0x83); /*AND EAX, 1*/ + addbyte(0xe0); + addbyte(1); + if (!tca_reverse_blend_1) + { + addbyte(0xb9); /*MOV ECX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x31); /*XOR ECX, EAX*/ + addbyte(0xc1); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/ + } + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + if (tc_sub_clocal_1) + { + switch (tc_mselect_1) + { + case TC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case TC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOVQ XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + break; + case TC_MSELECT_AOTHER: + addbyte(0x66); /*PXOR XMM0, XMM0*/ +
addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case TC_MSELECT_ALOCAL: + addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc3); + addbyte(0xff); + break; + case TC_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/ + addbyte(0xe0); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0); + addbyte(0); + break; + case TC_MSELECT_LOD_FRAC: + addbyte(0x66); /*MOVD XMM0, state->lod_frac[1]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[1])); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0); + addbyte(0); + break; + } + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x66); /*PXOR XMM0, xmm_00_ff_w[EBX]*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x83); + addlong((uint32_t)&xmm_00_ff_w[0]); + } + else if (!tc_reverse_blend_1) + { + addbyte(0x66); /*PXOR XMM0, xmm_ff_w*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x05); + addlong((uint32_t)&xmm_ff_w); + } + addbyte(0x66); /*PADD XMM0, xmm_01_w*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x05); + addlong((uint32_t)&xmm_01_w); + addbyte(0xf3); /*MOVQ XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xca); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PMULLW XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc3); + 
addbyte(0x66); /*PMULHW XMM5, XMM3*/ + addbyte(0x0f); + addbyte(0xe5); + addbyte(0xeb); + addbyte(0x66); /*PUNPCKLWD XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xc5); + addbyte(0x66); /*PSRAD XMM0, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PSUBW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc8); + if (tc_add_clocal_1) + { + addbyte(0x66); /*PADDW XMM1, XMM3*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xcb); + } + else if (tc_add_alocal_1) + { + addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc3); + addbyte(0xff); + addbyte(0x66); /*PADDW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc8); + } + addbyte(0x66); /*PACKUSWB XMM3, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xd9); + if (tca_sub_clocal_1) + { + addbyte(0x66); /*MOVD EBX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdb); + } + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + } + + if (tca_sub_clocal_1) + { + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + switch (tca_mselect_1) + { + case TCA_MSELECT_ZERO: + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + break; + case TCA_MSELECT_CLOCAL: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case TCA_MSELECT_AOTHER: + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + break; + case TCA_MSELECT_ALOCAL: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case TCA_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/ + addbyte(0xe0); + 
addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + break; + case TCA_MSELECT_LOD_FRAC: + addbyte(0x8b); /*MOV EAX, state->lod_frac[1]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[1])); + break; + } + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x33); /*XOR EAX, i_00_ff_w[ECX*4]*/ + addbyte(0x04); + addbyte(0x8d); + addlong((uint32_t)i_00_ff_w); + } + else if (!tca_reverse_blend_1) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + addbyte(0x83); /*ADD EAX, 1 (83 /0 ib)*/ + addbyte(0xc0); + addbyte(1); + addbyte(0x0f); /*IMUL EAX, EBX*/ + addbyte(0xaf); + addbyte(0xc3); + addbyte(0xb9); /*MOV ECX, 0xff*/ + addlong(0xff); + addbyte(0xf7); /*NEG EAX*/ + addbyte(0xd8); + addbyte(0xc1); /*SAR EAX, 8*/ + addbyte(0xf8); + addbyte(8); + if (tca_add_clocal_1 || tca_add_alocal_1) + { + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + } + addbyte(0x39); /*CMP ECX, EAX*/ + addbyte(0xc1); + addbyte(0x0f); /*CMOVA ECX, EAX*/ + addbyte(0x47); + addbyte(0xc8); + addbyte(0x66); /*PINSRW XMM3, EAX, 3*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0xd8); + addbyte(3); + } + + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x8b); /*MOV EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + if (!tc_reverse_blend) + { + addbyte(0xbb); /*MOV EBX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + addbyte(0x83); /*AND EAX, 1*/ + addbyte(0xe0); + addbyte(1); + if (!tca_reverse_blend) + { + addbyte(0xb9); /*MOV ECX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x31); /*XOR ECX, EAX*/ +
addbyte(0xc1); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/ + } + + /*XMM0 = TMU0 output, XMM3 = TMU1 output*/ + + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + if (tc_zero_other) + { + addbyte(0x66); /*PXOR XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc9); + } + else + { + addbyte(0xf3); /*MOV XMM1, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xcb); + } + if (tc_sub_clocal) + { + addbyte(0x66); /*PSUBW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc8); + } + + switch (tc_mselect) + { + case TC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM4, XMM4*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe4); + break; + case TC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOV XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe0); + break; + case TC_MSELECT_AOTHER: + addbyte(0xf2); /*PSHUFLW XMM4, XMM3, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe3); + addbyte(0xff); + break; + case TC_MSELECT_ALOCAL: + addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe0); + addbyte(0xff); + break; + case TC_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[0]*/ + addlong(params->detail_bias[0]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[0]*/ + addlong(params->detail_max[0]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[0]*/ + addbyte(0xe0); + addbyte(params->detail_scale[0]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM4, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xe0); + addbyte(0xf2); /*PSHUFLW XMM4, XMM4, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe4); + addbyte(0); + break; + case TC_MSELECT_LOD_FRAC: + addbyte(0x66); /*MOVD XMM0, 
state->lod_frac[0]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, lod_frac[0])); + addbyte(0xf2); /*PSHUFLW XMM4, XMM4, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe4); + addbyte(0); + break; + } + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x66); /*PXOR XMM4, xmm_00_ff_w[EBX]*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xa3); + addlong((uint32_t)&xmm_00_ff_w[0]); + } + else if (!tc_reverse_blend) + { + addbyte(0x66); /*PXOR XMM4, FF*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x25); + addlong(&xmm_ff_w); + } + addbyte(0x66); /*PADDW XMM4, 1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x25); + addlong(&xmm_01_w); + addbyte(0xf3); /*MOVQ XMM5, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe9); + addbyte(0x66); /*PMULLW XMM1, XMM4*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xcc); + + + addbyte(0x66); /*PMULHW XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0xe5); + addbyte(0xec); + addbyte(0x66); /*PUNPCKLWD XMM1, XMM5*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xcd); + addbyte(0x66); /*PSRAD XMM1, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe1); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc9); + + if (tc_add_clocal) + { + addbyte(0x66); /*PADDW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc8); + } + else if (tc_add_alocal) + { + addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe0); + addbyte(0xff); + addbyte(0x66); /*PADDW XMM1, XMM4*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xcc); + } + if (tc_invert_output) + { + addbyte(0x66); /*PXOR XMM1, FF*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x0d); + addlong(&xmm_ff_w); + } + + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xdb); + addbyte(0x66); /*PACKUSWB XMM1, XMM1*/ +
addbyte(0x0f); + addbyte(0x67); + addbyte(0xc9); + + if (tca_zero_other) + { + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + } + else + { + addbyte(0x66); /*MOV EAX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xd8); + addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xe8); + addbyte(24); + } + if (tca_sub_clocal) + { + addbyte(0x66); /*MOV EBX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + addbyte(0x29); /*SUB EAX, EBX*/ + addbyte(0xd8); + } + switch (tca_mselect) + { + case TCA_MSELECT_ZERO: + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + break; + case TCA_MSELECT_CLOCAL: + addbyte(0x66); /*MOV EBX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_AOTHER: + addbyte(0x66); /*MOV EBX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_ALOCAL: + addbyte(0x66); /*MOV EBX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_DETAIL: + addbyte(0xbb); /*MOV EBX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EBX, state->lod*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EBX, params->detail_scale[1]*/ + addbyte(0xe3); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EBX, EDX*/ + addbyte(0xd3); + addbyte(0x0f); /*CMOVA EBX, EDX*/ + addbyte(0x47); + addbyte(0xda); + break; + case TCA_MSELECT_LOD_FRAC: + addbyte(0x8b); /*MOV EBX, state->lod_frac[0]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_frac[0])); + break; + } + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x33); /*XOR EBX,
i_00_ff_w[ECX*4]*/ + addbyte(0x1c); + addbyte(0x8d); + addlong((uint32_t)i_00_ff_w); + } + else if (!tca_reverse_blend) + { + addbyte(0x81); /*XOR EBX, 0xFF*/ + addbyte(0xf3); + addlong(0xff); + } + + addbyte(0x83); /*ADD EBX, 1*/ + addbyte(0xc3); + addbyte(1); + addbyte(0x0f); /*IMUL EAX, EBX*/ + addbyte(0xaf); + addbyte(0xc3); + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + addbyte(0xc1); /*SHR EAX, 8*/ + addbyte(0xe8); + addbyte(8); + if (tca_add_clocal || tca_add_alocal) + { + addbyte(0x66); /*MOV EBX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + } + addbyte(0x0f); /*CMOVS EAX, EDX*/ + addbyte(0x48); + addbyte(0xc2); + addbyte(0xba); /*MOV EDX, 0xff*/ + addlong(0xff); + addbyte(0x3d); /*CMP EAX, 0xff*/ + addlong(0xff); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + if (tca_invert_output) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + + addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + + addbyte(0xf3); /*MOVQ XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc1); } + if ((params->fbzMode & FBZ_CHROMAKEY)) + { + addbyte(0x66); /*MOVD EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + addbyte(0x8b); /*MOV EBX, params->chromaKey[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, chromaKey)); + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x81); /*AND EBX, 0xffffff*/ + addbyte(0xe3); + addlong(0xffffff); + addbyte(0x0f); /*JE skip*/ + addbyte(0x84); + chroma_skip_pos = block_pos; + addlong(0); + } + + if (voodoo->trexInit1[0] & (1 << 18)) + { + addbyte(0xb8); /*MOV EAX, 0x000001*/ + if (voodoo->dual_tmus) + { + addlong(0x0000c1); + } + else + { + addlong(0x000001); + } + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + } if (params->alphaMode & ((1 << 
0) | (1 << 4))) { @@ -3637,6 +2913,62 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x24); addbyte(8+16); + if (voodoo->dual_tmus) + { + addbyte(0xf3); /*MOVDQU XMM3, state->tmu1_s[EDI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu1_s)); + addbyte(0xf3); /*MOVQ XMM4, state->tmu1_w[EDI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu1_w)); + addbyte(0xf3); /*MOVDQU XMM5, params->tmu[1].dSdX[ESI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tmu[1].dSdX)); + addbyte(0xf3); /*MOVQ XMM6, params->tmu[1].dWdX[ESI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xb6); + addlong(offsetof(voodoo_params_t, tmu[1].dWdX)); + if (state->xdir > 0) + { + addbyte(0x66); /*PADDQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xdd); + addbyte(0x66); /*PADDQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xe6); + } + else + { + addbyte(0x66); /*PSUBQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xdd); + addbyte(0x66); /*PSUBQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xe6); + } + addbyte(0xf3); /*MOVDQU state->tmu1_s, XMM3*/ + addbyte(0x0f); + addbyte(0x7f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu1_s)); + addbyte(0x66); /*MOVQ state->tmu1_w, XMM4*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu1_w)); + } + addbyte(0xf3); /*MOVDQU XMM1, state->ib[EDI]*/ addbyte(0x0f); addbyte(0x6f); @@ -3760,6 +3092,24 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addlong(offsetof(voodoo_state_t, pixel_count)); addbyte(1); + if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) + { + if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH || + (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL) + { + addbyte(0x83); /*ADD state->texel_count[EDI], 1*/ + 
addbyte(0x87); + addlong(offsetof(voodoo_state_t, texel_count)); + addbyte(1); + } + else + { + addbyte(0x83); /*ADD state->texel_count[EDI], 2*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, texel_count)); + addbyte(2); + } + } addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ addbyte(0x87); addlong(offsetof(voodoo_state_t, x)); @@ -3792,6 +3142,9 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo addbyte(0x5d); /*POP EBP*/ addbyte(0xC3); /*RET*/ + + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + cs = cs; } static int voodoo_recomp = 0; static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even) @@ -3810,8 +3163,8 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, params->fbzMode == data->fbzMode && params->fogMode == data->fogMode && params->fbzColorPath == data->fbzColorPath && - (voodoo->trexInit1 & (1 << 18)) == data->trexInit1 && - params->textureMode == data->textureMode) + (voodoo->trexInit1[0] & (1 << 18)) == data->trexInit1 && + params->textureMode[0] == data->textureMode) { last_block[odd_even] = b; return data->code_block; @@ -3830,8 +3183,8 @@ voodoo_recomp++; data->fbzMode = params->fbzMode; data->fogMode = params->fogMode; data->fbzColorPath = params->fbzColorPath; - data->trexInit1 = voodoo->trexInit1 & (1 << 18); - data->textureMode = params->textureMode; + data->trexInit1 = voodoo->trexInit1[0] & (1 << 18); + data->textureMode = params->textureMode[0]; next_block_to_write[odd_even] = (next_block_to_write[odd_even] + 1) & 7; @@ -3878,12 +3231,12 @@ static void voodoo_codegen_init(voodoo_t *voodoo) d[2] = (16 - _ds) * dt; d[3] = _ds * dt; - bilinear_lookup[c*4] = _mm_set_epi32(0, 0, d[0] | (d[0] << 16), d[0] | (d[0] << 16)); - bilinear_lookup[c*4 + 1] = _mm_set_epi32(0, 0, d[1] | (d[1] << 16), d[1] | (d[1] << 16)); - bilinear_lookup[c*4 + 2] = _mm_set_epi32(0, 0, d[2] | (d[2] << 16), d[2] | (d[2] << 16)); - 
bilinear_lookup[c*4 + 3] = _mm_set_epi32(0, 0, d[3] | (d[3] << 16), d[3] | (d[3] << 16)); + bilinear_lookup[c*2] = _mm_set_epi32(d[1] | (d[1] << 16), d[1] | (d[1] << 16), d[0] | (d[0] << 16), d[0] | (d[0] << 16)); + bilinear_lookup[c*2 + 1] = _mm_set_epi32(d[3] | (d[3] << 16), d[3] | (d[3] << 16), d[2] | (d[2] << 16), d[2] | (d[2] << 16)); } alookup[256] = _mm_set_epi32(0, 0, 256 | (256 << 16), 256 | (256 << 16)); + xmm_00_ff_w[0] = _mm_set_epi32(0, 0, 0, 0); + xmm_00_ff_w[1] = _mm_set_epi32(0, 0, 0xff | (0xff << 16), 0xff | (0xff << 16)); } static void voodoo_codegen_close(voodoo_t *voodoo) diff --git a/src/win.c b/src/win.c index 2fb56b520..e6a27dbd6 100644 --- a/src/win.c +++ b/src/win.c @@ -950,6 +950,8 @@ void atapi_close(void) } } +char *floppy_image_extensions = "All floppy images (*.12;*.144;*.360;*.720;*.86F;*.BIN;*.DSK;*.FDI;*.FLP;*.IMA;*.IMD;*.IMG;*.TD0;*.VFD;*.XDF)\0*.12;*.144;*.360;*.720;*.86F;*.BIN;*.DSK;*.FDI;*.FLP;*.IMA;*.IMD;*.IMG;*.TD0;*.VFD;*.XDF\0Advanced sector-based images (*.IMD;*.TD0)\0*.IMD;*.TD0\0Basic sector-based images (*.12;*.144;*.360;*.720;*.BIN;*.DSK;*.FDI;*.FLP;*.IMA;*.IMG;*.VFD;*.XDF)\0*.12;*.144;*.360;*.720;*.BIN;*.DSK;*.FDI;*.FLP;*.IMA;*.IMG;*.VFD;*.XDF\0Flux images (*.FDI)\0*.FDI\0Surface-based images (*.86F)\0*.86F\0All files (*.*)\0*.*\0"; + LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam) { HMENU hmenu; @@ -1001,7 +1003,7 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM break; case IDM_DISC_A: case IDM_DISC_A_WP: - if (!getfile(hwnd,"Disc image (*.12;*.144;*.360;*.720;*.86F;*.DSK;*.IMG;*.IMA;*.FDI;*.FLP;*.TD0;*.VFD;*.XDF)\0*.12;*.144;*.360;*.720;*.86F;*.DSK;*.IMG;*.IMA;*.FDI;*.FLP;*.TD0;*.VFD;*.XDF\0All files (*.*)\0*.*\0",discfns[0])) + if (!getfile(hwnd, floppy_image_extensions, discfns[0])) { disc_close(0); ui_writeprot[0] = (LOWORD(wParam) == IDM_DISC_A_WP) ? 
1 : 0; @@ -1011,7 +1013,7 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM break; case IDM_DISC_B: case IDM_DISC_B_WP: - if (!getfile(hwnd,"Disc image (*.12;*.144;*.360;*.720;*.86F;*.DSK;*.IMG;*.IMA;*.FDI;*.FLP;*.TD0;*.VFD;*.XDF)\0*.12;*.144;*.360;*.720;*.86F;*.DSK;*.IMG;*.IMA;*.FDI;*.FLP;*.TD0;*.VFD;*.XDF\0All files (*.*)\0*.*\0",discfns[1])) + if (!getfile(hwnd, floppy_image_extensions, discfns[1])) { disc_close(1); ui_writeprot[1] = (LOWORD(wParam) == IDM_DISC_B_WP) ? 1 : 0;