Ported the latest Voodoo patches from PCem.

This commit is contained in:
TC1995
2020-11-18 00:21:07 +01:00
parent d2ad8fce43
commit 7bb51b4c7a
37 changed files with 13020 additions and 7001 deletions

View File

@@ -5,15 +5,16 @@
fbzColorPath
*/
#ifdef __linux__
# include <sys/mman.h>
# include <unistd.h>
#if defined(__linux__) || defined(__APPLE__)
#include <sys/mman.h>
#include <unistd.h>
#endif
#if defined WIN32 || defined _WIN32 || defined _WIN32
# include <windows.h>
#define BITMAP windows_BITMAP
#include <windows.h>
#undef BITMAP
#endif
#include <intrin.h>
#include <xmmintrin.h>
#define BLOCK_NUM 8
@@ -32,35 +33,43 @@ typedef struct voodoo_x86_data_t
uint32_t fbzColorPath;
uint32_t textureMode[2];
uint32_t tLOD[2];
uint32_t trexInit1;
uint32_t trexInit1;
int is_tiled;
} voodoo_x86_data_t;
static int last_block[2] = {0, 0};
static int next_block_to_write[2] = {0, 0};
static int last_block[4] = {0, 0};
static int next_block_to_write[4] = {0, 0};
/*
 * Append one byte to code_block at block_pos, but only while block_pos is
 * below BLOCK_SIZE; calls fatal("Over!\n") once block_pos has reached
 * BLOCK_SIZE.
 * The expansion is two separate if statements with no do/while wrapper, so
 * using it as the body of an unbraced if/else splits the expansion apart.
 * NOTE(review): a do { } while (0) definition of the same macro follows
 * immediately - this looks like the pre-change side of a diff hunk; confirm
 * which definition is live.
 */
#define addbyte(val) \
if (block_pos < BLOCK_SIZE) \
code_block[block_pos++] = val; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
/*
 * Emit a single byte into code_block at the current block_pos and advance
 * the position by one. Once the position reaches BLOCK_SIZE the macro calls
 * fatal("Over!\n"). Wrapped in do/while so it behaves as one statement.
 * Expects code_block and block_pos to be in scope at the point of use.
 */
#define addbyte(val)                        \
    do {                                    \
        code_block[block_pos] = (val);      \
        block_pos += 1;                     \
        if (!(block_pos < BLOCK_SIZE))      \
            fatal("Over!\n");               \
    } while (0)
/*
 * Store a 16-bit value at code_block[block_pos], advance block_pos by 2,
 * then call fatal("Over!\n") if block_pos has reached BLOCK_SIZE.
 * The expansion is three separate statements with no do/while wrapper, and
 * the store happens before the bounds check.
 * NOTE(review): a do { } while (0) definition of the same macro follows
 * immediately - this looks like the pre-change side of a diff hunk; confirm
 * which definition is live.
 */
#define addword(val) \
*(uint16_t *)&code_block[block_pos] = val; \
block_pos += 2; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
/*
 * Emit a 16-bit value into code_block at block_pos and advance the position
 * by two bytes.
 *
 * The bounds check runs BEFORE the store. fatal("Over!\n") fires under the
 * same condition as before (the position after the write would be
 * >= BLOCK_SIZE), but nothing is written in that case, so the store can no
 * longer spill past the end of the block just ahead of the fatal call.
 *
 * Expects code_block and block_pos to be in scope at the point of use.
 * The store goes through a cast pointer at an arbitrary byte offset;
 * presumably acceptable because this generator targets x86 - confirm if the
 * file is ever built elsewhere.
 */
#define addword(val)                                        \
    do {                                                    \
        if (block_pos + 2 >= BLOCK_SIZE)                    \
            fatal("Over!\n");                               \
        else {                                              \
            *(uint16_t *)&code_block[block_pos] = (val);    \
            block_pos += 2;                                 \
        }                                                   \
    } while (0)
/*
 * Store a 32-bit value at code_block[block_pos], advance block_pos by 4,
 * then call fatal("Over!\n") if block_pos has reached BLOCK_SIZE.
 * The expansion is three separate statements with no do/while wrapper, and
 * the store happens before the bounds check.
 * NOTE(review): a do { } while (0) definition of the same macro follows
 * immediately - this looks like the pre-change side of a diff hunk; confirm
 * which definition is live.
 */
#define addlong(val) \
*(uint32_t *)&code_block[block_pos] = val; \
block_pos += 4; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
/*
 * Emit a 32-bit value into code_block at block_pos and advance the position
 * by four bytes.
 *
 * The bounds check runs BEFORE the store. fatal("Over!\n") fires under the
 * same condition as before (the position after the write would be
 * >= BLOCK_SIZE), but nothing is written in that case, so the store can no
 * longer spill up to three bytes past the end of the block just ahead of the
 * fatal call.
 *
 * Expects code_block and block_pos to be in scope at the point of use.
 * The store goes through a cast pointer at an arbitrary byte offset;
 * presumably acceptable because this generator targets x86 - confirm if the
 * file is ever built elsewhere.
 */
#define addlong(val)                                        \
    do {                                                    \
        if (block_pos + 4 >= BLOCK_SIZE)                    \
            fatal("Over!\n");                               \
        else {                                              \
            *(uint32_t *)&code_block[block_pos] = (val);    \
            block_pos += 4;                                 \
        }                                                   \
    } while (0)
/*
 * Store a 64-bit value at code_block[block_pos], advance block_pos by 8,
 * then call fatal("Over!\n") if block_pos has reached BLOCK_SIZE.
 * The expansion is three separate statements with no do/while wrapper, and
 * the store happens before the bounds check.
 * NOTE(review): a do { } while (0) definition of the same macro follows
 * immediately - this looks like the pre-change side of a diff hunk; confirm
 * which definition is live.
 */
#define addquad(val) \
*(uint64_t *)&code_block[block_pos] = val; \
block_pos += 8; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
/*
 * Emit a 64-bit value into code_block at block_pos and advance the position
 * by eight bytes.
 *
 * The bounds check runs BEFORE the store. fatal("Over!\n") fires under the
 * same condition as before (the position after the write would be
 * >= BLOCK_SIZE), but nothing is written in that case, so the store can no
 * longer spill up to seven bytes past the end of the block just ahead of the
 * fatal call.
 *
 * Expects code_block and block_pos to be in scope at the point of use.
 * The store goes through a cast pointer at an arbitrary byte offset;
 * presumably acceptable because this generator targets x86 - confirm if the
 * file is ever built elsewhere.
 */
#define addquad(val)                                        \
    do {                                                    \
        if (block_pos + 8 >= BLOCK_SIZE)                    \
            fatal("Over!\n");                               \
        else {                                              \
            *(uint64_t *)&code_block[block_pos] = (val);    \
            block_pos += 8;                                 \
        }                                                   \
    } while (0)
static __m128i xmm_01_w;// = 0x0001000100010001ull;
@@ -694,6 +703,28 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0x74);
addbyte(0x24);
addbyte(8+16);
if (params->col_tiled || params->aux_tiled)
{
addbyte(0x8b); /*MOV EAX, state->x[EDI]*/
addbyte(0x87);
addlong(offsetof(voodoo_state_t, x));
addbyte(0x89); /*MOV EBX, EAX*/
addbyte(0xc3);
addbyte(0x83); /*AND EAX, 63*/
addbyte(0xe0);
addbyte(63);
addbyte(0xc1); /*SHR EBX, 6*/
addbyte(0xeb);
addbyte(6);
addbyte(0xc1); /*SHL EBX, 11 - tile is 128*32, << 12, div 2 because word index*/
addbyte(0xe3);
addbyte(11);
addbyte(0x01); /*ADD EAX, EBX*/
addbyte(0xd8);
addbyte(0x89); /*MOV state->x_tiled[EDI], EAX*/
addbyte(0x87);
addlong(offsetof(voodoo_state_t, x_tiled));
}
addbyte(0x66); /*PXOR XMM2, XMM2*/
addbyte(0x0f);
addbyte(0xef);
@@ -826,7 +857,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
{
addbyte(0x8b); /*MOV EBX, state->x[EDI]*/
addbyte(0x9f);
addlong(offsetof(voodoo_state_t, x));
if (voodoo->aux_tiled)
addlong(offsetof(voodoo_state_t, x_tiled));
else
addlong(offsetof(voodoo_state_t, x));
addbyte(0x8b);/*MOV ECX, aux_mem[EDI]*/
addbyte(0x8f);
addlong(offsetof(voodoo_state_t, aux_mem));
@@ -2436,7 +2470,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
{
addbyte(0x8b); /*MOV EAX, state->x[EDI]*/
addbyte(0x87);
addlong(offsetof(voodoo_state_t, x));
if (params->col_tiled)
addlong(offsetof(voodoo_state_t, x_tiled));
else
addlong(offsetof(voodoo_state_t, x));
addbyte(0x8b); /*MOV EBP, fb_mem*/
addbyte(0xaf);
addlong(offsetof(voodoo_state_t, fb_mem));
@@ -2815,8 +2852,11 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
addbyte(0x97);
addlong(offsetof(voodoo_state_t, x));
if (params->col_tiled)
addlong(offsetof(voodoo_state_t, x_tiled));
else
addlong(offsetof(voodoo_state_t, x));
addbyte(0x66); /*MOV EAX, XMM0*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2884,7 +2924,10 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
}
addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
addbyte(0x97);
addlong(offsetof(voodoo_state_t, x));
if (params->col_tiled)
addlong(offsetof(voodoo_state_t, x_tiled));
else
addlong(offsetof(voodoo_state_t, x));
if (dither2x2)
{
addbyte(0xc1); /*SHL ECX, 2*/
@@ -2968,6 +3011,12 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE))
{
addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
addbyte(0x97);
if (params->aux_tiled)
addlong(offsetof(voodoo_state_t, x_tiled));
else
addlong(offsetof(voodoo_state_t, x));
addbyte(0x66); /*MOV AX, new_depth*/
addbyte(0x8b);
addbyte(0x87);
@@ -3227,7 +3276,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
if (params->textureMode[1] & TEXTUREMODE_TRILINEAR)
cs = cs;
}
static int voodoo_recomp = 0;
int voodoo_recomp = 0;
static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even)
{
@@ -3238,7 +3287,7 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params,
for (c = 0; c < 8; c++)
{
data = &codegen_data[odd_even + b*2];
data = &codegen_data[odd_even + b*4];
if (state->xdir == data->xdir &&
params->alphaMode == data->alphaMode &&
@@ -3249,7 +3298,8 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params,
params->textureMode[0] == data->textureMode[0] &&
params->textureMode[1] == data->textureMode[1] &&
(params->tLOD[0] & LOD_MASK) == data->tLOD[0] &&
(params->tLOD[1] & LOD_MASK) == data->tLOD[1])
(params->tLOD[1] & LOD_MASK) == data->tLOD[1] &&
((params->col_tiled || params->aux_tiled) ? 1 : 0) == data->is_tiled)
{
last_block[odd_even] = b;
return data->code_block;
@@ -3258,7 +3308,7 @@ static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params,
b = (b + 1) & 7;
}
voodoo_recomp++;
data = &codegen_data[odd_even + next_block_to_write[odd_even]*2];
data = &codegen_data[odd_even + next_block_to_write[odd_even]*4];
// code_block = data->code_block;
voodoo_generate(data->code_block, voodoo, params, state, depth_op);
@@ -3273,16 +3323,17 @@ voodoo_recomp++;
data->textureMode[1] = params->textureMode[1];
data->tLOD[0] = params->tLOD[0] & LOD_MASK;
data->tLOD[1] = params->tLOD[1] & LOD_MASK;
data->is_tiled = (params->col_tiled || params->aux_tiled) ? 1 : 0;
next_block_to_write[odd_even] = (next_block_to_write[odd_even] + 1) & 7;
return data->code_block;
}
static void voodoo_codegen_init(voodoo_t *voodoo)
void voodoo_codegen_init(voodoo_t *voodoo)
{
int c;
#ifdef __linux__
#if defined(__linux__) || defined(__APPLE__)
void *start;
size_t len;
long pagesize = sysconf(_SC_PAGESIZE);
@@ -3290,19 +3341,9 @@ static void voodoo_codegen_init(voodoo_t *voodoo)
#endif
#if defined WIN32 || defined _WIN32 || defined _WIN32
voodoo->codegen_data = VirtualAlloc(NULL, sizeof(voodoo_x86_data_t) * BLOCK_NUM*2, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
voodoo->codegen_data = VirtualAlloc(NULL, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
voodoo->codegen_data = malloc(sizeof(voodoo_x86_data_t) * BLOCK_NUM*2);
#endif
#ifdef __linux__
start = (void *)((long)voodoo->codegen_data & pagemask);
len = ((sizeof(voodoo_x86_data_t) * BLOCK_NUM*2) + pagesize) & pagemask;
if (mprotect(start, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
{
perror("mprotect");
exit(-1);
}
voodoo->codegen_data = mmap(0, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, 0, 0);
#endif
for (c = 0; c < 256; c++)
@@ -3327,11 +3368,11 @@ static void voodoo_codegen_init(voodoo_t *voodoo)
xmm_00_ff_w[1] = _mm_set_epi32(0, 0, 0xff | (0xff << 16), 0xff | (0xff << 16));
}
/*
 * Release the buffer held in voodoo->codegen_data.
 * Windows builds call VirtualFree(..., 0, MEM_RELEASE); all other builds take
 * the #else branch.
 * NOTE(review): this span lists two signatures (static and non-static) and
 * both free() and munmap() on the same pointer - it reads like the
 * before/after sides of a diff rendered without +/- markers. Only one of each
 * pair can be live. The munmap() length uses the same
 * sizeof(voodoo_x86_data_t) * BLOCK_NUM*4 expression as the mmap() call that
 * appears earlier in this listing.
 * NOTE(review): the Windows guard tests _WIN32 twice; the repeat is harmless.
 */
static void voodoo_codegen_close(voodoo_t *voodoo)
void voodoo_codegen_close(voodoo_t *voodoo)
{
#if defined WIN32 || defined _WIN32 || defined _WIN32
VirtualFree(voodoo->codegen_data, 0, MEM_RELEASE);
#else
free(voodoo->codegen_data);
munmap(voodoo->codegen_data, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4);
#endif
}