Brought the Voodoo files in line with the mainline PCem code - fixes all warnings;

Applied all the mainline PCem commits;
Fixed some sound-related warnings.
This commit is contained in:
OBattler
2017-06-14 20:35:58 +02:00
parent 62bfb60a7b
commit c0a8830d5e
13 changed files with 517 additions and 373 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -42,26 +42,26 @@ static int last_block[2] = {0, 0};
static int next_block_to_write[2] = {0, 0};
#define addbyte(val) \
code_block[block_pos++] = (uint8_t)val; \
code_block[block_pos++] = val; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
#define addword(val) \
*(uint16_t *)&code_block[block_pos] = (uint16_t)val; \
block_pos += 2; \
if (block_pos >= BLOCK_SIZE) \
#define addword(val) \
*(uint16_t *)&code_block[block_pos] = val; \
block_pos += 2; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
#define addlong(val) \
*(uint32_t *)&code_block[block_pos] = (uint32_t)val; \
block_pos += 4; \
if (block_pos >= BLOCK_SIZE) \
#define addlong(val) \
*(uint32_t *)&code_block[block_pos] = val; \
block_pos += 4; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
#define addquad(val) \
*(uint64_t *)&code_block[block_pos] = (uint64_t)val; \
block_pos += 8; \
if (block_pos >= BLOCK_SIZE) \
#define addquad(val) \
*(uint64_t *)&code_block[block_pos] = val; \
block_pos += 8; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
@@ -70,7 +70,6 @@ static __m128i xmm_ff_w;// = 0x00ff00ff00ff00ffull;
static __m128i xmm_ff_b;// = 0x00000000ffffffffull;
static uint32_t zero = 0;
static double const_1_48 = (double)(1ull << 4);
static __m128i alookup[257], aminuslookup[256];
static __m128i minus_254;// = 0xff02ff02ff02ff02ull;
@@ -161,7 +160,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v
addbyte(0x0f); /*MOVZX EAX, logtable[RAX]*/
addbyte(0xb6);
addbyte(0x80);
addlong((uint32_t)logtable);
addlong((uint32_t)(uintptr_t)logtable);
addbyte(0x09); /*OR EAX, EDX*/
addbyte(0xd0);
addbyte(0x03); /*ADD EAX, state->lod*/
@@ -339,7 +338,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v
addbyte(0x48);
addbyte(0x14);
addbyte(0x25);
addlong(&zero);
addlong((uint32_t)(uintptr_t)&zero);
addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/
addbyte(0x96);
addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
@@ -353,7 +352,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v
addbyte(0x48);
addbyte(0x1c);
addbyte(0x25);
addlong(&zero);
addlong((uint32_t)(uintptr_t)&zero);
addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/
addbyte(0x9e);
addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
@@ -400,7 +399,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v
addbyte(0x48);
addbyte(0x04);
addbyte(0x25);
addlong(&zero);
addlong((uint32_t)(uintptr_t)&zero);
addbyte(0x78); /*JS + - clamp on 0*/
addbyte(2+3+2+ 5+5+2);
addbyte(0x3b); /*CMP EAX, EBP*/
@@ -501,7 +500,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v
addbyte(0x49); /*MOV R8, bilinear_lookup*/
addbyte(0xb8);
addquad(bilinear_lookup);
addquad((uintptr_t)bilinear_lookup);
addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
addbyte(0x0f);
@@ -615,7 +614,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v
addbyte(0x48);
addbyte(0x04);
addbyte(0x25);
addlong(&zero);
addlong((uint32_t)(uintptr_t)&zero);
addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/
addbyte(0x84);
addbyte(0x8e);
@@ -642,7 +641,7 @@ static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, v
addbyte(0x48);
addbyte(0x1c);
addbyte(0x25);
addlong(&zero);
addlong((uint32_t)(uintptr_t)&zero);
addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/
addbyte(0x9c);
addbyte(0x8e);
@@ -1080,7 +1079,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0x0f);
addbyte(0xef);
addbyte(0x83);
addlong((uint32_t)&xmm_00_ff_w[0]);
addlong((uint32_t)(uintptr_t)&xmm_00_ff_w[0]);
}
else if (!tc_reverse_blend_1)
{
@@ -1089,14 +1088,14 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xef);
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t)&xmm_ff_w);
addlong((uint32_t)(uintptr_t)&xmm_ff_w);
}
addbyte(0x66); /*PADDW XMM0, xmm_01_w*/
addbyte(0x0f);
addbyte(0xfd);
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t)&xmm_01_w);
addlong((uint32_t)(uintptr_t)&xmm_01_w);
addbyte(0xf3); /*MOVQ XMM1, XMM2*/
addbyte(0x0f);
addbyte(0x7e);
@@ -1217,7 +1216,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0x33); /*XOR EAX, i_00_ff_w[ECX*4]*/
addbyte(0x04);
addbyte(0x8d);
addlong((uint32_t)i_00_ff_w);
addlong((uint32_t)(uintptr_t)i_00_ff_w);
}
else if (!tc_reverse_blend_1)
{
@@ -1404,7 +1403,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0x0f);
addbyte(0xef);
addbyte(0xa3);
addlong((uint32_t)&xmm_00_ff_w[0]);
addlong((uint32_t)(uintptr_t)&xmm_00_ff_w[0]);
}
else if (!tc_reverse_blend)
{
@@ -1413,14 +1412,14 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xef);
addbyte(0x24);
addbyte(0x25);
addlong(&xmm_ff_w);
addlong((uint32_t)(uintptr_t)&xmm_ff_w);
}
addbyte(0x66); /*PADDW XMM4, 1*/
addbyte(0x0f);
addbyte(0xfd);
addbyte(0x24);
addbyte(0x25);
addlong(&xmm_01_w);
addlong((uint32_t)(uintptr_t)&xmm_01_w);
addbyte(0xf3); /*MOVQ XMM5, XMM1*/
addbyte(0x0f);
addbyte(0x7e);
@@ -1488,7 +1487,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0x0f);
addbyte(0xef);
addbyte(0x0d);
addlong(&xmm_ff_w);
addlong((uint32_t)(uintptr_t)&xmm_ff_w);
}
addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
@@ -1585,7 +1584,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0x33); /*XOR EBX, i_00_ff_w[ECX*4]*/
addbyte(0x1c);
addbyte(0x8d);
addlong((uint32_t)i_00_ff_w);
addlong((uint32_t)(uintptr_t)i_00_ff_w);
}
else if (!tca_reverse_blend)
{
@@ -2143,14 +2142,14 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xef);
addbyte(0x1c);
addbyte(0x25);
addlong(&xmm_ff_w);
addlong((uint32_t)(uintptr_t)&xmm_ff_w);
}
addbyte(0x66); /*PADDW XMM3, 1*/
addbyte(0x0f);
addbyte(0xfd);
addbyte(0x1c);
addbyte(0x25);
addlong(&xmm_01_w);
addlong((uint32_t)(uintptr_t)&xmm_01_w);
addbyte(0x66); /*PMULLW XMM0, XMM3*/
addbyte(0x0f);
addbyte(0xd5);
@@ -2194,7 +2193,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xef);
addbyte(0x04);
addbyte(0x25);
addlong(&xmm_ff_b);
addlong((uint32_t)(uintptr_t)&xmm_ff_b);
}
if (params->fogMode & FOG_ENABLE)
@@ -2441,7 +2440,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
{
addbyte(0x49); /*MOV R8, rgb565*/
addbyte(0xb8);
addquad(rgb565);
addquad((uintptr_t)rgb565);
addbyte(0x8b); /*MOV EAX, state->x[EDI]*/
addbyte(0x87);
addlong(offsetof(voodoo_state_t, x));
@@ -2489,7 +2488,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xd5);
addbyte(0x24);
addbyte(0xd5);
addlong(alookup);
addlong((uint32_t)(uintptr_t)alookup);
addbyte(0xf3); /*MOVQ XMM5, XMM4*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2499,7 +2498,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xfd);
addbyte(0x24);
addbyte(0x25);
addlong((uint32_t)alookup + 16);
addlong((uint32_t)(uintptr_t)alookup + 16);
addbyte(0x66); /*PSRLW XMM5, 8*/
addbyte(0x0f);
addbyte(0x71);
@@ -2529,7 +2528,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xfd);
addbyte(0x24);
addbyte(0x25);
addlong((uint32_t)alookup + 16);
addlong((uint32_t)(uintptr_t)alookup + 16);
addbyte(0x66); /*PSRLW XMM5, 8*/
addbyte(0x0f);
addbyte(0x71);
@@ -2555,7 +2554,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xd5);
addbyte(0x24);
addbyte(0xd5);
addlong(aminuslookup);
addlong((uint32_t)(uintptr_t)aminuslookup);
addbyte(0xf3); /*MOVQ XMM5, XMM4*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2565,7 +2564,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xfd);
addbyte(0x24);
addbyte(0x25);
addlong((uint32_t)alookup + 16);
addlong((uint32_t)(uintptr_t)alookup + 16);
addbyte(0x66); /*PSRLW XMM5, 8*/
addbyte(0x0f);
addbyte(0x71);
@@ -2587,7 +2586,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0x7e);
addbyte(0x2c);
addbyte(0x25);
addlong(&xmm_ff_w);
addlong((uint32_t)(uintptr_t)&xmm_ff_w);
addbyte(0x66); /*PSUBW XMM5, XMM0*/
addbyte(0x0f);
addbyte(0xf9);
@@ -2605,7 +2604,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xfd);
addbyte(0x24);
addbyte(0x25);
addlong((uint32_t)alookup + 16);
addlong((uint32_t)(uintptr_t)alookup + 16);
addbyte(0x66); /*PSRLW XMM5, 8*/
addbyte(0x0f);
addbyte(0x71);
@@ -2633,7 +2632,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xd5);
addbyte(0x24);
addbyte(0xd5);
addlong(&minus_254);
addlong((uint32_t)(uintptr_t)&minus_254);
addbyte(0xf3); /*MOVQ XMM5, XMM4*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2643,7 +2642,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xfd);
addbyte(0x24);
addbyte(0x25);
addlong((uint32_t)alookup + 16);
addlong((uint32_t)(uintptr_t)alookup + 16);
addbyte(0x66); /*PSRLW XMM5, 8*/
addbyte(0x0f);
addbyte(0x71);
@@ -2674,7 +2673,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xd5);
addbyte(0x04);
addbyte(0xd5);
addlong(alookup);
addlong((uint32_t)(uintptr_t)alookup);
addbyte(0xf3); /*MOVQ XMM5, XMM0*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2684,7 +2683,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xfd);
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t)alookup + 16);
addlong((uint32_t)(uintptr_t)alookup + 16);
addbyte(0x66); /*PSRLW XMM5, 8*/
addbyte(0x0f);
addbyte(0x71);
@@ -2714,7 +2713,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xfd);
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t)alookup + 16);
addlong((uint32_t)(uintptr_t)alookup + 16);
addbyte(0x66); /*PSRLW XMM5, 8*/
addbyte(0x0f);
addbyte(0x71);
@@ -2740,7 +2739,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xd5);
addbyte(0x04);
addbyte(0xd5);
addlong(aminuslookup);
addlong((uint32_t)(uintptr_t)aminuslookup);
addbyte(0xf3); /*MOVQ XMM5, XMM0*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2750,7 +2749,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xfd);
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t)alookup + 16);
addlong((uint32_t)(uintptr_t)alookup + 16);
addbyte(0x66); /*PSRLW XMM5, 8*/
addbyte(0x0f);
addbyte(0x71);
@@ -2772,7 +2771,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0x7e);
addbyte(0x2c);
addbyte(0x25);
addlong(&xmm_ff_w);
addlong((uint32_t)(uintptr_t)&xmm_ff_w);
addbyte(0x66); /*PSUBW XMM5, XMM6*/
addbyte(0x0f);
addbyte(0xf9);
@@ -2790,7 +2789,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
addbyte(0xfd);
addbyte(0x04);
addbyte(0x25);
addlong((uint32_t)alookup + 16);
addlong((uint32_t)(uintptr_t)alookup + 16);
addbyte(0x66); /*PSRLW XMM5, 8*/
addbyte(0x0f);
addbyte(0x71);
@@ -2842,7 +2841,7 @@ static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo
{
addbyte(0x49); /*MOV R8, dither_rb*/
addbyte(0xb8);
addquad(dither2x2 ? dither_rb2x2 : dither_rb);
addquad(dither2x2 ? (uintptr_t)dither_rb2x2 : (uintptr_t)dither_rb);
addbyte(0x4c); /*MOV ESI, real_y (R14)*/
addbyte(0x89);
addbyte(0xf6);

View File

@@ -40,43 +40,43 @@ static int last_block[2] = {0, 0};
static int next_block_to_write[2] = {0, 0};
#define addbyte(val) \
code_block[block_pos++] = (uint8_t)val; \
code_block[block_pos++] = val; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
#define addword(val) \
*(uint16_t *)&code_block[block_pos] = (uint16_t)val; \
block_pos += 2; \
if (block_pos >= BLOCK_SIZE) \
#define addword(val) \
*(uint16_t *)&code_block[block_pos] = val; \
block_pos += 2; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
#define addlong(val) \
*(uint32_t *)&code_block[block_pos] = (uint32_t)val; \
block_pos += 4; \
if (block_pos >= BLOCK_SIZE) \
#define addlong(val) \
*(uint32_t *)&code_block[block_pos] = val; \
block_pos += 4; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
#define addquad(val) \
*(uint64_t *)&code_block[block_pos] = (uint64_t)val; \
block_pos += 8; \
if (block_pos >= BLOCK_SIZE) \
#define addquad(val) \
*(uint64_t *)&code_block[block_pos] = val; \
block_pos += 8; \
if (block_pos >= BLOCK_SIZE) \
fatal("Over!\n")
static __m128i xmm_01_w;
static __m128i xmm_ff_w;
static __m128i xmm_ff_b;
static __m128i xmm_01_w;// = 0x0001000100010001ull;
static __m128i xmm_ff_w;// = 0x00ff00ff00ff00ffull;
static __m128i xmm_ff_b;// = 0x00000000ffffffffull;
static uint32_t zero = 0;
static double const_1_48 = (double)(1ull << 4);
static __m128i alookup[257], aminuslookup[256];
static __m128i minus_254;
static __m128i minus_254;// = 0xff02ff02ff02ff02ull;
static __m128i bilinear_lookup[256*2];
static __m128i xmm_00_ff_w[2];
static uint32_t i_00_ff_w[2] = {0, 0xff};
static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu)
static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu)
{
if (params->textureMode[tmu] & 1)
{
@@ -85,7 +85,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo,
addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w));
addbyte(0xdd); /*FLDq const_1_48*/
addbyte(0x05);
addlong(&const_1_48);
addlong((uint32_t)&const_1_48);
addbyte(0xde); /*FDIV ST(1)*/
addbyte(0xf1);
addbyte(0xdf); /*FILDq state->tmu0_s*/
@@ -129,7 +129,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo,
addbyte(0x0f); /*MOVZX EBX, logtable[EBX]*/
addbyte(0xb6);
addbyte(0x9b);
addlong(logtable);
addlong((uint32_t)logtable);
addbyte(0x09); /*OR EAX, EBX*/
addbyte(0xd8);
addbyte(0x03); /*ADD EAX, state->lod*/
@@ -322,7 +322,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo,
addbyte(0x0f); /*CMOVS EDX, zero*/
addbyte(0x48);
addbyte(0x15);
addlong(&zero);
addlong((uint32_t)&zero);
addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/
addbyte(0x96);
addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
@@ -335,7 +335,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo,
addbyte(0x0f); /*CMOVS EBX, zero*/
addbyte(0x48);
addbyte(0x1d);
addlong(&zero);
addlong((uint32_t)&zero);
addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/
addbyte(0x9e);
addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
@@ -379,7 +379,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo,
addbyte(0x0f); /*CMOVS EAX, zero*/
addbyte(0x48);
addbyte(0x05);
addlong(&zero);
addlong((uint32_t)&zero);
addbyte(0x78); /*JS + - clamp on 0*/
addbyte(2+3+2+ 5+5+2);
addbyte(0x3b); /*CMP EAX, EBP*/
@@ -489,7 +489,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo,
addbyte(0x81); /*ADD ESI, bilinear_lookup*/
addbyte(0xc6);
addlong(bilinear_lookup);
addlong((uint32_t)bilinear_lookup);
addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/
addbyte(0x0f);
@@ -592,7 +592,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo,
addbyte(0x0f); /*CMOVS EAX, zero*/
addbyte(0x48);
addbyte(0x05);
addlong(&zero);
addlong((uint32_t)&zero);
addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/
addbyte(0x84);
addbyte(0x8e);
@@ -618,7 +618,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo,
addbyte(0x0f); /*CMOVS EBX, zero*/
addbyte(0x48);
addbyte(0x1d);
addlong(&zero);
addlong((uint32_t)&zero);
addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/
addbyte(0x9c);
addbyte(0x8e);
@@ -653,7 +653,7 @@ static __inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo,
return block_pos;
}
static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int depthop)
static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int depthop)
{
int block_pos = 0;
int z_skip_pos = 0;
@@ -662,10 +662,24 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
int depth_jump_pos = 0;
int depth_jump_pos2 = 0;
int loop_jump_pos = 0;
// xmm_01_w = (__m128i)0x0001000100010001ull;
// xmm_ff_w = (__m128i)0x00ff00ff00ff00ffull;
// xmm_ff_b = (__m128i)0x00000000ffffffffull;
xmm_01_w = _mm_set_epi32(0, 0, 0x00010001, 0x00010001);
xmm_ff_w = _mm_set_epi32(0, 0, 0x00ff00ff, 0x00ff00ff);
xmm_ff_b = _mm_set_epi32(0, 0, 0, 0x00ffffff);
minus_254 = _mm_set_epi32(0, 0, 0xff02ff02, 0xff02ff02);
// *(uint64_t *)&const_1_48 = 0x45b0000000000000ull;
// block_pos = 0;
// voodoo_get_depth = &code_block[block_pos];
/*W at (%esp+4)
Z at (%esp+12)
new_depth at (%esp+16)*/
// if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depth_op == DEPTHOP_NEVER))
// {
// addbyte(0xC3); /*RET*/
// return;
// }
addbyte(0x55); /*PUSH EBP*/
addbyte(0x57); /*PUSH EDI*/
addbyte(0x56); /*PUSH ESI*/
@@ -697,6 +711,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x75); /*JNZ got_depth*/
depth_jump_pos = block_pos;
addbyte(0);
// addbyte(4+5+2+3+2+5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3);
addbyte(0x8b); /*MOV EDX, w*/
addbyte(0x97);
addlong(offsetof(voodoo_state_t, w));
@@ -710,6 +725,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x74); /*JZ got_depth*/
depth_jump_pos2 = block_pos;
addbyte(0);
// addbyte(5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3);
addbyte(0xb9); /*MOV ECX, 19*/
addlong(19);
addbyte(0x0f); /*BSR EAX, EDX*/
@@ -875,8 +891,17 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
else if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop == DEPTHOP_NEVER))
{
addbyte(0xC3); /*RET*/
// addbyte(0x30); /*XOR EAX, EAX*/
// addbyte(0xc0);
}
// else
// {
// addbyte(0xb0); /*MOV AL, 1*/
// addbyte(1);
// }
// voodoo_combine = &code_block[block_pos];
/*XMM0 = colour*/
/*XMM2 = 0 (for unpacking*/
@@ -1367,13 +1392,13 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x0f);
addbyte(0xef);
addbyte(0x25);
addlong(&xmm_ff_w);
addlong((uint32_t)&xmm_ff_w);
}
addbyte(0x66); /*PADDW XMM4, 1*/
addbyte(0x0f);
addbyte(0xfd);
addbyte(0x25);
addlong(&xmm_01_w);
addlong((uint32_t)&xmm_01_w);
addbyte(0xf3); /*MOVQ XMM5, XMM1*/
addbyte(0x0f);
addbyte(0x7e);
@@ -1441,7 +1466,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x0f);
addbyte(0xef);
addbyte(0x0d);
addlong(&xmm_ff_w);
addlong((uint32_t)&xmm_ff_w);
}
addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
@@ -2096,13 +2121,13 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x0f);
addbyte(0xef);
addbyte(0x1d);
addlong(&xmm_ff_w);
addlong((uint32_t)&xmm_ff_w);
}
addbyte(0x66); /*PADDW XMM3, 1*/
addbyte(0x0f);
addbyte(0xfd);
addbyte(0x1d);
addlong(&xmm_01_w);
addlong((uint32_t)&xmm_01_w);
addbyte(0x66); /*PMULLW XMM0, XMM3*/
addbyte(0x0f);
addbyte(0xd5);
@@ -2145,8 +2170,14 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x0f);
addbyte(0xef);
addbyte(0x05);
addlong(&xmm_ff_b);
addlong((uint32_t)&xmm_ff_b);
}
//#if 0
// addbyte(0x66); /*MOVD state->out[EDI], XMM0*/
// addbyte(0x0f);
// addbyte(0x7e);
// addbyte(0x87);
// addlong(offsetof(voodoo_state_t, out));
if (params->fogMode & FOG_ENABLE)
{
if (params->fogMode & FOG_CONSTANT)
@@ -2241,6 +2272,11 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(10);
addbyte(0x01); /*ADD EAX, EBX*/
addbyte(0xd8);
/* int fog_idx = (w_depth >> 10) & 0x3f;
fog_a = params->fogTable[fog_idx].fog;
fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10;*/
break;
case FOG_Z:
@@ -2252,6 +2288,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(12);
addbyte(0x25); /*AND EAX, 0xff*/
addlong(0xff);
// fog_a = (z >> 20) & 0xff;
break;
case FOG_ALPHA:
@@ -2273,6 +2310,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x0f); /*CMOVAE EAX, EBX*/
addbyte(0x43);
addbyte(0xc3);
// fog_a = CLAMP(ia >> 12);
break;
case FOG_W:
@@ -2293,10 +2331,12 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x0f); /*CMOVAE EAX, EBX*/
addbyte(0x43);
addbyte(0xc3);
// fog_a = CLAMP(w >> 32);
break;
}
addbyte(0x01); /*ADD EAX, EAX*/
addbyte(0xc0);
// fog_a++;
addbyte(0x66); /*PMULLW XMM3, alookup+4[EAX*8]*/
addbyte(0x0f);
@@ -2419,7 +2459,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x6e);
addbyte(0x24);
addbyte(0x85);
addlong(rgb565);
addlong((uint32_t)rgb565);
addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/
addbyte(0x0f);
addbyte(0x60);
@@ -2443,7 +2483,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0xd5);
addbyte(0x24);
addbyte(0xd5);
addlong(alookup);
addlong((uint32_t)alookup);
addbyte(0xf3); /*MOVQ XMM5, XMM4*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2507,7 +2547,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0xd5);
addbyte(0x24);
addbyte(0xd5);
addlong(aminuslookup);
addlong((uint32_t)aminuslookup);
addbyte(0xf3); /*MOVQ XMM5, XMM4*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2537,7 +2577,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x0f);
addbyte(0x7e);
addbyte(0x2d);
addlong(&xmm_ff_w);
addlong((uint32_t)&xmm_ff_w);
addbyte(0x66); /*PSUBW XMM5, XMM0*/
addbyte(0x0f);
addbyte(0xf9);
@@ -2581,7 +2621,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x0f);
addbyte(0xd5);
addbyte(0x25);
addlong(&minus_254);
addlong((uint32_t)&minus_254);
addbyte(0xf3); /*MOVQ XMM5, XMM4*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2621,7 +2661,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0xd5);
addbyte(0x04);
addbyte(0xd5);
addlong(alookup);
addlong((uint32_t)alookup);
addbyte(0xf3); /*MOVQ XMM5, XMM0*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2685,7 +2725,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0xd5);
addbyte(0x04);
addbyte(0xd5);
addlong(aminuslookup);
addlong((uint32_t)aminuslookup);
addbyte(0xf3); /*MOVQ XMM5, XMM0*/
addbyte(0x0f);
addbyte(0x7e);
@@ -2715,7 +2755,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x0f);
addbyte(0x7e);
addbyte(0x2d);
addlong(&xmm_ff_w);
addlong((uint32_t)&xmm_ff_w);
addbyte(0x66); /*PSUBW XMM5, XMM6*/
addbyte(0x0f);
addbyte(0xf9);
@@ -2768,6 +2808,13 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0x67);
addbyte(0xc0);
}
//#endif
// addbyte(0x8b); /*MOV EDX, x (ESP+12)*/
// addbyte(0x54);
// addbyte(0x24);
// addbyte(12);
addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
addbyte(0x97);
@@ -2780,6 +2827,10 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
if (params->fbzMode & FBZ_RGB_WMASK)
{
// addbyte(0x89); /*MOV state->rgb_out[EDI], EAX*/
// addbyte(0x87);
// addlong(offsetof(voodoo_state_t, rgb_out));
if (dither)
{
addbyte(0x8b); /*MOV ESI, real_y (ESP+16)*/
@@ -2857,17 +2908,17 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
addbyte(0xb6);
addbyte(0x9c);
addbyte(0x33);
addlong(dither2x2 ? dither_g2x2 : dither_g);
addlong(dither2x2 ? (uint32_t)dither_g2x2 : (uint32_t)dither_g);
addbyte(0x0f); /*MOVZX ECX, dither_rb[ECX+ESI]*/
addbyte(0xb6);
addbyte(0x8c);
addbyte(0x31);
addlong(dither2x2 ? dither_rb2x2 : dither_rb);
addlong(dither2x2 ? (uint32_t)dither_rb2x2 : (uint32_t)dither_rb);
addbyte(0x0f); /*MOVZX EAX, dither_rb[EAX+ESI]*/
addbyte(0xb6);
addbyte(0x84);
addbyte(0x30);
addlong(dither2x2 ? dither_rb2x2 : dither_rb);
addlong(dither2x2 ? (uint32_t)dither_rb2x2 : (uint32_t)dither_rb);
addbyte(0xc1); /*SHL EBX, 5*/
addbyte(0xe3);
addbyte(5);
@@ -3181,7 +3232,7 @@ static __inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, vood
}
static int voodoo_recomp = 0;
static __inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even)
static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even)
{
int c;
int b = last_block[odd_even];
@@ -3211,6 +3262,7 @@ static __inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params
}
voodoo_recomp++;
data = &codegen_data[odd_even + next_block_to_write[odd_even]*2];
// code_block = data->code_block;
voodoo_generate(data->code_block, voodoo, params, state, depth_op);