Progress on ARM64 MMX opcodes

This commit is contained in:
Cacodemon345
2025-09-15 09:19:40 +00:00
parent d824fc36df
commit 1d8877fba7
10 changed files with 41 additions and 12 deletions

View File

@@ -31,9 +31,9 @@ static struct {
} codegen_instructions[MAX_INSTRUCTION_COUNT];
/*
 * Debug helper: logs the 64-bit contents (.q) of all eight MMX registers
 * cpu_state.MM[0..7] through pclog(), tagged with the caller-supplied label
 * `str`. The two-argument form additionally logs `fetchdat` as 8 hex digits.
 *
 * NOTE(review): this hunk shows both the previous one-argument signature/log
 * line and the new two-argument ones; only the latter pair is the current code.
 */
void
codegen_print_mmx(const char* str)
codegen_print_mmx(const char* str, uint32_t fetchdat)
{
pclog("MMX results: %016llX, %016llX, %016llX, %016llX, %016llX, %016llX, %016llX, %016llX (%s)\n", (unsigned long long)cpu_state.MM[0].q, (unsigned long long)cpu_state.MM[1].q, (unsigned long long)cpu_state.MM[2].q, (unsigned long long)cpu_state.MM[3].q, (unsigned long long)cpu_state.MM[4].q, (unsigned long long)cpu_state.MM[5].q, (unsigned long long)cpu_state.MM[6].q, (unsigned long long)cpu_state.MM[7].q, str);
pclog("MMX results: %016llX, %016llX, %016llX, %016llX, %016llX, %016llX, %016llX, %016llX (%s, fetchdat 0x%08X)\n", (unsigned long long)cpu_state.MM[0].q, (unsigned long long)cpu_state.MM[1].q, (unsigned long long)cpu_state.MM[2].q, (unsigned long long)cpu_state.MM[3].q, (unsigned long long)cpu_state.MM[4].q, (unsigned long long)cpu_state.MM[5].q, (unsigned long long)cpu_state.MM[6].q, (unsigned long long)cpu_state.MM[7].q, str, fetchdat);
}
int

View File

@@ -102,6 +102,10 @@
# define OPCODE_SUB_LSR (0x25a << 21)
# define OPCODE_SUBX_LSL (0x658 << 21)
/*
 * Base opcodes for the INS family, one per element size (B/H/S/D). The four
 * values differ only in which single bit of bits 16..19 is set
 * (bit 16 = B, 17 = H, 18 = S, 19 = D). host_arm64_INS_D ORs the register
 * fields and lane-select bits into OPCODE_INS_D; the B/H/S variants are
 * presumably meant to be used the same way - TODO confirm once emitters exist.
 */
# define OPCODE_INS_B (0x6e010400)
# define OPCODE_INS_H (0x6e020400)
# define OPCODE_INS_S (0x6e040400)
# define OPCODE_INS_D (0x6e080400)
# define OPCODE_ADD_V8B (0x0e208400)
# define OPCODE_ADD_V4H (0x0e608400)
# define OPCODE_ADD_V2S (0x0ea08400)
@@ -180,7 +184,7 @@
# define OPCODE_SQSUB_V8B (0x0e202c00)
# define OPCODE_SQSUB_V4H (0x0e602c00)
# define OPCODE_SQXTN_V8B_8H (0x0e214800)
# define OPCODE_SQXTUN_V8B_8H (0x7e212800)
# define OPCODE_SQXTUN_V8B_8H (0x2e212800)
# define OPCODE_SQXTN_V4H_4S (0x0e614800)
# define OPCODE_SHL_VD (0x0f005400)
# define OPCODE_SHL_VQ (0x4f005400)
@@ -718,6 +722,12 @@ host_arm64_DUP_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int element)
codegen_addlong(block, OPCODE_DUP_V2S | Rd(dst_reg) | Rn(src_n_reg) | DUP_ELEMENT(element));
}
/*
 * Append an INS instruction operating on 64-bit (D) lanes to `block`.
 *
 * block     - code block that receives the 32-bit instruction word
 * dst_reg   - destination vector register, encoded through Rd()
 * src_reg   - source vector register, encoded through Rn()
 * dst_index - destination lane selector; only bit 0 is honoured
 * src_index - source lane selector; only bit 0 is honoured
 */
void
host_arm64_INS_D(codeblock_t *block, int dst_reg, int src_reg, int dst_index, int src_index)
{
    /* Lane selectors: destination lane lands in bit 20, source lane in bit 14. */
    const uint32_t dst_lane_bit = (uint32_t) (dst_index & 1) << 20;
    const uint32_t src_lane_bit = (uint32_t) (src_index & 1) << 14;
    uint32_t       insn         = OPCODE_INS_D;

    insn |= Rd(dst_reg);
    insn |= Rn(src_reg);
    insn |= dst_lane_bit | src_lane_bit;

    codegen_addlong(block, insn);
}
void
host_arm64_EOR_IMM(codeblock_t *block, int dst_reg, int src_n_reg, uint32_t imm_data)
{

View File

@@ -72,6 +72,7 @@ void host_arm64_CSEL_EQ(codeblock_t *block, int dst_reg, int src_n_reg, int src_
void host_arm64_CSEL_VS(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg);
void host_arm64_DUP_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int element);
void host_arm64_INS_D(codeblock_t *block, int dst_reg, int src_reg, int dst_index, int src_index);
void host_arm64_EOR_IMM(codeblock_t *block, int dst_reg, int src_n_reg, uint32_t imm_data);
void host_arm64_EOR_REG(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg, int shift);

View File

@@ -1449,7 +1449,8 @@ codegen_PACKSSWB(codeblock_t *block, uop_t *uop)
int src_size_b = IREG_GET_SIZE(uop->src_reg_b_real);
if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) {
host_arm64_ZIP1_V2D(block, REG_V_TEMP, dest_reg, src_reg_b);
host_arm64_INS_D(block, REG_V_TEMP, dest_reg, 0, 0);
host_arm64_INS_D(block, REG_V_TEMP, src_reg_b, 1, 0);
host_arm64_SQXTN_V8B_8H(block, dest_reg, REG_V_TEMP);
} else
fatal("PACKSSWB %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real);
@@ -1465,7 +1466,8 @@ codegen_PACKSSDW(codeblock_t *block, uop_t *uop)
int src_size_b = IREG_GET_SIZE(uop->src_reg_b_real);
if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) {
host_arm64_ZIP1_V2D(block, REG_V_TEMP, dest_reg, src_reg_b);
host_arm64_INS_D(block, REG_V_TEMP, dest_reg, 0, 0);
host_arm64_INS_D(block, REG_V_TEMP, src_reg_b, 1, 0);
host_arm64_SQXTN_V4H_4S(block, dest_reg, REG_V_TEMP);
} else
fatal("PACKSSDW %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real);
@@ -1479,8 +1481,13 @@ codegen_PACKUSWB(codeblock_t *block, uop_t *uop)
int dest_size = IREG_GET_SIZE(uop->dest_reg_a_real), src_size_b = IREG_GET_SIZE(uop->src_reg_b_real);
if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) {
host_arm64_ZIP1_V2D(block, REG_V_TEMP, dest_reg, src_reg_b);
host_arm64_INS_D(block, REG_V_TEMP, dest_reg, 0, 0);
host_arm64_INS_D(block, REG_V_TEMP, src_reg_b, 1, 0);
host_arm64_SQXTUN_V8B_8H(block, dest_reg, REG_V_TEMP);
//host_arm64_ADD_V4H(block, dest_reg, dest_reg, src_reg_b);
//host_arm64_SQXTUN_V8B_8H(block, REG_V_TEMP, src_reg_b);
//host_arm64_SQXTUN_V8B_8H(block, dest_reg, dest_reg);
//host_arm64_ZIP1_V2S(block, dest_reg, dest_reg, REG_V_TEMP);
} else
fatal("PACKUSWB %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real);

View File

@@ -16,7 +16,7 @@
#include "codegen_ops_mmx_arith.h"
#include "codegen_ops_helpers.h"
extern void codegen_print_mmx(const char* str);
extern void codegen_print_mmx(const char* str, uint32_t fetchdat);
#define ropParith(func) \
uint32_t rop##func(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), \
@@ -40,6 +40,7 @@ extern void codegen_print_mmx(const char* str);
} \
\
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); \
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); \
uop_CALL_FUNC(ir, codegen_print_mmx); \
return op_pc + 1; \
}

View File

@@ -16,7 +16,7 @@
#include "codegen_ops_mmx_cmp.h"
#include "codegen_ops_helpers.h"
extern void codegen_print_mmx(const char* str);
extern void codegen_print_mmx(const char* str, uint32_t fetchdat);
#define ropPcmp(func) \
uint32_t rop##func(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), \
@@ -40,6 +40,7 @@ extern void codegen_print_mmx(const char* str);
} \
\
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); \
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); \
uop_CALL_FUNC(ir, codegen_print_mmx); \
return op_pc + 1; \
}

View File

@@ -16,7 +16,7 @@
#include "codegen_ops_mmx_loadstore.h"
#include "codegen_ops_helpers.h"
extern void codegen_print_mmx(const char* str);
extern void codegen_print_mmx(const char* str, uint32_t fetchdat);
uint32_t
ropMOVD_r_d(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t fetchdat, uint32_t op_32, uint32_t op_pc)
@@ -39,6 +39,7 @@ ropMOVD_r_d(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t
}
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__);
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat);
uop_CALL_FUNC(ir, codegen_print_mmx);
return op_pc + 1;
}
@@ -67,6 +68,7 @@ ropMOVD_d_r(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t
}
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__);
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat);
uop_CALL_FUNC(ir, codegen_print_mmx);
return op_pc + 1;
}
@@ -91,6 +93,7 @@ ropMOVQ_r_q(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t
}
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__);
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat);
uop_CALL_FUNC(ir, codegen_print_mmx);
return op_pc + 1;
}
@@ -116,6 +119,7 @@ ropMOVQ_q_r(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t
}
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__);
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat);
uop_CALL_FUNC(ir, codegen_print_mmx);
return op_pc + 1;
}

View File

@@ -16,7 +16,7 @@
#include "codegen_ops_mmx_logic.h"
#include "codegen_ops_helpers.h"
extern void codegen_print_mmx(const char* str);
extern void codegen_print_mmx(const char* str, uint32_t fetchdat);
uint32_t
ropPAND(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t fetchdat, uint32_t op_32, uint32_t op_pc)
{
@@ -110,6 +110,7 @@ ropPXOR(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t fetc
}
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__);
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat);
uop_CALL_FUNC(ir, codegen_print_mmx);
return op_pc + 1;
}

View File

@@ -16,7 +16,7 @@
#include "codegen_ops_mmx_pack.h"
#include "codegen_ops_helpers.h"
extern void codegen_print_mmx(const char* str);
extern void codegen_print_mmx(const char* str, uint32_t fetchdat);
#define ropPpack(func) \
uint32_t rop##func(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), \
uint32_t fetchdat, uint32_t op_32, uint32_t op_pc) \
@@ -39,6 +39,7 @@ extern void codegen_print_mmx(const char* str);
} \
\
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); \
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); \
uop_CALL_FUNC(ir, codegen_print_mmx); \
return op_pc + 1; \
}

View File

@@ -16,7 +16,7 @@
#include "codegen_ops_mmx_shift.h"
#include "codegen_ops_helpers.h"
extern void codegen_print_mmx(const char* str);
extern void codegen_print_mmx(const char* str, uint32_t fetchdat);
uint32_t
ropPSxxW_imm(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t fetchdat, UNUSED(uint32_t op_32), uint32_t op_pc)
{
@@ -41,6 +41,7 @@ ropPSxxW_imm(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t
}
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__);
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat);
uop_CALL_FUNC(ir, codegen_print_mmx);
codegen_mark_code_present(block, cs + op_pc + 1, 1);
return op_pc + 2;
@@ -69,6 +70,7 @@ ropPSxxD_imm(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t
}
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__);
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat);
uop_CALL_FUNC(ir, codegen_print_mmx);
codegen_mark_code_present(block, cs + op_pc + 1, 1);
return op_pc + 2;
@@ -97,6 +99,7 @@ ropPSxxQ_imm(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t
}
uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__);
uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat);
uop_CALL_FUNC(ir, codegen_print_mmx);
codegen_mark_code_present(block, cs + op_pc + 1, 1);
return op_pc + 2;