From 1d8877fba79e1eec6958e96c646cb0872541a65b Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Mon, 15 Sep 2025 09:19:40 +0000 Subject: [PATCH] Progress on ARM64 MMX opcodes --- src/codegen_new/codegen.c | 4 ++-- src/codegen_new/codegen_backend_arm64_ops.c | 12 +++++++++++- src/codegen_new/codegen_backend_arm64_ops.h | 1 + src/codegen_new/codegen_backend_arm64_uops.c | 13 ++++++++++--- src/codegen_new/codegen_ops_mmx_arith.c | 3 ++- src/codegen_new/codegen_ops_mmx_cmp.c | 3 ++- src/codegen_new/codegen_ops_mmx_loadstore.c | 6 +++++- src/codegen_new/codegen_ops_mmx_logic.c | 3 ++- src/codegen_new/codegen_ops_mmx_pack.c | 3 ++- src/codegen_new/codegen_ops_mmx_shift.c | 5 ++++- 10 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/codegen_new/codegen.c b/src/codegen_new/codegen.c index 8dfb9b386..48d2afad9 100644 --- a/src/codegen_new/codegen.c +++ b/src/codegen_new/codegen.c @@ -31,9 +31,9 @@ static struct { } codegen_instructions[MAX_INSTRUCTION_COUNT]; void -codegen_print_mmx(const char* str) +codegen_print_mmx(const char* str, uint32_t fetchdat) { - pclog("MMX results: %016llX, %016llX, %016llX, %016llX, %016llX, %016llX, %016llX, %016llX (%s)\n", (unsigned long long)cpu_state.MM[0].q, (unsigned long long)cpu_state.MM[1].q, (unsigned long long)cpu_state.MM[2].q, (unsigned long long)cpu_state.MM[3].q, (unsigned long long)cpu_state.MM[4].q, (unsigned long long)cpu_state.MM[5].q, (unsigned long long)cpu_state.MM[6].q, (unsigned long long)cpu_state.MM[7].q, str); + pclog("MMX results: %016llX, %016llX, %016llX, %016llX, %016llX, %016llX, %016llX, %016llX (%s, fetchdat 0x%08X)\n", (unsigned long long)cpu_state.MM[0].q, (unsigned long long)cpu_state.MM[1].q, (unsigned long long)cpu_state.MM[2].q, (unsigned long long)cpu_state.MM[3].q, (unsigned long long)cpu_state.MM[4].q, (unsigned long long)cpu_state.MM[5].q, (unsigned long long)cpu_state.MM[6].q, (unsigned long long)cpu_state.MM[7].q, str, fetchdat); } int diff --git a/src/codegen_new/codegen_backend_arm64_ops.c b/src/codegen_new/codegen_backend_arm64_ops.c index 7f0518f04..9d5806edf 100644 --- a/src/codegen_new/codegen_backend_arm64_ops.c +++ b/src/codegen_new/codegen_backend_arm64_ops.c @@ -102,6 +102,10 @@ # define OPCODE_SUB_LSR (0x25a << 21) # define OPCODE_SUBX_LSL (0x658 << 21) +# define OPCODE_INS_B (0x6e010400) +# define OPCODE_INS_H (0x6e020400) +# define OPCODE_INS_S (0x6e040400) +# define OPCODE_INS_D (0x6e080400) # define OPCODE_ADD_V8B (0x0e208400) # define OPCODE_ADD_V4H (0x0e608400) # define OPCODE_ADD_V2S (0x0ea08400) @@ -180,7 +184,7 @@ # define OPCODE_SQSUB_V8B (0x0e202c00) # define OPCODE_SQSUB_V4H (0x0e602c00) # define OPCODE_SQXTN_V8B_8H (0x0e214800) -# define OPCODE_SQXTUN_V8B_8H (0x7e212800) +# define OPCODE_SQXTUN_V8B_8H (0x2e212800) # define OPCODE_SQXTN_V4H_4S (0x0e614800) # define OPCODE_SHL_VD (0x0f005400) # define OPCODE_SHL_VQ (0x4f005400) @@ -718,6 +722,12 @@ host_arm64_DUP_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int element) codegen_addlong(block, OPCODE_DUP_V2S | Rd(dst_reg) | Rn(src_n_reg) | DUP_ELEMENT(element)); } +void +host_arm64_INS_D(codeblock_t *block, int dst_reg, int src_reg, int dst_index, int src_index) +{ + codegen_addlong(block, OPCODE_INS_D | Rd(dst_reg) | Rn(src_reg) | ((dst_index & 1) << 20) | ((src_index & 1) << 14)); +} + void host_arm64_EOR_IMM(codeblock_t *block, int dst_reg, int src_n_reg, uint32_t imm_data) { diff --git a/src/codegen_new/codegen_backend_arm64_ops.h b/src/codegen_new/codegen_backend_arm64_ops.h index 152ab6793..129c2b2a3 100644 --- a/src/codegen_new/codegen_backend_arm64_ops.h +++ b/src/codegen_new/codegen_backend_arm64_ops.h @@ -72,6 +72,7 @@ void host_arm64_CSEL_EQ(codeblock_t *block, int dst_reg, int src_n_reg, int src_ void host_arm64_CSEL_VS(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_DUP_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int element); +void host_arm64_INS_D(codeblock_t *block, int dst_reg, int src_reg, int dst_index, int src_index); void host_arm64_EOR_IMM(codeblock_t *block, int dst_reg, int src_n_reg, uint32_t imm_data); void host_arm64_EOR_REG(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg, int shift); diff --git a/src/codegen_new/codegen_backend_arm64_uops.c b/src/codegen_new/codegen_backend_arm64_uops.c index c09b74fca..c4923387c 100644 --- a/src/codegen_new/codegen_backend_arm64_uops.c +++ b/src/codegen_new/codegen_backend_arm64_uops.c @@ -1449,7 +1449,8 @@ codegen_PACKSSWB(codeblock_t *block, uop_t *uop) int src_size_b = IREG_GET_SIZE(uop->src_reg_b_real); if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) { - host_arm64_ZIP1_V2D(block, REG_V_TEMP, dest_reg, src_reg_b); + host_arm64_INS_D(block, REG_V_TEMP, dest_reg, 0, 0); + host_arm64_INS_D(block, REG_V_TEMP, src_reg_b, 1, 0); host_arm64_SQXTN_V8B_8H(block, dest_reg, REG_V_TEMP); } else fatal("PACKSSWB %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real); @@ -1465,7 +1466,8 @@ codegen_PACKSSDW(codeblock_t *block, uop_t *uop) int src_size_b = IREG_GET_SIZE(uop->src_reg_b_real); if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) { - host_arm64_ZIP1_V2D(block, REG_V_TEMP, dest_reg, src_reg_b); + host_arm64_INS_D(block, REG_V_TEMP, dest_reg, 0, 0); + host_arm64_INS_D(block, REG_V_TEMP, src_reg_b, 1, 0); host_arm64_SQXTN_V4H_4S(block, dest_reg, REG_V_TEMP); } else fatal("PACKSSDW %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real); @@ -1479,8 +1481,13 @@ codegen_PACKUSWB(codeblock_t *block, uop_t *uop) int dest_size = IREG_GET_SIZE(uop->dest_reg_a_real), src_size_b = IREG_GET_SIZE(uop->src_reg_b_real); if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) { - host_arm64_ZIP1_V2D(block, REG_V_TEMP, dest_reg, src_reg_b); + host_arm64_INS_D(block, REG_V_TEMP, dest_reg, 0, 0); + host_arm64_INS_D(block, REG_V_TEMP, src_reg_b, 1, 0); host_arm64_SQXTUN_V8B_8H(block, dest_reg, REG_V_TEMP); + //host_arm64_ADD_V4H(block, dest_reg, dest_reg, src_reg_b); + //host_arm64_SQXTUN_V8B_8H(block, REG_V_TEMP, src_reg_b); + //host_arm64_SQXTUN_V8B_8H(block, dest_reg, dest_reg); + //host_arm64_ZIP1_V2S(block, dest_reg, dest_reg, REG_V_TEMP); } else fatal("PACKUSWB %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real); diff --git a/src/codegen_new/codegen_ops_mmx_arith.c b/src/codegen_new/codegen_ops_mmx_arith.c index 4f8a5d91c..4688f3e2b 100644 --- a/src/codegen_new/codegen_ops_mmx_arith.c +++ b/src/codegen_new/codegen_ops_mmx_arith.c @@ -16,7 +16,7 @@ #include "codegen_ops_mmx_arith.h" #include "codegen_ops_helpers.h" -extern void codegen_print_mmx(const char* str); +extern void codegen_print_mmx(const char* str, uint32_t fetchdat); #define ropParith(func) \ uint32_t rop##func(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), \ @@ -40,6 +40,7 @@ extern void codegen_print_mmx(const char* str); } \ \ uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); \ + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); \ uop_CALL_FUNC(ir, codegen_print_mmx); \ return op_pc + 1; \ } diff --git a/src/codegen_new/codegen_ops_mmx_cmp.c b/src/codegen_new/codegen_ops_mmx_cmp.c index d28c9197c..09006cae7 100644 --- a/src/codegen_new/codegen_ops_mmx_cmp.c +++ b/src/codegen_new/codegen_ops_mmx_cmp.c @@ -16,7 +16,7 @@ #include "codegen_ops_mmx_cmp.h" #include "codegen_ops_helpers.h" -extern void codegen_print_mmx(const char* str); +extern void codegen_print_mmx(const char* str, uint32_t fetchdat); #define ropPcmp(func) \ uint32_t rop##func(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), \ @@ -40,6 +40,7 @@ extern void codegen_print_mmx(const char* str); } \ \ uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); \ + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); \ uop_CALL_FUNC(ir, codegen_print_mmx); \ return op_pc + 1; \ } diff --git a/src/codegen_new/codegen_ops_mmx_loadstore.c b/src/codegen_new/codegen_ops_mmx_loadstore.c index d7ff5b355..267e7312a 100644 --- a/src/codegen_new/codegen_ops_mmx_loadstore.c +++ b/src/codegen_new/codegen_ops_mmx_loadstore.c @@ -16,7 +16,7 @@ #include "codegen_ops_mmx_loadstore.h" #include "codegen_ops_helpers.h" -extern void codegen_print_mmx(const char* str); +extern void codegen_print_mmx(const char* str, uint32_t fetchdat); uint32_t ropMOVD_r_d(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t fetchdat, uint32_t op_32, uint32_t op_pc) @@ -39,6 +39,7 @@ ropMOVD_r_d(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t } uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); uop_CALL_FUNC(ir, codegen_print_mmx); return op_pc + 1; } @@ -67,6 +68,7 @@ ropMOVD_d_r(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t } uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); uop_CALL_FUNC(ir, codegen_print_mmx); return op_pc + 1; } @@ -91,6 +93,7 @@ ropMOVQ_r_q(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t } uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); uop_CALL_FUNC(ir, codegen_print_mmx); return op_pc + 1; } @@ -116,6 +119,7 @@ ropMOVQ_q_r(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t } uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); uop_CALL_FUNC(ir, codegen_print_mmx); return op_pc + 1; } diff --git a/src/codegen_new/codegen_ops_mmx_logic.c b/src/codegen_new/codegen_ops_mmx_logic.c index e8e092fad..24dbd167e 100644 --- a/src/codegen_new/codegen_ops_mmx_logic.c +++ b/src/codegen_new/codegen_ops_mmx_logic.c @@ -16,7 +16,7 @@ #include "codegen_ops_mmx_logic.h" #include "codegen_ops_helpers.h" -extern void codegen_print_mmx(const char* str); +extern void codegen_print_mmx(const char* str, uint32_t fetchdat); uint32_t ropPAND(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t fetchdat, uint32_t op_32, uint32_t op_pc) { @@ -110,6 +110,7 @@ ropPXOR(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t fetc } uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); uop_CALL_FUNC(ir, codegen_print_mmx); return op_pc + 1; } diff --git a/src/codegen_new/codegen_ops_mmx_pack.c b/src/codegen_new/codegen_ops_mmx_pack.c index 5c01c7e92..86cd99e0a 100644 --- a/src/codegen_new/codegen_ops_mmx_pack.c +++ b/src/codegen_new/codegen_ops_mmx_pack.c @@ -16,7 +16,7 @@ #include "codegen_ops_mmx_pack.h" #include "codegen_ops_helpers.h" -extern void codegen_print_mmx(const char* str); +extern void codegen_print_mmx(const char* str, uint32_t fetchdat); #define ropPpack(func) \ uint32_t rop##func(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), \ uint32_t fetchdat, uint32_t op_32, uint32_t op_pc) \ @@ -39,6 +39,7 @@ extern void codegen_print_mmx(const char* str); } \ \ uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); \ + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); \ uop_CALL_FUNC(ir, codegen_print_mmx); \ return op_pc + 1; \ } diff --git a/src/codegen_new/codegen_ops_mmx_shift.c b/src/codegen_new/codegen_ops_mmx_shift.c index 42aea4f1c..e6c6da410 100644 --- a/src/codegen_new/codegen_ops_mmx_shift.c +++ b/src/codegen_new/codegen_ops_mmx_shift.c @@ -16,7 +16,7 @@ #include "codegen_ops_mmx_shift.h" #include "codegen_ops_helpers.h" -extern void codegen_print_mmx(const char* str); +extern void codegen_print_mmx(const char* str, uint32_t fetchdat); uint32_t ropPSxxW_imm(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t fetchdat, UNUSED(uint32_t op_32), uint32_t op_pc) { @@ -41,6 +41,7 @@ ropPSxxW_imm(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t } uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); uop_CALL_FUNC(ir, codegen_print_mmx); codegen_mark_code_present(block, cs + op_pc + 1, 1); return op_pc + 2; @@ -69,6 +70,7 @@ ropPSxxD_imm(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t } uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); uop_CALL_FUNC(ir, codegen_print_mmx); codegen_mark_code_present(block, cs + op_pc + 1, 1); return op_pc + 2; @@ -97,6 +99,7 @@ ropPSxxQ_imm(codeblock_t *block, ir_data_t *ir, UNUSED(uint8_t opcode), uint32_t } uop_LOAD_FUNC_ARG_IMM(ir, 0, (uintptr_t)__func__); + uop_LOAD_FUNC_ARG_IMM(ir, 1, fetchdat); uop_CALL_FUNC(ir, codegen_print_mmx); codegen_mark_code_present(block, cs + op_pc + 1, 1); return op_pc + 2;