diff --git a/src/codegen_new/codegen_backend_arm64_ops.c b/src/codegen_new/codegen_backend_arm64_ops.c index 915cae93d..9d5806edf 100644 --- a/src/codegen_new/codegen_backend_arm64_ops.c +++ b/src/codegen_new/codegen_backend_arm64_ops.c @@ -102,6 +102,10 @@ # define OPCODE_SUB_LSR (0x25a << 21) # define OPCODE_SUBX_LSL (0x658 << 21) +# define OPCODE_INS_B (0x6e010400) +# define OPCODE_INS_H (0x6e020400) +# define OPCODE_INS_S (0x6e040400) +# define OPCODE_INS_D (0x6e080400) # define OPCODE_ADD_V8B (0x0e208400) # define OPCODE_ADD_V4H (0x0e608400) # define OPCODE_ADD_V2S (0x0ea08400) @@ -180,6 +184,7 @@ # define OPCODE_SQSUB_V8B (0x0e202c00) # define OPCODE_SQSUB_V4H (0x0e602c00) # define OPCODE_SQXTN_V8B_8H (0x0e214800) +# define OPCODE_SQXTUN_V8B_8H (0x2e212800) # define OPCODE_SQXTN_V4H_4S (0x0e614800) # define OPCODE_SHL_VD (0x0f005400) # define OPCODE_SHL_VQ (0x4f005400) @@ -207,6 +212,7 @@ # define OPCODE_ZIP1_V8B (0x0e003800) # define OPCODE_ZIP1_V4H (0x0e403800) # define OPCODE_ZIP1_V2S (0x0e803800) +# define OPCODE_ZIP1_V2D (0x4ec03800) # define OPCODE_ZIP2_V8B (0x0e007800) # define OPCODE_ZIP2_V4H (0x0e407800) # define OPCODE_ZIP2_V2S (0x0e807800) @@ -225,11 +231,11 @@ # define IMM_LOGICAL(imm) ((imm) << 10) -# define BIT_TBxZ(bit) ((((bit) &0x1f) << 19) | (((bit) &0x20) ? (1 << 31) : 0)) +# define BIT_TBxZ(bit) ((((bit) & 0x1f) << 19) | (((bit) & 0x20) ? (1 << 31) : 0)) # define OFFSET14(offset) (((offset >> 2) << 5) & 0x0007ffe0) # define OFFSET19(offset) (((offset >> 2) << 5) & 0x00ffffe0) -# define OFFSET20(offset) (((offset & 3) << 29) | ((((offset) &0x1fffff) >> 2) << 5)) +# define OFFSET20(offset) (((offset & 3) << 29) | ((((offset) & 0x1fffff) >> 2) << 5)) # define OFFSET26(offset) ((offset >> 2) & 0x03ffffff) # define OFFSET12_B(offset) (offset << 10) @@ -716,6 +722,12 @@ host_arm64_DUP_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int element) codegen_addlong(block, OPCODE_DUP_V2S | Rd(dst_reg) | Rn(src_n_reg) | DUP_ELEMENT(element)); } +void +host_arm64_INS_D(codeblock_t *block, int dst_reg, int src_reg, int dst_index, int src_index) +{ + codegen_addlong(block, OPCODE_INS_D | Rd(dst_reg) | Rn(src_reg) | ((dst_index & 1) << 20) | ((src_index & 1) << 14)); +} + void host_arm64_EOR_IMM(codeblock_t *block, int dst_reg, int src_n_reg, uint32_t imm_data) { @@ -1225,6 +1237,13 @@ host_arm64_SQXTN_V8B_8H(codeblock_t *block, int dst_reg, int src_reg) { codegen_addlong(block, OPCODE_SQXTN_V8B_8H | Rd(dst_reg) | Rn(src_reg)); } + +void +host_arm64_SQXTUN_V8B_8H(codeblock_t *block, int dst_reg, int src_reg) +{ + codegen_addlong(block, OPCODE_SQXTUN_V8B_8H | Rd(dst_reg) | Rn(src_reg)); +} + void host_arm64_SQXTN_V4H_4S(codeblock_t *block, int dst_reg, int src_reg) { @@ -1475,6 +1494,11 @@ host_arm64_ZIP1_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_re codegen_addlong(block, OPCODE_ZIP1_V2S | Rd(dst_reg) | Rn(src_n_reg) | Rm(src_m_reg)); } void +host_arm64_ZIP1_V2D(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg) +{ + codegen_addlong(block, OPCODE_ZIP1_V2D | Rd(dst_reg) | Rn(src_n_reg) | Rm(src_m_reg)); +} +void host_arm64_ZIP2_V8B(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg) { codegen_addlong(block, OPCODE_ZIP2_V8B | Rd(dst_reg) | Rn(src_n_reg) | Rm(src_m_reg)); diff --git a/src/codegen_new/codegen_backend_arm64_ops.h b/src/codegen_new/codegen_backend_arm64_ops.h index df751b4aa..129c2b2a3 100644 --- a/src/codegen_new/codegen_backend_arm64_ops.h +++ b/src/codegen_new/codegen_backend_arm64_ops.h @@ -72,6 +72,7 @@ void host_arm64_CSEL_EQ(codeblock_t *block, int dst_reg, int src_n_reg, int src_ void host_arm64_CSEL_VS(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_DUP_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int element); +void host_arm64_INS_D(codeblock_t *block, int dst_reg, int src_reg, int dst_index, int src_index); void host_arm64_EOR_IMM(codeblock_t *block, int dst_reg, int src_n_reg, uint32_t imm_data); void host_arm64_EOR_REG(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg, int shift); @@ -184,6 +185,7 @@ void host_arm64_SQSUB_V8B(codeblock_t *block, int dst_reg, int src_n_reg, int sr void host_arm64_SQSUB_V4H(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_SQXTN_V8B_8H(codeblock_t *block, int dst_reg, int src_reg); +void host_arm64_SQXTUN_V8B_8H(codeblock_t *block, int dst_reg, int src_reg); void host_arm64_SQXTN_V4H_4S(codeblock_t *block, int dst_reg, int src_reg); void host_arm64_SHL_V4H(codeblock_t *block, int dst_reg, int src_reg, int shift); @@ -243,6 +245,7 @@ void host_arm64_USHR_V2D(codeblock_t *block, int dst_reg, int src_reg, int shift void host_arm64_ZIP1_V8B(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP1_V4H(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP1_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); +void host_arm64_ZIP1_V2D(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP2_V8B(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP2_V4H(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP2_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); diff --git a/src/codegen_new/codegen_backend_arm64_uops.c b/src/codegen_new/codegen_backend_arm64_uops.c index 2bb6281ff..d06685cb2 100644 --- a/src/codegen_new/codegen_backend_arm64_uops.c +++ b/src/codegen_new/codegen_backend_arm64_uops.c @@ -801,7 +801,8 @@ codegen_MMX_ENTER(codeblock_t *block, uop_t *uop) host_arm64_STR_IMM_W(block, REG_TEMP, REG_CPUSTATE, (uintptr_t) &cpu_state.tag[0] - (uintptr_t) &cpu_state); host_arm64_STR_IMM_W(block, REG_TEMP, REG_CPUSTATE, (uintptr_t) &cpu_state.tag[4] - (uintptr_t) &cpu_state); host_arm64_STR_IMM_W(block, REG_WZR, REG_CPUSTATE, (uintptr_t) &cpu_state.TOP - (uintptr_t) &cpu_state); - host_arm64_STRB_IMM(block, REG_WZR, REG_CPUSTATE, (uintptr_t) &cpu_state.ismmx - (uintptr_t) &cpu_state); + host_arm64_AND_IMM(block, REG_TEMP, REG_TEMP, 1); + host_arm64_STRB_IMM(block, REG_TEMP, REG_CPUSTATE, (uintptr_t) &cpu_state.ismmx - (uintptr_t) &cpu_state); return 0; } @@ -849,28 +850,28 @@ codegen_LOAD_FUNC_ARG3(codeblock_t *block, uop_t *uop) static int codegen_LOAD_FUNC_ARG0_IMM(codeblock_t *block, uop_t *uop) { - host_arm64_mov_imm(block, REG_ARG0, uop->imm_data); + host_arm64_MOVX_IMM(block, REG_ARG0, uop->imm_data); return 0; } static int codegen_LOAD_FUNC_ARG1_IMM(codeblock_t *block, uop_t *uop) { - host_arm64_mov_imm(block, REG_ARG1, uop->imm_data); + host_arm64_MOVX_IMM(block, REG_ARG1, uop->imm_data); return 0; } static int codegen_LOAD_FUNC_ARG2_IMM(codeblock_t *block, uop_t *uop) { - host_arm64_mov_imm(block, REG_ARG2, uop->imm_data); + host_arm64_MOVX_IMM(block, REG_ARG2, uop->imm_data); return 0; } static int codegen_LOAD_FUNC_ARG3_IMM(codeblock_t *block, uop_t *uop) { - host_arm64_mov_imm(block, REG_ARG3, uop->imm_data); + host_arm64_MOVX_IMM(block, REG_ARG3, uop->imm_data); return 0; } @@ -1448,9 +1449,9 @@ codegen_PACKSSWB(codeblock_t *block, uop_t *uop) int src_size_b = IREG_GET_SIZE(uop->src_reg_b_real); if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) { - host_arm64_SQXTN_V8B_8H(block, REG_V_TEMP, src_reg_b); - host_arm64_SQXTN_V8B_8H(block, dest_reg, dest_reg); - host_arm64_ZIP1_V2S(block, dest_reg, dest_reg, REG_V_TEMP); + host_arm64_INS_D(block, REG_V_TEMP, dest_reg, 0, 0); + host_arm64_INS_D(block, REG_V_TEMP, src_reg_b, 1, 0); + host_arm64_SQXTN_V8B_8H(block, dest_reg, REG_V_TEMP); } else fatal("PACKSSWB %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real); @@ -1465,9 +1466,9 @@ codegen_PACKSSDW(codeblock_t *block, uop_t *uop) int src_size_b = IREG_GET_SIZE(uop->src_reg_b_real); if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) { - host_arm64_SQXTN_V4H_4S(block, REG_V_TEMP, src_reg_b); - host_arm64_SQXTN_V4H_4S(block, dest_reg, dest_reg); - host_arm64_ZIP1_V2S(block, dest_reg, dest_reg, REG_V_TEMP); + host_arm64_INS_D(block, REG_V_TEMP, dest_reg, 0, 0); + host_arm64_INS_D(block, REG_V_TEMP, src_reg_b, 1, 0); + host_arm64_SQXTN_V4H_4S(block, dest_reg, REG_V_TEMP); } else fatal("PACKSSDW %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real); @@ -1480,9 +1481,9 @@ codegen_PACKUSWB(codeblock_t *block, uop_t *uop) int dest_size = IREG_GET_SIZE(uop->dest_reg_a_real), src_size_b = IREG_GET_SIZE(uop->src_reg_b_real); if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) { - host_arm64_UQXTN_V8B_8H(block, REG_V_TEMP, src_reg_b); - host_arm64_UQXTN_V8B_8H(block, dest_reg, dest_reg); - host_arm64_ZIP1_V2S(block, dest_reg, dest_reg, REG_V_TEMP); + host_arm64_INS_D(block, REG_V_TEMP, dest_reg, 0, 0); + host_arm64_INS_D(block, REG_V_TEMP, src_reg_b, 1, 0); + host_arm64_SQXTUN_V8B_8H(block, dest_reg, REG_V_TEMP); } else fatal("PACKUSWB %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real); diff --git a/src/codegen_new/codegen_backend_x86-64_uops.c b/src/codegen_new/codegen_backend_x86-64_uops.c index 356c8bcde..6b206d5b5 100644 --- a/src/codegen_new/codegen_backend_x86-64_uops.c +++ b/src/codegen_new/codegen_backend_x86-64_uops.c @@ -220,9 +220,9 @@ static int codegen_CALL_INSTRUCTION_FUNC(codeblock_t *block, uop_t *uop) { # if _WIN64 - host_x86_MOV32_REG_IMM(block, REG_ECX, uop->imm_data); + host_x86_MOV64_REG_IMM(block, REG_RCX, uop->imm_data); # else - host_x86_MOV32_REG_IMM(block, REG_EDI, uop->imm_data); + host_x86_MOV64_REG_IMM(block, REG_RDI, uop->imm_data); # endif host_x86_CALL(block, uop->p); host_x86_TEST32_REG(block, REG_EAX, REG_EAX); @@ -906,9 +906,9 @@ static int codegen_LOAD_FUNC_ARG0_IMM(codeblock_t *block, uop_t *uop) { # if _WIN64 - host_x86_MOV32_REG_IMM(block, REG_ECX, uop->imm_data); + host_x86_MOV64_REG_IMM(block, REG_RCX, uop->imm_data); # else - host_x86_MOV32_REG_IMM(block, REG_EDI, uop->imm_data); + host_x86_MOV64_REG_IMM(block, REG_RDI, uop->imm_data); # endif return 0; } @@ -916,9 +916,9 @@ static int codegen_LOAD_FUNC_ARG1_IMM(codeblock_t *block, uop_t *uop) { # if _WIN64 - host_x86_MOV32_REG_IMM(block, REG_EDX, uop->imm_data); + host_x86_MOV64_REG_IMM(block, REG_RDX, uop->imm_data); # else - host_x86_MOV32_REG_IMM(block, REG_ESI, uop->imm_data); + host_x86_MOV64_REG_IMM(block, REG_RSI, uop->imm_data); # endif return 0; } diff --git a/src/codegen_new/codegen_ir.c b/src/codegen_new/codegen_ir.c index d14fa0f23..dfd136289 100644 --- a/src/codegen_new/codegen_ir.c +++ b/src/codegen_new/codegen_ir.c @@ -53,6 +53,7 @@ duplicate_uop(ir_data_t *ir, uop_t *uop, int offset) new_uop->imm_data = uop->imm_data; new_uop->p = uop->p; new_uop->pc = uop->pc; + new_uop->is_a16 = uop->is_a16; if (uop->jump_dest_uop != -1) { new_uop->jump_dest_uop = uop->jump_dest_uop + offset; diff --git a/src/codegen_new/codegen_ir_defs.h b/src/codegen_new/codegen_ir_defs.h index 60f7badea..bfc19373b 100644 --- a/src/codegen_new/codegen_ir_defs.h +++ b/src/codegen_new/codegen_ir_defs.h @@ -336,7 +336,7 @@ typedef struct uop_t { ir_reg_t src_reg_a; ir_reg_t src_reg_b; ir_reg_t src_reg_c; - uint32_t imm_data; + uintptr_t imm_data; void *p; ir_host_reg_t dest_reg_a_real; ir_host_reg_t src_reg_a_real, src_reg_b_real, src_reg_c_real; @@ -601,7 +601,7 @@ uop_gen_reg_src3_imm(uint32_t uop_type, ir_data_t *ir, int src_reg_a, int src_re } static inline void -uop_gen_imm(uint32_t uop_type, ir_data_t *ir, uint32_t imm) +uop_gen_imm(uint32_t uop_type, ir_data_t *ir, uintptr_t imm) { uop_t *uop = uop_alloc(ir, uop_type); diff --git a/src/codegen_new/codegen_ops.c b/src/codegen_new/codegen_ops.c index bb7d1f3ee..68861ff52 100644 --- a/src/codegen_new/codegen_ops.c +++ b/src/codegen_new/codegen_ops.c @@ -86,13 +86,8 @@ RecompOpFn recomp_opcodes_0f[512] = { /*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -#if defined __ARM_EABI__ || defined _ARM_ || defined _M_ARM || defined __aarch64__ || defined _M_ARM64 -/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -#else /*60*/ ropPUNPCKLBW, ropPUNPCKLWD, ropPUNPCKLDQ, ropPACKSSWB, ropPCMPGTB, ropPCMPGTW, ropPCMPGTD, ropPACKUSWB, ropPUNPCKHBW, ropPUNPCKHWD, ropPUNPCKHDQ, ropPACKSSDW, NULL, NULL, ropMOVD_r_d, ropMOVQ_r_q, /*70*/ NULL, ropPSxxW_imm, ropPSxxD_imm, ropPSxxQ_imm, ropPCMPEQB, ropPCMPEQW, ropPCMPEQD, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropMOVD_d_r, ropMOVQ_q_r, -#endif /*80*/ ropJO_16, ropJNO_16, ropJB_16, ropJNB_16, ropJE_16, ropJNE_16, ropJBE_16, ropJNBE_16, ropJS_16, ropJNS_16, ropJP_16, ropJNP_16, ropJL_16, ropJNL_16, ropJLE_16, ropJNLE_16, /*90*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -100,13 +95,11 @@ RecompOpFn recomp_opcodes_0f[512] = { /*b0*/ NULL, NULL, ropLSS_16, NULL, ropLFS_16, ropLGS_16, ropMOVZX_16_8, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropMOVSX_16_8, NULL, /*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -#if defined __ARM_EABI__ || defined _ARM_ || defined _M_ARM || defined __aarch64__ || defined _M_ARM64 -/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -#else /*d0*/ NULL, NULL, NULL, NULL, NULL, ropPMULLW, NULL, NULL, ropPSUBUSB, ropPSUBUSW, NULL, ropPAND, ropPADDUSB, ropPADDUSW, NULL, ropPANDN, /*e0*/ NULL, NULL, NULL, NULL, NULL, ropPMULHW, NULL, NULL, ropPSUBSB, ropPSUBSW, NULL, ropPOR, ropPADDSB, ropPADDSW, NULL, ropPXOR, +#if defined __ARM_EABI__ || defined _ARM_ || defined _M_ARM || defined __aarch64__ || defined _M_ARM64 +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropPSUBB, ropPSUBW, ropPSUBD, NULL, ropPADDB, ropPADDW, ropPADDD, NULL, +#else /*f0*/ NULL, NULL, NULL, NULL, NULL, ropPMADDWD, NULL, NULL, ropPSUBB, ropPSUBW, ropPSUBD, NULL, ropPADDB, ropPADDW, ropPADDD, NULL, #endif @@ -119,13 +112,8 @@ RecompOpFn recomp_opcodes_0f[512] = { /*40*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /*50*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -#if defined __ARM_EABI__ || defined _ARM_ || defined _M_ARM || defined __aarch64__ || defined _M_ARM64 -/*60*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -/*70*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -#else /*60*/ ropPUNPCKLBW, ropPUNPCKLWD, ropPUNPCKLDQ, ropPACKSSWB, ropPCMPGTB, ropPCMPGTW, ropPCMPGTD, ropPACKUSWB, ropPUNPCKHBW, ropPUNPCKHWD, ropPUNPCKHDQ, ropPACKSSDW, NULL, NULL, ropMOVD_r_d, ropMOVQ_r_q, /*70*/ NULL, ropPSxxW_imm, ropPSxxD_imm, ropPSxxQ_imm, ropPCMPEQB, ropPCMPEQW, ropPCMPEQD, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropMOVD_d_r, ropMOVQ_q_r, -#endif /*80*/ ropJO_32, ropJNO_32, ropJB_32, ropJNB_32, ropJE_32, ropJNE_32, ropJBE_32, ropJNBE_32, ropJS_32, ropJNS_32, ropJP_32, ropJNP_32, ropJL_32, ropJNL_32, ropJLE_32, ropJNLE_32, /*90*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -133,13 +121,11 @@ RecompOpFn recomp_opcodes_0f[512] = { /*b0*/ NULL, NULL, ropLSS_32, NULL, ropLFS_32, ropLGS_32, ropMOVZX_32_8, ropMOVZX_32_16, NULL, NULL, NULL, NULL, NULL, NULL, ropMOVSX_32_8, ropMOVSX_32_16, /*c0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -#if defined __ARM_EABI__ || defined _ARM_ || defined _M_ARM || defined __aarch64__ || defined _M_ARM64 -/*d0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -/*e0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -#else /*d0*/ NULL, NULL, NULL, NULL, NULL, ropPMULLW, NULL, NULL, ropPSUBUSB, ropPSUBUSW, NULL, ropPAND, ropPADDUSB, ropPADDUSW, NULL, ropPANDN, /*e0*/ NULL, NULL, NULL, NULL, NULL, ropPMULHW, NULL, NULL, ropPSUBSB, ropPSUBSW, NULL, ropPOR, ropPADDSB, ropPADDSW, NULL, ropPXOR, +#if defined __ARM_EABI__ || defined _ARM_ || defined _M_ARM || defined __aarch64__ || defined _M_ARM64 +/*f0*/ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ropPSUBB, ropPSUBW, ropPSUBD, NULL, ropPADDB, ropPADDW, ropPADDD, NULL, +#else /*f0*/ NULL, NULL, NULL, NULL, NULL, ropPMADDWD, NULL, NULL, ropPSUBB, ropPSUBW, ropPSUBD, NULL, ropPADDB, ropPADDW, ropPADDD, NULL, #endif // clang-format on diff --git a/src/codegen_new/codegen_ops_helpers.h b/src/codegen_new/codegen_ops_helpers.h index 92b721099..05928bd2f 100644 --- a/src/codegen_new/codegen_ops_helpers.h +++ b/src/codegen_new/codegen_ops_helpers.h @@ -114,6 +114,10 @@ int codegen_can_unroll_full(codeblock_t *block, ir_data_t *ir, uint32_t next_pc, static inline int codegen_can_unroll(codeblock_t *block, ir_data_t *ir, uint32_t next_pc, uint32_t dest_addr) { + /* TODO: Re-enable this again after fixing mysterious crashes on ARM64 with MMX instructions used. */ +#if defined __ARM_EABI__ || defined _ARM_ || defined _M_ARM || defined __aarch64__ || defined _M_ARM64 + return 0; +#endif if (block->flags & CODEBLOCK_BYTE_MASK) return 0; diff --git a/src/codegen_new/codegen_reg.c b/src/codegen_new/codegen_reg.c index f91377df8..b678bd6ac 100644 --- a/src/codegen_new/codegen_reg.c +++ b/src/codegen_new/codegen_reg.c @@ -201,7 +201,7 @@ static const uint8_t native_requested_sizes[9][8] = [REG_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1, [REG_FPU_ST_DOUBLE][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1, - [REG_POINTER][(sizeof(void *) == 4) ? (IREG_SIZE_L >> IREG_SIZE_SHIFT) : (IREG_SIZE_Q >> IREG_SIZE_SHIFT)] = 1 + [REG_POINTER][IREG_SIZE_Q >> IREG_SIZE_SHIFT] = 1 }; void