diff --git a/src/codegen_new/codegen_backend_arm64_ops.c b/src/codegen_new/codegen_backend_arm64_ops.c index afe00fe4d..7f0518f04 100644 --- a/src/codegen_new/codegen_backend_arm64_ops.c +++ b/src/codegen_new/codegen_backend_arm64_ops.c @@ -208,6 +208,7 @@ # define OPCODE_ZIP1_V8B (0x0e003800) # define OPCODE_ZIP1_V4H (0x0e403800) # define OPCODE_ZIP1_V2S (0x0e803800) +# define OPCODE_ZIP1_V2D (0x4ec03800) # define OPCODE_ZIP2_V8B (0x0e007800) # define OPCODE_ZIP2_V4H (0x0e407800) # define OPCODE_ZIP2_V2S (0x0e807800) @@ -1483,6 +1484,11 @@ host_arm64_ZIP1_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_re codegen_addlong(block, OPCODE_ZIP1_V2S | Rd(dst_reg) | Rn(src_n_reg) | Rm(src_m_reg)); } void +host_arm64_ZIP1_V2D(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg) +{ + codegen_addlong(block, OPCODE_ZIP1_V2D | Rd(dst_reg) | Rn(src_n_reg) | Rm(src_m_reg)); +} +void host_arm64_ZIP2_V8B(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg) { codegen_addlong(block, OPCODE_ZIP2_V8B | Rd(dst_reg) | Rn(src_n_reg) | Rm(src_m_reg)); diff --git a/src/codegen_new/codegen_backend_arm64_ops.h b/src/codegen_new/codegen_backend_arm64_ops.h index 084bbb404..152ab6793 100644 --- a/src/codegen_new/codegen_backend_arm64_ops.h +++ b/src/codegen_new/codegen_backend_arm64_ops.h @@ -244,6 +244,7 @@ void host_arm64_USHR_V2D(codeblock_t *block, int dst_reg, int src_reg, int shift void host_arm64_ZIP1_V8B(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP1_V4H(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP1_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); +void host_arm64_ZIP1_V2D(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP2_V8B(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP2_V4H(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); void host_arm64_ZIP2_V2S(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg); diff --git a/src/codegen_new/codegen_backend_arm64_uops.c b/src/codegen_new/codegen_backend_arm64_uops.c index 4c00cbed2..925e6517b 100644 --- a/src/codegen_new/codegen_backend_arm64_uops.c +++ b/src/codegen_new/codegen_backend_arm64_uops.c @@ -1449,9 +1449,8 @@ codegen_PACKSSWB(codeblock_t *block, uop_t *uop) int src_size_b = IREG_GET_SIZE(uop->src_reg_b_real); if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) { - host_arm64_SQXTN_V8B_8H(block, REG_V_TEMP, src_reg_b); - host_arm64_SQXTN_V8B_8H(block, dest_reg, dest_reg); - host_arm64_ZIP1_V2S(block, dest_reg, dest_reg, REG_V_TEMP); + host_arm64_ZIP1_V2D(block, REG_V_TEMP, dest_reg, src_reg_b); + host_arm64_SQXTN_V8B_8H(block, dest_reg, REG_V_TEMP); } else fatal("PACKSSWB %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real); @@ -1466,9 +1465,8 @@ codegen_PACKSSDW(codeblock_t *block, uop_t *uop) int src_size_b = IREG_GET_SIZE(uop->src_reg_b_real); if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) { - host_arm64_SQXTN_V4H_4S(block, REG_V_TEMP, src_reg_b); - host_arm64_SQXTN_V4H_4S(block, dest_reg, dest_reg); - host_arm64_ZIP1_V2S(block, dest_reg, dest_reg, REG_V_TEMP); + host_arm64_ZIP1_V2D(block, REG_V_TEMP, dest_reg, src_reg_b); + host_arm64_SQXTN_V4H_4S(block, dest_reg, REG_V_TEMP); } else fatal("PACKSSDW %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real); @@ -1481,9 +1479,8 @@ codegen_PACKUSWB(codeblock_t *block, uop_t *uop) int dest_size = IREG_GET_SIZE(uop->dest_reg_a_real), src_size_b = IREG_GET_SIZE(uop->src_reg_b_real); if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) { - host_arm64_SQXTUN_V8B_8H(block, REG_V_TEMP, src_reg_b); - host_arm64_SQXTUN_V8B_8H(block, dest_reg, dest_reg); - host_arm64_ZIP1_V2S(block, dest_reg, dest_reg, REG_V_TEMP); + host_arm64_ZIP1_V2D(block, REG_V_TEMP, dest_reg, src_reg_b); + host_arm64_SQXTUN_V8B_8H(block, dest_reg, REG_V_TEMP); } else fatal("PACKUSWB %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real);