Use SQXTUN instead of UQXTN in PACKUSWB on arm64

This commit is contained in:
Jasmine Iwanek
2022-12-02 14:03:30 -05:00
committed by Cacodemon345
parent 9702e28e5c
commit 6de981363e
3 changed files with 10 additions and 2 deletions

View File

@@ -180,6 +180,7 @@
# define OPCODE_SQSUB_V8B (0x0e202c00)
# define OPCODE_SQSUB_V4H (0x0e602c00)
# define OPCODE_SQXTN_V8B_8H (0x0e214800)
# define OPCODE_SQXTUN_V8B_8H (0x7e212800)
# define OPCODE_SQXTN_V4H_4S (0x0e614800)
# define OPCODE_SHL_VD (0x0f005400)
# define OPCODE_SHL_VQ (0x4f005400)
@@ -1225,6 +1226,12 @@ host_arm64_SQXTN_V8B_8H(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_addlong(block, OPCODE_SQXTN_V8B_8H | Rd(dst_reg) | Rn(src_reg));
}
void host_arm64_SQXTUN_V8B_8H(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_addlong(block, OPCODE_SQXTUN_V8B_8H | Rd(dst_reg) | Rn(src_reg));
}
void
host_arm64_SQXTN_V4H_4S(codeblock_t *block, int dst_reg, int src_reg)
{

View File

@@ -184,6 +184,7 @@ void host_arm64_SQSUB_V8B(codeblock_t *block, int dst_reg, int src_n_reg, int sr
void host_arm64_SQSUB_V4H(codeblock_t *block, int dst_reg, int src_n_reg, int src_m_reg);
void host_arm64_SQXTN_V8B_8H(codeblock_t *block, int dst_reg, int src_reg);
void host_arm64_SQXTUN_V8B_8H(codeblock_t *block, int dst_reg, int src_reg);
void host_arm64_SQXTN_V4H_4S(codeblock_t *block, int dst_reg, int src_reg);
void host_arm64_SHL_V4H(codeblock_t *block, int dst_reg, int src_reg, int shift);

View File

@@ -1480,8 +1480,8 @@ codegen_PACKUSWB(codeblock_t *block, uop_t *uop)
int dest_size = IREG_GET_SIZE(uop->dest_reg_a_real), src_size_b = IREG_GET_SIZE(uop->src_reg_b_real);
if (REG_IS_Q(dest_size) && REG_IS_Q(src_size_b) && uop->dest_reg_a_real == uop->src_reg_a_real) {
host_arm64_UQXTN_V8B_8H(block, REG_V_TEMP, src_reg_b);
host_arm64_UQXTN_V8B_8H(block, dest_reg, dest_reg);
host_arm64_SQXTUN_V8B_8H(block, REG_V_TEMP, src_reg_b);
host_arm64_SQXTUN_V8B_8H(block, dest_reg, dest_reg);
host_arm64_ZIP1_V2S(block, dest_reg, dest_reg, REG_V_TEMP);
} else
fatal("PACKUSWB %02x %02x %02x\n", uop->dest_reg_a_real, uop->src_reg_a_real, uop->src_reg_b_real);