Files
86Box/src/cpu_new/codegen_backend_x86_ops_sse.c

579 lines
24 KiB
C
Raw Normal View History

Added the IBM 5161 ISA expansion for PC and XT; Cleaned up the parallel port emulation, added IRQ support, and made enabling/disabling per port; Added the Award 430NX and the Intel Classic/PCI (Alfredo, 420TX); Finished the 586MC1; Added 8087 emulation; Moved Cyrix 6x86'es to the Dev branch; Sanitized/cleaned up memregs.c/h and intel.c/h; Split the chipsets from machines and sanitized Port 92 emulation; Added support for the 15bpp mode to the Compaq ATI 28800; Moved the MR 386DX and 486 machines to the Dev branch; Ported the new dynamic recompiler from PCem, but it remains in Dev branch until after v2.00; Ported the new timer code from PCem; Cleaned up the CPU table of unused stuff and better optimized its structure; Ported the Open-XT and Open-AT from VARCem, the Open-AT is in the Dev branch; Ported the XT MFM controller rewrite and adding of more controllers (incl. two RLL ones), from VARCem; Added the AHA-1540A and the BusTek BT-542B; Moved the Sumo SCSI-AT to the Dev branch; Minor IDE, FDC, and floppy drive code clean-ups; Made NCR 5380/53C400-based cards' BIOS address configurable; Got rid of the legacy romset variable; Unified (video) buffer and buffer32 into one and make the unified buffer 32-bit; Added the Amstead PPC512 per PCem patch by John Elliott; Switched memory mapping granularity from 16k to 4k (less than 1k not possible due to internal pages); Rewrote the CL-GD 54xx blitter, fixes Win-OS/2 on the 54x6 among other thing; Added the Image Manager 1024 and Professional Graphics Controller per PCem patch by John Elliott and work done on VARCem; Added Headland HT-216, GC-205 and Video 7 VGA 1024i emulation based on PCem commit; Implemented the fuction keys for the Toshiba T1000/T1200/T3100 enhancement; Amstrad MegaPC does now works correctly with non-internal graphics card; The SLiRP code no longer casts a packed struct type to a non-packed struct type; The Xi8088 and PB410a no longer hang on 86Box when PS/2 mouse is not present; The S3 Virge on BeOS is no longer broken (was broken by build #1591); OS/2 2.0 build 6.167 now sees key presses again; Xi8088 now work on CGA again; 86F images converted from either the old or new variants of the HxC MFM format now work correctly; Hardware interrupts with a vector of 0xFF are now handled correctly; OPTi 495SX boards no longer incorrectly have 64 MB maximum RAM when 32 MB is correct; Fixed VNC keyboard input bugs; Fixed AT RTC periodic interrupt - Chicago 58s / 73f / 73g / 81 MIDI play no longer hangs with the build's own VTD driver; Fixed mouse polling with internal mice - Amstrad and Olivetti mice now work correctly; Triones ATAPI DMA driver now correctly reads a file at the end of a CD image with a sectors number not divisible by 4; Compaq Portable now works with all graphics cards; Fixed various MDSI Genius bugs; Added segment limit checks and improved page fault checks for several CPU instructions - Memphis 15xx WINSETUP and Chicago 58s WINDISK.CPL no longer issue a GPF, and some S3 drivers that used to have glitches, now work correctly; Further improved the 808x emulation, also fixes the noticably choppy sound when using 808x CPU's, also fixes #355; OS/2 installer no logner locks up on splash screen on PS/2 Model 70 and 80, fixes #400. Fixed several Amstead bugs, GEM no longer crashes on the Amstrad 1640, fixes #391. Ported John Elliott's Amstrad fixes and improvement from PCem, and fixed the default language so it's correctly Engliish, fixes #278, fixes #389. Fixed a minor IDE timing bug, fixes #388. Fixed Toshiba T1000 RAM issues, fixes #379. Fixed EGA/(S)VGA overscan border handling, fixes #378; Got rid of the now long useless IDE channel 2 auto-removal, fixes #370; Fixed the BIOS files used by the AMSTRAD PC1512, fixes #366; Ported the Unicode CD image file name fix from VARCem, fixes #365; Fixed high density floppy disks on the Xi8088, fixes #359; Fixed some bugs in the Hercules emulation, fixes #346, fixes #358; Fixed the SCSI hard disk mode sense pages, fixes #356; Removed the AMI Unknown 386SX because of impossibility to identify the chipset, closes #349; Fixed bugs in the serial mouse emulation, fixes #344; Compiled 86Box binaries now include all the required .DLL's, fixes #341; Made some combo boxes in the Settings dialog slightly wider, fixes #276.
2019-09-20 14:02:30 +02:00
#if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined WIN32 || defined _WIN32 || defined _WIN32
#include <stdint.h>
#include "../86box.h"
#include "cpu.h"
#include "../mem.h"
#include "codegen.h"
#include "codegen_allocator.h"
#include "codegen_backend.h"
#include "codegen_backend_x86_defs.h"
#include "codegen_backend_x86_ops_sse.h"
#include "codegen_backend_x86_ops_helpers.h"
void host_x86_ADDPS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 3);
codegen_addbyte3(block, 0x0f, 0x58, 0xc0 | src_reg | (dst_reg << 3)); /*ADDPS dst_reg, src_reg*/
}
void host_x86_ADDSD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf2, 0x0f, 0x58, 0xc0 | src_reg | (dst_reg << 3));
}
void host_x86_CMPPS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg, int type)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x0f, 0xc2, 0xc0 | src_reg | (dst_reg << 3), type); /*CMPPS dst_reg, src_reg, type*/
}
void host_x86_COMISD_XREG_XREG(codeblock_t *block, int src_reg_a, int src_reg_b)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x2e, 0xc0 | src_reg_b | (src_reg_a << 3));
}
void host_x86_CVTDQ2PS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 3);
codegen_addbyte3(block, 0x0f, 0x5b, 0xc0 | src_reg | (dst_reg << 3)); /*CVTDQ2PS dst_reg, src_reg*/
}
void host_x86_CVTPS2DQ_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x5b, 0xc0 | src_reg | (dst_reg << 3)); /*CVTPS2DQ dst_reg, src_reg*/
}
void host_x86_CVTSD2SI_REG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf2, 0x0f, 0x2d, 0xc0 | src_reg | (dst_reg << 3)); /*CVTSD2SI dst_reg, src_reg*/
}
void host_x86_CVTSD2SS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf2, 0x0f, 0x5a, 0xc0 | src_reg | (dst_reg << 3)); /*CVTSD2SS dst_reg, src_reg*/
}
void host_x86_CVTSI2SD_XREG_REG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf2, 0x0f, 0x2a, 0xc0 | src_reg | (dst_reg << 3)); /*CVTSI2SD dst_reg, src_reg*/
}
void host_x86_CVTSI2SS_XREG_REG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf3, 0x0f, 0x2a, 0xc0 | src_reg | (dst_reg << 3)); /*CVTSI2SD dst_reg, src_reg*/
}
void host_x86_CVTSS2SD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf3, 0x0f, 0x5a, 0xc0 | src_reg | (dst_reg << 3));
}
void host_x86_CVTSS2SD_XREG_BASE_INDEX(codeblock_t *block, int dst_reg, int base_reg, int idx_reg)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0xf3, 0x0f, 0x5a, 0x04 | (dst_reg << 3)); /*CVTSS2SD XMMx, [base_reg + idx_reg]*/
codegen_addbyte(block, base_reg | (idx_reg << 3));
}
void host_x86_DIVSD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf2, 0x0f, 0x5e, 0xc0 | src_reg | (dst_reg << 3)); /*DIVSD dst_reg, src_reg*/
}
void host_x86_DIVSS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf3, 0x0f, 0x5e, 0xc0 | src_reg | (dst_reg << 3)); /*DIVSS dst_reg, src_reg*/
}
void host_x86_LDMXCSR(codeblock_t *block, void *p)
{
int offset = (uintptr_t)p - (((uintptr_t)&cpu_state) + 128);
if (offset >= -128 && offset < 127)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x0f, 0xae, 0x50 | REG_EBP, offset); /*LDMXCSR offset[EBP]*/
}
else
{
codegen_alloc_bytes(block, 7);
codegen_addbyte3(block, 0x0f, 0xae, 0x15); /*LDMXCSR [p]*/
codegen_addlong(block, (uint32_t)p);
}
}
void host_x86_MAXSD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf2, 0x0f, 0x5f, 0xc0 | src_reg | (dst_reg << 3)); /*MAXSD dst_reg, src_reg*/
}
void host_x86_MOVD_BASE_INDEX_XREG(codeblock_t *block, int base_reg, int idx_reg, int src_reg)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x7e, 0x04 | (src_reg << 3)); /*MOVD XMMx, [base_reg + idx_reg]*/
codegen_addbyte(block, base_reg | (idx_reg << 3));
}
void host_x86_MOVD_REG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x7e, 0xc0 | dst_reg | (src_reg << 3));
}
void host_x86_MOVD_XREG_BASE_INDEX(codeblock_t *block, int dst_reg, int base_reg, int idx_reg)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x6e, 0x04 | (dst_reg << 3)); /*MOVD XMMx, [base_reg + idx_reg]*/
codegen_addbyte(block, base_reg | (idx_reg << 3));
}
void host_x86_MOVD_XREG_REG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x6e, 0xc0 | src_reg | (dst_reg << 3));
}
void host_x86_MOVQ_ABS_XREG(codeblock_t *block, void *p, int src_reg)
{
int offset = (uintptr_t)p - (((uintptr_t)&cpu_state) + 128);
if (offset >= -128 && offset < 127)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0xd6, 0x45 | (src_reg << 3)); /*MOVQ offset[EBP], src_reg*/
codegen_addbyte(block, offset);
}
else
{
codegen_alloc_bytes(block, 8);
codegen_addbyte4(block, 0x66, 0x0f, 0xd6, 0x05 | (src_reg << 3)); /*MOVQ [p], src_reg*/
codegen_addlong(block, (uint32_t)p);
}
}
void host_x86_MOVQ_ABS_REG_REG_SHIFT_XREG(codeblock_t *block, uint32_t addr, int src_reg_a, int src_reg_b, int shift, int src_reg)
{
if (addr < 0x80 || addr >= 0xffffff80)
{
codegen_alloc_bytes(block, 6);
codegen_addbyte3(block, 0x66, 0x0f, 0xd6); /*MOVQ addr[src_reg_a + src_reg_b << shift], XMMx*/
codegen_addbyte3(block, 0x44 | (src_reg << 3), src_reg_a | (src_reg_b << 3) | (shift << 6), addr & 0xff);
}
else
{
codegen_alloc_bytes(block, 9);
codegen_addbyte3(block, 0x66, 0x0f, 0xd6); /*MOVQ addr[src_reg_a + src_reg_b << shift], XMMx*/
codegen_addbyte2(block, 0x84 | (src_reg << 3), src_reg_a | (src_reg_b << 3) | (shift << 6));
codegen_addlong(block, addr);
}
}
void host_x86_MOVQ_BASE_INDEX_XREG(codeblock_t *block, int base_reg, int idx_reg, int src_reg)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0xd6, 0x04 | (src_reg << 3)); /*MOVQ XMMx, [base_reg + idx_reg]*/
codegen_addbyte(block, base_reg | (idx_reg << 3));
}
void host_x86_MOVQ_BASE_OFFSET_XREG(codeblock_t *block, int base_reg, int offset, int src_reg)
{
if (offset >= -128 && offset < 127)
{
if (base_reg == REG_ESP)
{
codegen_alloc_bytes(block, 6);
codegen_addbyte4(block, 0x66, 0x0f, 0xd6, 0x44 | (src_reg << 3)); /*MOVQ [ESP + offset], XMMx*/
codegen_addbyte2(block, 0x24, offset);
}
else
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0xd6, 0x40 | base_reg | (src_reg << 3)); /*MOVQ [base_reg + offset], XMMx*/
codegen_addbyte(block, offset);
}
}
else
fatal("MOVQ_BASE_OFFSET_XREG - offset %i\n", offset);
}
void host_x86_MOVQ_STACK_OFFSET_XREG(codeblock_t *block, int offset, int src_reg)
{
if (!offset)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0xd6, 0x04 | (src_reg << 3)); /*MOVQ [ESP], src_reg*/
codegen_addbyte(block, 0x24);
}
else if (offset >= -0x80 && offset < 0x80)
Added the IBM 5161 ISA expansion for PC and XT; Cleaned up the parallel port emulation, added IRQ support, and made enabling/disabling per port; Added the Award 430NX and the Intel Classic/PCI (Alfredo, 420TX); Finished the 586MC1; Added 8087 emulation; Moved Cyrix 6x86'es to the Dev branch; Sanitized/cleaned up memregs.c/h and intel.c/h; Split the chipsets from machines and sanitized Port 92 emulation; Added support for the 15bpp mode to the Compaq ATI 28800; Moved the MR 386DX and 486 machines to the Dev branch; Ported the new dynamic recompiler from PCem, but it remains in Dev branch until after v2.00; Ported the new timer code from PCem; Cleaned up the CPU table of unused stuff and better optimized its structure; Ported the Open-XT and Open-AT from VARCem, the Open-AT is in the Dev branch; Ported the XT MFM controller rewrite and adding of more controllers (incl. two RLL ones), from VARCem; Added the AHA-1540A and the BusTek BT-542B; Moved the Sumo SCSI-AT to the Dev branch; Minor IDE, FDC, and floppy drive code clean-ups; Made NCR 5380/53C400-based cards' BIOS address configurable; Got rid of the legacy romset variable; Unified (video) buffer and buffer32 into one and make the unified buffer 32-bit; Added the Amstead PPC512 per PCem patch by John Elliott; Switched memory mapping granularity from 16k to 4k (less than 1k not possible due to internal pages); Rewrote the CL-GD 54xx blitter, fixes Win-OS/2 on the 54x6 among other thing; Added the Image Manager 1024 and Professional Graphics Controller per PCem patch by John Elliott and work done on VARCem; Added Headland HT-216, GC-205 and Video 7 VGA 1024i emulation based on PCem commit; Implemented the fuction keys for the Toshiba T1000/T1200/T3100 enhancement; Amstrad MegaPC does now works correctly with non-internal graphics card; The SLiRP code no longer casts a packed struct type to a non-packed struct type; The Xi8088 and PB410a no longer hang on 86Box when PS/2 mouse is not present; The S3 Virge on BeOS is no longer broken (was broken by build #1591); OS/2 2.0 build 6.167 now sees key presses again; Xi8088 now work on CGA again; 86F images converted from either the old or new variants of the HxC MFM format now work correctly; Hardware interrupts with a vector of 0xFF are now handled correctly; OPTi 495SX boards no longer incorrectly have 64 MB maximum RAM when 32 MB is correct; Fixed VNC keyboard input bugs; Fixed AT RTC periodic interrupt - Chicago 58s / 73f / 73g / 81 MIDI play no longer hangs with the build's own VTD driver; Fixed mouse polling with internal mice - Amstrad and Olivetti mice now work correctly; Triones ATAPI DMA driver now correctly reads a file at the end of a CD image with a sectors number not divisible by 4; Compaq Portable now works with all graphics cards; Fixed various MDSI Genius bugs; Added segment limit checks and improved page fault checks for several CPU instructions - Memphis 15xx WINSETUP and Chicago 58s WINDISK.CPL no longer issue a GPF, and some S3 drivers that used to have glitches, now work correctly; Further improved the 808x emulation, also fixes the noticably choppy sound when using 808x CPU's, also fixes #355; OS/2 installer no logner locks up on splash screen on PS/2 Model 70 and 80, fixes #400. Fixed several Amstead bugs, GEM no longer crashes on the Amstrad 1640, fixes #391. Ported John Elliott's Amstrad fixes and improvement from PCem, and fixed the default language so it's correctly Engliish, fixes #278, fixes #389. Fixed a minor IDE timing bug, fixes #388. Fixed Toshiba T1000 RAM issues, fixes #379. Fixed EGA/(S)VGA overscan border handling, fixes #378; Got rid of the now long useless IDE channel 2 auto-removal, fixes #370; Fixed the BIOS files used by the AMSTRAD PC1512, fixes #366; Ported the Unicode CD image file name fix from VARCem, fixes #365; Fixed high density floppy disks on the Xi8088, fixes #359; Fixed some bugs in the Hercules emulation, fixes #346, fixes #358; Fixed the SCSI hard disk mode sense pages, fixes #356; Removed the AMI Unknown 386SX because of impossibility to identify the chipset, closes #349; Fixed bugs in the serial mouse emulation, fixes #344; Compiled 86Box binaries now include all the required .DLL's, fixes #341; Made some combo boxes in the Settings dialog slightly wider, fixes #276.
2019-09-20 14:02:30 +02:00
{
codegen_alloc_bytes(block, 6);
codegen_addbyte4(block, 0x66, 0x0f, 0xd6, 0x44 | (src_reg << 3)); /*MOVQ offset[ESP], src_reg*/
codegen_addbyte2(block, 0x24, offset & 0xff);
}
else
{
codegen_alloc_bytes(block, 9);
codegen_addbyte4(block, 0x66, 0x0f, 0xd6, 0x84 | (src_reg << 3)); /*MOVQ offset[ESP], src_reg*/
codegen_addbyte(block, 0x24);
codegen_addlong(block, offset);
}
}
void host_x86_MOVQ_XREG_ABS(codeblock_t *block, int dst_reg, void *p)
{
int offset = (uintptr_t)p - (((uintptr_t)&cpu_state) + 128);
if (offset >= -128 && offset < 127)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0xf3, 0x0f, 0x7e, 0x45 | (dst_reg << 3)); /*MOVQ offset[EBP], src_reg*/
codegen_addbyte(block, offset);
}
else
{
codegen_alloc_bytes(block, 8);
codegen_addbyte4(block, 0xf3, 0x0f, 0x7e, 0x05 | (dst_reg << 3)); /*MOVQ [p], src_reg*/
codegen_addlong(block, (uint32_t)p);
}
}
void host_x86_MOVQ_XREG_ABS_REG_REG_SHIFT(codeblock_t *block, int dst_reg, uint32_t addr, int src_reg_a, int src_reg_b, int shift)
{
if (addr < 0x80 || addr >= 0xffffff80)
{
codegen_alloc_bytes(block, 6);
codegen_addbyte3(block, 0xf3, 0x0f, 0x7e); /*MOVQ XMMx, addr[src_reg_a + src_reg_b << shift]*/
codegen_addbyte3(block, 0x44 | (dst_reg << 3), src_reg_a | (src_reg_b << 3) | (shift << 6), addr & 0xff);
}
else
{
codegen_alloc_bytes(block, 9);
codegen_addbyte3(block, 0xf3, 0x0f, 0x7e); /*MOVQ XMMx, addr[src_reg_a + src_reg_b << shift]*/
codegen_addbyte2(block, 0x84 | (dst_reg << 3), src_reg_a | (src_reg_b << 3) | (shift << 6));
codegen_addlong(block, addr);
}
}
void host_x86_MOVQ_XREG_BASE_INDEX(codeblock_t *block, int dst_reg, int base_reg, int idx_reg)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0xf3, 0x0f, 0x7e, 0x04 | (dst_reg << 3)); /*MOVQ XMMx, [base_reg + idx_reg]*/
codegen_addbyte(block, base_reg | (idx_reg << 3));
}
void host_x86_MOVQ_XREG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, int offset)
{
if (offset >= -128 && offset < 127)
{
if (base_reg == REG_ESP)
{
codegen_alloc_bytes(block, 6);
codegen_addbyte4(block, 0xf3, 0x0f, 0x7e, 0x44 | (dst_reg << 3)); /*MOVQ XMMx, [ESP + offset]*/
codegen_addbyte2(block, 0x24, offset);
}
else
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0xf3, 0x0f, 0x7e, 0x40 | base_reg | (dst_reg << 3)); /*MOVQ XMMx, [base_reg + offset]*/
codegen_addbyte(block, offset);
}
}
else
fatal("MOVQ_REG_BASE_OFFSET - offset %i\n", offset);
}
void host_x86_MOVQ_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf3, 0x0f, 0x7e, 0xc0 | src_reg | (dst_reg << 3)); /*MOVQ dst_reg, src_reg*/
}
void host_x86_MAXPS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 3);
codegen_addbyte3(block, 0x0f, 0x5f, 0xc0 | src_reg | (dst_reg << 3)); /*MAXPS dst_reg, src_reg*/
}
void host_x86_MINPS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 3);
codegen_addbyte3(block, 0x0f, 0x5d, 0xc0 | src_reg | (dst_reg << 3)); /*MINPS dst_reg, src_reg*/
}
void host_x86_MULPS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 3);
codegen_addbyte3(block, 0x0f, 0x59, 0xc0 | src_reg | (dst_reg << 3)); /*MULPS dst_reg, src_reg*/
}
void host_x86_MULSD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf2, 0x0f, 0x59, 0xc0 | src_reg | (dst_reg << 3));
}
void host_x86_PACKSSWB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 9);
codegen_addbyte4(block, 0x66, 0x0f, 0x63, 0xc0 | src_reg | (dst_reg << 3)); /*PACKSSWB dst_reg, src_reg*/
codegen_addbyte4(block, 0x66, 0x0f, 0x70, 0xc0 | dst_reg | (dst_reg << 3)); /*PSHUFD dst_reg, dst_reg, 0x88 (move bits 64-95 to 32-63)*/
codegen_addbyte(block, 0x88);
}
void host_x86_PACKSSDW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 9);
codegen_addbyte4(block, 0x66, 0x0f, 0x6b, 0xc0 | src_reg | (dst_reg << 3)); /*PACKSSDW dst_reg, src_reg*/
codegen_addbyte4(block, 0x66, 0x0f, 0x70, 0xc0 | dst_reg | (dst_reg << 3)); /*PSHUFD dst_reg, dst_reg, 0x88 (move bits 64-95 to 32-63)*/
codegen_addbyte(block, 0x88);
}
void host_x86_PACKUSWB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 9);
codegen_addbyte4(block, 0x66, 0x0f, 0x67, 0xc0 | src_reg | (dst_reg << 3)); /*PACKUSWB dst_reg, src_reg*/
codegen_addbyte4(block, 0x66, 0x0f, 0x70, 0xc0 | dst_reg | (dst_reg << 3)); /*PSHUFD dst_reg, dst_reg, 0x88 (move bits 64-95 to 32-63)*/
codegen_addbyte(block, 0x88);
}
void host_x86_PADDB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xfc, 0xc0 | src_reg | (dst_reg << 3)); /*PADDB dst_reg, src_reg*/
}
void host_x86_PADDW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xfd, 0xc0 | src_reg | (dst_reg << 3)); /*PADDW dst_reg, src_reg*/
}
void host_x86_PADDD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xfe, 0xc0 | src_reg | (dst_reg << 3)); /*PADDD dst_reg, src_reg*/
}
void host_x86_PADDSB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xec, 0xc0 | src_reg | (dst_reg << 3)); /*PADDSB dst_reg, src_reg*/
}
void host_x86_PADDSW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xed, 0xc0 | src_reg | (dst_reg << 3)); /*PADDSW dst_reg, src_reg*/
}
void host_x86_PADDUSB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xdc, 0xc0 | src_reg | (dst_reg << 3)); /*PADDUSB dst_reg, src_reg*/
}
void host_x86_PADDUSW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xdd, 0xc0 | src_reg | (dst_reg << 3)); /*PADDUSW dst_reg, src_reg*/
}
void host_x86_PAND_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xdb, 0xc0 | src_reg | (dst_reg << 3)); /*PAND dst_reg, src_reg*/
}
void host_x86_PANDN_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xdf, 0xc0 | src_reg | (dst_reg << 3)); /*PANDN dst_reg, src_reg*/
}
void host_x86_POR_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xeb, 0xc0 | src_reg | (dst_reg << 3)); /*POR dst_reg, src_reg*/
}
void host_x86_PXOR_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xef, 0xc0 | src_reg | (dst_reg << 3)); /*PXOR dst_reg, src_reg*/
}
void host_x86_PCMPEQB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x74, 0xc0 | src_reg | (dst_reg << 3)); /*PCMPEQB dst_reg, src_reg*/
}
void host_x86_PCMPEQW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x75, 0xc0 | src_reg | (dst_reg << 3)); /*PCMPEQW dst_reg, src_reg*/
}
void host_x86_PCMPEQD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x76, 0xc0 | src_reg | (dst_reg << 3)); /*PCMPEQD dst_reg, src_reg*/
}
void host_x86_PCMPGTB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x64, 0xc0 | src_reg | (dst_reg << 3)); /*PCMPGTB dst_reg, src_reg*/
}
void host_x86_PCMPGTW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x65, 0xc0 | src_reg | (dst_reg << 3)); /*PCMPGTW dst_reg, src_reg*/
}
void host_x86_PCMPGTD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x66, 0xc0 | src_reg | (dst_reg << 3)); /*PCMPGTD dst_reg, src_reg*/
}
void host_x86_PMADDWD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xf5, 0xc0 | src_reg | (dst_reg << 3)); /*PMULLW dst_reg, src_reg*/
}
void host_x86_PMULHW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xe5, 0xc0 | src_reg | (dst_reg << 3)); /*PMULLW dst_reg, src_reg*/
}
void host_x86_PMULLW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xd5, 0xc0 | src_reg | (dst_reg << 3)); /*PMULLW dst_reg, src_reg*/
}
void host_x86_PSHUFD_XREG_XREG_IMM(codeblock_t *block, int dst_reg, int src_reg, uint8_t shuffle)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x70, 0xc0 | src_reg | (dst_reg << 3)); /*PSHUFD dst_reg, dst_reg, 0xee (move top 64-bits to low 64-bits)*/
codegen_addbyte(block, shuffle);
}
void host_x86_PSLLW_XREG_IMM(codeblock_t *block, int dst_reg, int shift)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x71, 0xc0 | 0x30 | dst_reg); /*PSLLW dst_reg, imm*/
codegen_addbyte(block, shift);
}
void host_x86_PSLLD_XREG_IMM(codeblock_t *block, int dst_reg, int shift)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x72, 0xc0 | 0x30 | dst_reg); /*PSLLD dst_reg, imm*/
codegen_addbyte(block, shift);
}
void host_x86_PSLLQ_XREG_IMM(codeblock_t *block, int dst_reg, int shift)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x73, 0xc0 | 0x30 | dst_reg); /*PSLLD dst_reg, imm*/
codegen_addbyte(block, shift);
}
void host_x86_PSRAW_XREG_IMM(codeblock_t *block, int dst_reg, int shift)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x71, 0xc0 | 0x20 | dst_reg); /*PSRAW dst_reg, imm*/
codegen_addbyte(block, shift);
}
void host_x86_PSRAD_XREG_IMM(codeblock_t *block, int dst_reg, int shift)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x72, 0xc0 | 0x20 | dst_reg); /*PSRAD dst_reg, imm*/
codegen_addbyte(block, shift);
}
void host_x86_PSRAQ_XREG_IMM(codeblock_t *block, int dst_reg, int shift)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x73, 0xc0 | 0x20 | dst_reg); /*PSRAD dst_reg, imm*/
codegen_addbyte(block, shift);
}
void host_x86_PSRLW_XREG_IMM(codeblock_t *block, int dst_reg, int shift)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x71, 0xc0 | 0x10 | dst_reg); /*PSRLW dst_reg, imm*/
codegen_addbyte(block, shift);
}
void host_x86_PSRLD_XREG_IMM(codeblock_t *block, int dst_reg, int shift)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x72, 0xc0 | 0x10 | dst_reg); /*PSRLD dst_reg, imm*/
codegen_addbyte(block, shift);
}
void host_x86_PSRLQ_XREG_IMM(codeblock_t *block, int dst_reg, int shift)
{
codegen_alloc_bytes(block, 5);
codegen_addbyte4(block, 0x66, 0x0f, 0x73, 0xc0 | 0x10 | dst_reg); /*PSRLD dst_reg, imm*/
codegen_addbyte(block, shift);
}
void host_x86_PSUBB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xf8, 0xc0 | src_reg | (dst_reg << 3)); /*PADDB dst_reg, src_reg*/
}
void host_x86_PSUBW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xf9, 0xc0 | src_reg | (dst_reg << 3)); /*PADDW dst_reg, src_reg*/
}
void host_x86_PSUBD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xfa, 0xc0 | src_reg | (dst_reg << 3)); /*PADDD dst_reg, src_reg*/
}
void host_x86_PSUBSB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xe8, 0xc0 | src_reg | (dst_reg << 3)); /*PSUBSB dst_reg, src_reg*/
}
void host_x86_PSUBSW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xe9, 0xc0 | src_reg | (dst_reg << 3)); /*PSUBSW dst_reg, src_reg*/
}
void host_x86_PSUBUSB_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xd8, 0xc0 | src_reg | (dst_reg << 3)); /*PSUBUSB dst_reg, src_reg*/
}
void host_x86_PSUBUSW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0xd9, 0xc0 | src_reg | (dst_reg << 3)); /*PSUBUSW dst_reg, src_reg*/
}
void host_x86_PUNPCKLBW_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x60, 0xc0 | src_reg | (dst_reg << 3)); /*PUNPCKLBW dst_reg, src_reg*/
}
void host_x86_PUNPCKLWD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x61, 0xc0 | src_reg | (dst_reg << 3)); /*PUNPCKLWD dst_reg, src_reg*/
}
void host_x86_PUNPCKLDQ_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0x66, 0x0f, 0x62, 0xc0 | src_reg | (dst_reg << 3)); /*PUNPCKLDQ dst_reg, src_reg*/
}
void host_x86_SQRTSD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf2, 0x0f, 0x51, 0xc0 | src_reg | (dst_reg << 3)); /*SQRTSD dst_reg, src_reg*/
}
void host_x86_SQRTSS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf3, 0x0f, 0x51, 0xc0 | src_reg | (dst_reg << 3)); /*SQRTSS dst_reg, src_reg*/
}
void host_x86_SUBPS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 3);
codegen_addbyte3(block, 0x0f, 0x5c, 0xc0 | src_reg | (dst_reg << 3)); /*SUBPS dst_reg, src_reg*/
}
void host_x86_SUBSD_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 4);
codegen_addbyte4(block, 0xf2, 0x0f, 0x5c, 0xc0 | src_reg | (dst_reg << 3));
}
void host_x86_UNPCKLPS_XREG_XREG(codeblock_t *block, int dst_reg, int src_reg)
{
codegen_alloc_bytes(block, 3);
codegen_addbyte3(block, 0x0f, 0x14, 0xc0 | src_reg | (dst_reg << 3));
}
#endif