diff --git a/src/codegen/codegen_ops_x86-64.h b/src/codegen/codegen_ops_x86-64.h
index e46f55a05..01fb292cf 100644
--- a/src/codegen/codegen_ops_x86-64.h
+++ b/src/codegen/codegen_ops_x86-64.h
@@ -3571,6 +3571,8 @@ FP_FLD(int reg)
     addbyte(0x89); /*MOV [TOP], EBX*/
     addbyte(0x5d);
     addbyte((uint8_t) cpu_state_offset(TOP));
+
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 
 static __inline void
@@ -3688,6 +3690,8 @@ FP_LOAD_S(void)
     addbyte(0x44);
     addbyte(0x1d);
     addbyte((uint8_t) cpu_state_offset(tag));
+
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 static __inline void
 FP_LOAD_D(void)
@@ -3717,6 +3721,8 @@ FP_LOAD_D(void)
     addbyte(0x44);
     addbyte(0x1d);
     addbyte((uint8_t) cpu_state_offset(tag));
+
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 
 static __inline void
@@ -3754,6 +3760,8 @@ FP_LOAD_IW(void)
     addbyte(0x44);
     addbyte(0x1d);
     addbyte((uint8_t) cpu_state_offset(tag));
+
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 static __inline void
 FP_LOAD_IL(void)
@@ -3787,6 +3795,8 @@ FP_LOAD_IL(void)
     addbyte(0x44);
     addbyte(0x1d);
     addbyte((uint8_t) cpu_state_offset(tag));
+
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 static __inline void
 FP_LOAD_IQ(void)
@@ -3831,6 +3841,8 @@ FP_LOAD_IQ(void)
     addbyte(0x44);
     addbyte(0x1d);
     addbyte((uint8_t) cpu_state_offset(tag));
+
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 
 static __inline void
@@ -3863,6 +3875,8 @@ FP_LOAD_IMM_Q(uint64_t v)
     addbyte(0x1d);
     addbyte((uint8_t) cpu_state_offset(tag));
     addbyte(v ? 0 : 1);
+
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 
 static __inline void
diff --git a/src/codegen/codegen_ops_x86.h b/src/codegen/codegen_ops_x86.h
index c48324c2a..3b47d81a9 100644
--- a/src/codegen/codegen_ops_x86.h
+++ b/src/codegen/codegen_ops_x86.h
@@ -1794,6 +1794,7 @@ FP_FLD(int reg)
         addbyte(0x5d);
         addbyte((uint8_t) cpu_state_offset(TOP));
     }
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 
 static __inline void
@@ -2037,6 +2038,7 @@ FP_LOAD_S(void)
         addbyte(0x1d);
         addbyte((uint8_t) cpu_state_offset(tag[0]));
     }
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 static __inline void
 FP_LOAD_D(void)
@@ -2096,6 +2098,7 @@ FP_LOAD_D(void)
         addbyte(0x1d);
         addbyte((uint8_t) cpu_state_offset(tag[0]));
     }
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 static __inline void
 FP_LOAD_IW(void)
@@ -2154,6 +2157,7 @@ FP_LOAD_IW(void)
         addbyte(0x1d);
         addbyte((uint8_t) cpu_state_offset(tag[0]));
     }
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 static __inline void
 FP_LOAD_IL(void)
@@ -2210,6 +2214,7 @@ FP_LOAD_IL(void)
         addbyte(0x1d);
         addbyte((uint8_t) cpu_state_offset(tag[0]));
     }
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 static __inline void
 FP_LOAD_IQ(void)
@@ -2285,6 +2290,7 @@ FP_LOAD_IQ(void)
         addbyte(0x1d);
         addbyte((uint8_t) cpu_state_offset(tag[0]));
     }
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 
 static __inline void
@@ -2336,6 +2342,7 @@ FP_LOAD_IMM_Q(uint64_t v)
         addbyte(0x5d);
         addbyte((uint8_t) cpu_state_offset(TOP));
     }
+    CALL_FUNC((uintptr_t) x87_to_mmxreg);
 }
 
 static __inline int
diff --git a/src/codegen_new/codegen_ops_helpers.h b/src/codegen_new/codegen_ops_helpers.h
index 92b721099..2304aa542 100644
--- a/src/codegen_new/codegen_ops_helpers.h
+++ b/src/codegen_new/codegen_ops_helpers.h
@@ -64,6 +64,9 @@ fpu_POP2(codeblock_t *block, ir_data_t *ir)
 static inline void
 fpu_PUSH(codeblock_t *block, ir_data_t *ir)
 {
+    uop_LOAD_FUNC_ARG_IMM(ir, 0, ((uint16_t) cpu_state.TOP - 1));
+    uop_CALL_FUNC(ir, x87_to_mmxreg);
+
     if (block->flags & CODEBLOCK_STATIC_TOP)
         uop_MOV_IMM(ir, IREG_FPU_TOP, cpu_state.TOP - 1);
     else
diff --git a/src/cpu/386_dynarec.c b/src/cpu/386_dynarec.c
index 5f41c416a..5cb385122 100644
--- a/src/cpu/386_dynarec.c
+++ b/src/cpu/386_dynarec.c
@@ -74,6 +74,51 @@ x386_dynarec_log(const char *fmt, ...)
 #    define x386_dynarec_log(fmt, ...)
 #endif
 
+/* Deliberately stashed here; this function is only relevant for dynamic recompilers. */
+#if defined(_MSC_VER) && !defined(__clang__)
+#    if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86
+#        define X87_INLINE_ASM
+#    endif
+#else
+#    if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 || defined __amd64__
+#        define X87_INLINE_ASM
+#    endif
+#endif
+
+#ifdef USE_NEW_DYNAREC
+void
+x87_to_mmxreg(uint16_t reg)
+#else
+void
+x87_to_mmxreg(void)
+#endif
+{
+#ifndef USE_NEW_DYNAREC
+    uint32_t reg = cpu_state.TOP & 7;
+#endif
+    double val = cpu_state.ST[reg & 7];
+#ifdef X87_INLINE_ASM
+    unsigned char buffer[10];
+#else
+    x87_conv_t test;
+#endif
+
+#ifdef X87_INLINE_ASM
+    __asm volatile(""
+                   :
+                   :
+                   : "memory");
+
+    __asm volatile("fldl %1\n"
+                   "fstpt %0\n" : "=m"(buffer) : "m"(val));
+
+    cpu_state.MM[reg & 7].q = (*(uint64_t *) buffer);
+#else
+    x87_to80(val, &test);
+    cpu_state.MM[reg & 7].q = test.eind.ll;
+#endif
+}
+
 static __inline void
 fetch_ea_32_long(uint32_t rmdat)
 {
diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h
index 1d003ddc9..98c6f4938 100644
--- a/src/cpu/cpu.h
+++ b/src/cpu/cpu.h
@@ -829,6 +829,12 @@ extern uint16_t prefetch_queue_get_ip(void);
 extern int      prefetch_queue_get_prefetching(void);
 extern int      prefetch_queue_get_size(void);
 
+#ifdef USE_NEW_DYNAREC
+extern void x87_to_mmxreg(uint16_t reg);
+#else
+extern void x87_to_mmxreg(void);
+#endif
+
 #define prefetch_queue_set_suspended(s) prefetch_queue_set_prefetching(!s)
 #define prefetch_queue_get_suspended    !prefetch_queue_get_prefetching
 
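
Note (illustrative, not part of the patch): the CALL_FUNC/uop_CALL_FUNC hooks added above invoke x87_to_mmxreg() after every x87 load so that the MMX register aliased to the freshly written ST entry stays in sync. What the patch stores into cpu_state.MM[reg].q is the low 64 bits of the value's 80-bit extended-precision encoding, i.e. its significand with the explicit integer bit. The standalone C sketch below reproduces that mapping for normal doubles only (zeroes, denormals, infinities and NaNs are skipped for brevity); the helper name x87_alias_of is hypothetical and not part of 86Box.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* For a normal IEEE-754 double, the 80-bit extended significand is the
   explicit integer bit followed by the 52-bit fraction shifted into the
   top of a 64-bit word -- the same 64 bits x87_to_mmxreg() copies into
   the aliased MMX register. */
static uint64_t
x87_alias_of(double val)
{
    uint64_t bits;
    memcpy(&bits, &val, sizeof(bits));
    return 0x8000000000000000ULL | ((bits & 0x000fffffffffffffULL) << 11);
}

int
main(void)
{
    printf("%016llx\n", (unsigned long long) x87_alias_of(1.0)); /* 8000000000000000 */
    printf("%016llx\n", (unsigned long long) x87_alias_of(1.5)); /* c000000000000000 */
    return 0;
}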