From 168910b72ff1820a2524b642496483a910ecc81b Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Wed, 26 Feb 2025 11:39:04 +0600 Subject: [PATCH 01/22] All held-down keys are now released when Alt-Tab'ing out Only applies when Raw Input is in use on Windows --- src/device/keyboard.c | 14 ++++++++++++++ src/include/86box/keyboard.h | 1 + src/qt/qt_mainwindow.cpp | 2 ++ 3 files changed, 17 insertions(+) diff --git a/src/device/keyboard.c b/src/device/keyboard.c index 582dcf0b1..58d5a4724 100644 --- a/src/device/keyboard.c +++ b/src/device/keyboard.c @@ -332,6 +332,20 @@ keyboard_input(int down, uint16_t scan) } } +void +keyboard_all_up(void) +{ + for (unsigned short i = 0; i < 0x200; i++) { + if (recv_key_ui[i]) { + recv_key_ui[i] = 0; + } + if (recv_key[i]) { + recv_key[i] = 0; + key_process(i, 0); + } + } +} + static uint8_t keyboard_do_break(uint16_t scan) { diff --git a/src/include/86box/keyboard.h b/src/include/86box/keyboard.h index ec5c05775..110e4f760 100644 --- a/src/include/86box/keyboard.h +++ b/src/include/86box/keyboard.h @@ -269,6 +269,7 @@ extern void keyboard_poll_host(void); extern void keyboard_process(void); extern uint16_t keyboard_convert(int ch); extern void keyboard_input(int down, uint16_t scan); +extern void keyboard_all_up(void); extern void keyboard_update_states(uint8_t cl, uint8_t nl, uint8_t sl); extern uint8_t keyboard_get_shift(void); extern void keyboard_get_states(uint8_t *cl, uint8_t *nl, uint8_t *sl); diff --git a/src/qt/qt_mainwindow.cpp b/src/qt/qt_mainwindow.cpp index 3ee58f23b..62ac94f30 100644 --- a/src/qt/qt_mainwindow.cpp +++ b/src/qt/qt_mainwindow.cpp @@ -277,6 +277,8 @@ MainWindow::MainWindow(QWidget *parent) if (mouse_capture) emit setMouseCapture(false); + keyboard_all_up(); + if (do_auto_pause && !dopause) { auto_paused = 1; plat_pause(1); From 6d3816df64aac6cb0e604bab30239c3b4b888265 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Thu, 27 Feb 2025 01:03:00 +0600 Subject: [PATCH 02/22] x87: Fix Final Reality discolored screen for interpreter --- src/cpu/x87_ops.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/cpu/x87_ops.h b/src/cpu/x87_ops.h index f1362bf76..276c32876 100644 --- a/src/cpu/x87_ops.h +++ b/src/cpu/x87_ops.h @@ -113,12 +113,33 @@ typedef union { static __inline void x87_push(double i) { +#ifdef X87_INLINE_ASM + unsigned char buffer[10]; +#else + x87_conv_t test; +#endif #ifdef USE_NEW_DYNAREC cpu_state.TOP--; #else cpu_state.TOP = (cpu_state.TOP - 1) & 7; #endif cpu_state.ST[cpu_state.TOP & 7] = i; + +#ifdef X87_INLINE_ASM + __asm volatile("" + : + : + : "memory"); + + __asm volatile("fldl %1\n" + "fstpt %0\n" : "=m"(buffer) : "m"(i)); + + cpu_state.MM[cpu_state.TOP & 7].q = (*(uint64_t*)buffer); +#else + x87_to80(i, &test); + cpu_state.MM[cpu_state.TOP & 7].q = test.eind.ll; +#endif + #ifdef USE_NEW_DYNAREC cpu_state.tag[cpu_state.TOP & 7] = TAG_VALID; #else @@ -129,6 +150,11 @@ x87_push(double i) static __inline void x87_push_u64(uint64_t i) { +#ifdef X87_INLINE_ASM + unsigned char buffer[10]; +#else + x87_conv_t test; +#endif union { double d; uint64_t ll; @@ -142,6 +168,21 @@ x87_push_u64(uint64_t i) cpu_state.TOP = (cpu_state.TOP - 1) & 7; #endif cpu_state.ST[cpu_state.TOP & 7] = td.d; + +#ifdef X87_INLINE_ASM + __asm volatile("" + : + : + : "memory"); + + __asm volatile("fldl %1\n" + "fstpt %0\n" : "=m"(buffer) : "m"(td.d)); + + cpu_state.MM[cpu_state.TOP & 7].q = (*(uint64_t*)buffer); +#else + x87_to80(td.d, &test); + cpu_state.MM[cpu_state.TOP & 7].q = test.eind.ll; +#endif #ifdef USE_NEW_DYNAREC cpu_state.tag[cpu_state.TOP & 7] = TAG_VALID; #else From 03dd94f361259b102fad8cfce2592fa81b801ee8 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Thu, 27 Feb 2025 13:50:45 +0600 Subject: [PATCH 03/22] x87: Fix Final Reality discolored screen for all dynarecs --- src/codegen/codegen_ops_x86-64.h | 14 +++++++++ src/codegen/codegen_ops_x86.h | 7 +++++ src/codegen_new/codegen_ops_helpers.h | 3 ++ src/cpu/386_dynarec.c | 45 +++++++++++++++++++++++++++ src/cpu/cpu.h | 6 ++++ 5 files changed, 75 insertions(+) diff --git a/src/codegen/codegen_ops_x86-64.h b/src/codegen/codegen_ops_x86-64.h index e46f55a05..01fb292cf 100644 --- a/src/codegen/codegen_ops_x86-64.h +++ b/src/codegen/codegen_ops_x86-64.h @@ -3571,6 +3571,8 @@ FP_FLD(int reg) addbyte(0x89); /*MOV [TOP], EBX*/ addbyte(0x5d); addbyte((uint8_t) cpu_state_offset(TOP)); + + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -3688,6 +3690,8 @@ FP_LOAD_S(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); + + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_D(void) @@ -3717,6 +3721,8 @@ FP_LOAD_D(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); + + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -3754,6 +3760,8 @@ FP_LOAD_IW(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); + + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IL(void) @@ -3787,6 +3795,8 @@ FP_LOAD_IL(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); + + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IQ(void) @@ -3831,6 +3841,8 @@ FP_LOAD_IQ(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); + + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -3863,6 +3875,8 @@ FP_LOAD_IMM_Q(uint64_t v) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); addbyte(v ? 0 : 1); + + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void diff --git a/src/codegen/codegen_ops_x86.h b/src/codegen/codegen_ops_x86.h index c48324c2a..3b47d81a9 100644 --- a/src/codegen/codegen_ops_x86.h +++ b/src/codegen/codegen_ops_x86.h @@ -1794,6 +1794,7 @@ FP_FLD(int reg) addbyte(0x5d); addbyte((uint8_t) cpu_state_offset(TOP)); } + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -2037,6 +2038,7 @@ FP_LOAD_S(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_D(void) @@ -2096,6 +2098,7 @@ FP_LOAD_D(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IW(void) @@ -2154,6 +2157,7 @@ FP_LOAD_IW(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IL(void) @@ -2210,6 +2214,7 @@ FP_LOAD_IL(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IQ(void) @@ -2285,6 +2290,7 @@ FP_LOAD_IQ(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -2336,6 +2342,7 @@ FP_LOAD_IMM_Q(uint64_t v) addbyte(0x5d); addbyte((uint8_t) cpu_state_offset(TOP)); } + CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline int diff --git a/src/codegen_new/codegen_ops_helpers.h b/src/codegen_new/codegen_ops_helpers.h index 92b721099..2304aa542 100644 --- a/src/codegen_new/codegen_ops_helpers.h +++ b/src/codegen_new/codegen_ops_helpers.h @@ -64,6 +64,9 @@ fpu_POP2(codeblock_t *block, ir_data_t *ir) static inline void fpu_PUSH(codeblock_t *block, ir_data_t *ir) { + uop_LOAD_FUNC_ARG_IMM(ir, 0, ((uint16_t)cpu_state.TOP - 1)); + uop_CALL_FUNC(ir, x87_to_mmxreg); + if (block->flags & CODEBLOCK_STATIC_TOP) uop_MOV_IMM(ir, IREG_FPU_TOP, cpu_state.TOP - 1); else diff --git a/src/cpu/386_dynarec.c b/src/cpu/386_dynarec.c index 5f41c416a..5cb385122 100644 --- a/src/cpu/386_dynarec.c +++ b/src/cpu/386_dynarec.c @@ -74,6 +74,51 @@ x386_dynarec_log(const char *fmt, ...) # define x386_dynarec_log(fmt, ...) #endif +/* Deliberately stashed here; this function is only relevant for dynamic recompilers. */ +#if defined(_MSC_VER) && !defined(__clang__) +# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 +# define X87_INLINE_ASM +# endif +#else +# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 || defined __amd64__ +# define X87_INLINE_ASM +# endif +#endif + +#ifdef USE_NEW_DYNAREC +void +x87_to_mmxreg(uint16_t reg) +#else +void +x87_to_mmxreg(void) +#endif +{ +#ifndef USE_NEW_DYNAREC + uint32_t reg = cpu_state.TOP & 7; +#endif + double val = cpu_state.ST[reg & 7]; +#ifdef X87_INLINE_ASM + unsigned char buffer[10]; +#else + x87_conv_t test; +#endif + +#ifdef X87_INLINE_ASM + __asm volatile("" + : + : + : "memory"); + + __asm volatile("fldl %1\n" + "fstpt %0\n" : "=m"(buffer) : "m"(val)); + + cpu_state.MM[reg & 7].q = (*(uint64_t*)buffer); +#else + x87_to80(val, &test); + cpu_state.MM[reg & 7].q = test.eind.ll; +#endif +} + static __inline void fetch_ea_32_long(uint32_t rmdat) { diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h index 1d003ddc9..98c6f4938 100644 --- a/src/cpu/cpu.h +++ b/src/cpu/cpu.h @@ -829,6 +829,12 @@ extern uint16_t prefetch_queue_get_ip(void); extern int prefetch_queue_get_prefetching(void); extern int prefetch_queue_get_size(void); +#ifdef USE_NEW_DYNAREC +extern void x87_to_mmxreg(uint16_t reg); +#else +extern void x87_to_mmxreg(void); +#endif + #define prefetch_queue_set_suspended(s) prefetch_queue_set_prefetching(!s) #define prefetch_queue_get_suspended !prefetch_queue_get_prefetching From c7153916eb21c67647e4df2512cda9b8776e5653 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Thu, 27 Feb 2025 14:33:44 +0600 Subject: [PATCH 04/22] Fix compile on ARM64 --- src/cpu/386_dynarec.c | 70 +++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/src/cpu/386_dynarec.c b/src/cpu/386_dynarec.c index 5cb385122..04681c9dc 100644 --- a/src/cpu/386_dynarec.c +++ b/src/cpu/386_dynarec.c @@ -74,7 +74,6 @@ x386_dynarec_log(const char *fmt, ...) # define x386_dynarec_log(fmt, ...) #endif -/* Deliberately stashed here; this function is only relevant for dynamic recompilers. */ #if defined(_MSC_VER) && !defined(__clang__) # if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 # define X87_INLINE_ASM @@ -85,40 +84,6 @@ x386_dynarec_log(const char *fmt, ...) # endif #endif -#ifdef USE_NEW_DYNAREC -void -x87_to_mmxreg(uint16_t reg) -#else -void -x87_to_mmxreg(void) -#endif -{ -#ifndef USE_NEW_DYNAREC - uint32_t reg = cpu_state.TOP & 7; -#endif - double val = cpu_state.ST[reg & 7]; -#ifdef X87_INLINE_ASM - unsigned char buffer[10]; -#else - x87_conv_t test; -#endif - -#ifdef X87_INLINE_ASM - __asm volatile("" - : - : - : "memory"); - - __asm volatile("fldl %1\n" - "fstpt %0\n" : "=m"(buffer) : "m"(val)); - - cpu_state.MM[reg & 7].q = (*(uint64_t*)buffer); -#else - x87_to80(val, &test); - cpu_state.MM[reg & 7].q = test.eind.ll; -#endif -} - static __inline void fetch_ea_32_long(uint32_t rmdat) { @@ -278,6 +243,41 @@ fetch_ea_16_long(uint32_t rmdat) # define CACHE_ON() (!(cr0 & (1 << 30)) && !(cpu_state.flags & T_FLAG)) #endif +/* Deliberately stashed here; this function is only relevant for dynamic recompilers. */ +#ifdef USE_NEW_DYNAREC +void +x87_to_mmxreg(uint16_t reg) +#else +void +x87_to_mmxreg(void) +#endif +{ +#ifndef USE_NEW_DYNAREC + uint32_t reg = cpu_state.TOP & 7; +#endif + double val = cpu_state.ST[reg & 7]; +#ifdef X87_INLINE_ASM + unsigned char buffer[10]; +#else + x87_conv_t test; +#endif + +#ifdef X87_INLINE_ASM + __asm volatile("" + : + : + : "memory"); + + __asm volatile("fldl %1\n" + "fstpt %0\n" : "=m"(buffer) : "m"(val)); + + cpu_state.MM[reg & 7].q = (*(uint64_t*)buffer); +#else + x87_to80(val, &test); + cpu_state.MM[reg & 7].q = test.eind.ll; +#endif +} + #ifdef USE_DYNAREC int32_t cycles_main = 0; static int32_t cycles_old = 0; From fc656cbe0519b1a108aef25a333bf61cd5131cf8 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Thu, 27 Feb 2025 16:31:27 +0600 Subject: [PATCH 05/22] Mouse cursor now properly appears after uncapture --- src/qt/qt_mainwindow.cpp | 4 +++- src/qt/qt_rendererstack.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/qt/qt_mainwindow.cpp b/src/qt/qt_mainwindow.cpp index 62ac94f30..89d17a07c 100644 --- a/src/qt/qt_mainwindow.cpp +++ b/src/qt/qt_mainwindow.cpp @@ -262,8 +262,10 @@ MainWindow::MainWindow(QWidget *parent) ui->stackedWidget->mouse_capture_func(this->windowHandle()); } else { this->releaseKeyboard(); - if (ui->stackedWidget->mouse_uncapture_func) + if (ui->stackedWidget->mouse_uncapture_func) { ui->stackedWidget->mouse_uncapture_func(); + } + ui->stackedWidget->unsetCursor(); } }); diff --git a/src/qt/qt_rendererstack.cpp b/src/qt/qt_rendererstack.cpp index a8bd47a79..8c31da2b2 100644 --- a/src/qt/qt_rendererstack.cpp +++ b/src/qt/qt_rendererstack.cpp @@ -155,7 +155,7 @@ RendererStack::mouseReleaseEvent(QMouseEvent *event) } if (mouse_capture && (event->button() == Qt::MiddleButton) && (mouse_get_buttons() < 3)) { plat_mouse_capture(0); - this->setCursor(Qt::ArrowCursor); + this->unsetCursor(); isMouseDown &= ~1; return; } From 95f30192e5cb7b20755c1235625c107a3391fe93 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Thu, 27 Feb 2025 22:09:55 +0600 Subject: [PATCH 06/22] USB multimedia keys now work on Windows --- src/qt/qt_main.cpp | 115 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/src/qt/qt_main.cpp b/src/qt/qt_main.cpp index 72b0eef53..7723261b4 100644 --- a/src/qt/qt_main.cpp +++ b/src/qt/qt_main.cpp @@ -211,8 +211,121 @@ emu_LowLevelKeyboardProc(int nCode, WPARAM wParam, LPARAM lParam) (GetForegroundWindow() == ((HWND) secondaryRenderer->winId()))); } - if ((nCode < 0) || (nCode != HC_ACTION) || !is_over_window) + bool skip = ((nCode < 0) || (nCode != HC_ACTION) || !is_over_window); + + if (skip) return CallNextHookEx(NULL, nCode, wParam, lParam); + + /* USB keyboards send a scancode of 0x00 for multimedia keys. */ + if (lpKdhs->scanCode == 0x00) { + /* Handle USB keyboard multimedia keys where possible. + Only a handful of keys can be handled via Virtual Key + detection; rest can't be reliably detected. */ + DWORD vkCode = lpKdhs->vkCode; + bool up = !!(lpKdhs->flags & LLKHF_UP); + ret = CallNextHookEx(NULL, nCode, wParam, lParam);; + + switch (vkCode) + { + case VK_MEDIA_PLAY_PAUSE: + { + win_keyboard_handle(0x22, up, 1, 0); + break; + } + case VK_MEDIA_STOP: + { + win_keyboard_handle(0x24, up, 1, 0); + break; + } + case VK_VOLUME_UP: + { + win_keyboard_handle(0x30, up, 1, 0); + break; + } + case VK_VOLUME_DOWN: + { + win_keyboard_handle(0x2E, up, 1, 0); + break; + } + case VK_VOLUME_MUTE: + { + win_keyboard_handle(0x20, up, 1, 0); + break; + } + case VK_MEDIA_NEXT_TRACK: + { + win_keyboard_handle(0x19, up, 1, 0); + break; + } + case VK_MEDIA_PREV_TRACK: + { + win_keyboard_handle(0x10, up, 1, 0); + break; + } + case VK_LAUNCH_MEDIA_SELECT: + { + win_keyboard_handle(0x6D, up, 1, 0); + break; + } + case VK_LAUNCH_MAIL: + { + win_keyboard_handle(0x6C, up, 1, 0); + break; + } + case VK_LAUNCH_APP1: + { + win_keyboard_handle(0x6B, up, 1, 0); + break; + } + case VK_LAUNCH_APP2: + { + win_keyboard_handle(0x21, up, 1, 0); + break; + } + case VK_BROWSER_BACK: + { + win_keyboard_handle(0x6A, up, 1, 0); + break; + } + case VK_BROWSER_FORWARD: + { + win_keyboard_handle(0x69, up, 1, 0); + break; + } + case VK_BROWSER_STOP: + { + win_keyboard_handle(0x68, up, 1, 0); + break; + } + case VK_BROWSER_HOME: + { + win_keyboard_handle(0x32, up, 1, 0); + break; + } + case VK_BROWSER_SEARCH: + { + win_keyboard_handle(0x65, up, 1, 0); + break; + } + case VK_BROWSER_REFRESH: + { + win_keyboard_handle(0x67, up, 1, 0); + break; + } + case VK_BROWSER_FAVORITES: + { + win_keyboard_handle(0x66, up, 1, 0); + break; + } + case VK_HELP: + { + win_keyboard_handle(0x3b, up, 1, 0); + break; + } + } + + return ret; + } else if ((lpKdhs->scanCode == 0x01) && (lpKdhs->flags & LLKHF_ALTDOWN) && !(lpKdhs->flags & (LLKHF_UP | LLKHF_EXTENDED))) ret = TRUE; From 86342bfffe057d4ff01b18b05cd89816228c636c Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Thu, 27 Feb 2025 23:26:42 +0600 Subject: [PATCH 07/22] OS/2 3.0 icon backgrounds are now drawn properly --- src/video/vid_mga.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/video/vid_mga.c b/src/video/vid_mga.c index 3937c2a7d..f11983a20 100644 --- a/src/video/vid_mga.c +++ b/src/video/vid_mga.c @@ -4677,6 +4677,7 @@ blit_trap(mystique_t *mystique) int err_l = (int32_t)mystique->dwgreg.ar[1]; int err_r = (int32_t)mystique->dwgreg.ar[4]; const int trans_sel = (mystique->dwgreg.dwgctrl_running & DWGCTRL_TRANS_MASK) >> DWGCTRL_TRANS_SHIFT; + bool transc = !!(mystique->dwgreg.dwgctrl_running & DWGCTRL_TRANSC); switch (mystique->dwgreg.dwgctrl_running & DWGCTRL_ATYPE_MASK) { case DWGCTRL_ATYPE_BLK: @@ -4699,6 +4700,7 @@ blit_trap(mystique_t *mystique) int pattern = mystique->dwgreg.pattern[yoff][xoff]; uint32_t dst; + if (!transc || (transc && pattern)) switch (mystique->maccess_running & MACCESS_PWIDTH_MASK) { case MACCESS_PWIDTH_8: svga->vram[(mystique->dwgreg.ydst_lin + x_l) & mystique->vram_mask] = (pattern ? mystique->dwgreg.fcol : mystique->dwgreg.bcol) & 0xff; @@ -4770,6 +4772,7 @@ blit_trap(mystique_t *mystique) uint32_t dst; uint32_t old_dst; + if (!transc || (transc && pattern)) switch (mystique->maccess_running & MACCESS_PWIDTH_MASK) { case MACCESS_PWIDTH_8: dst = svga->vram[(mystique->dwgreg.ydst_lin + x_l) & mystique->vram_mask]; From 0bb89be0ad0e718841ca99ef2405cb42f9666f6f Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Fri, 28 Feb 2025 16:51:13 +0600 Subject: [PATCH 08/22] Revert "Fix compile on ARM64" This reverts commit c7153916eb21c67647e4df2512cda9b8776e5653. --- src/cpu/386_dynarec.c | 70 +++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/src/cpu/386_dynarec.c b/src/cpu/386_dynarec.c index 04681c9dc..5cb385122 100644 --- a/src/cpu/386_dynarec.c +++ b/src/cpu/386_dynarec.c @@ -74,6 +74,7 @@ x386_dynarec_log(const char *fmt, ...) # define x386_dynarec_log(fmt, ...) #endif +/* Deliberately stashed here; this function is only relevant for dynamic recompilers. */ #if defined(_MSC_VER) && !defined(__clang__) # if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 # define X87_INLINE_ASM @@ -84,6 +85,40 @@ x386_dynarec_log(const char *fmt, ...) # endif #endif +#ifdef USE_NEW_DYNAREC +void +x87_to_mmxreg(uint16_t reg) +#else +void +x87_to_mmxreg(void) +#endif +{ +#ifndef USE_NEW_DYNAREC + uint32_t reg = cpu_state.TOP & 7; +#endif + double val = cpu_state.ST[reg & 7]; +#ifdef X87_INLINE_ASM + unsigned char buffer[10]; +#else + x87_conv_t test; +#endif + +#ifdef X87_INLINE_ASM + __asm volatile("" + : + : + : "memory"); + + __asm volatile("fldl %1\n" + "fstpt %0\n" : "=m"(buffer) : "m"(val)); + + cpu_state.MM[reg & 7].q = (*(uint64_t*)buffer); +#else + x87_to80(val, &test); + cpu_state.MM[reg & 7].q = test.eind.ll; +#endif +} + static __inline void fetch_ea_32_long(uint32_t rmdat) { @@ -243,41 +278,6 @@ fetch_ea_16_long(uint32_t rmdat) # define CACHE_ON() (!(cr0 & (1 << 30)) && !(cpu_state.flags & T_FLAG)) #endif -/* Deliberately stashed here; this function is only relevant for dynamic recompilers. */ -#ifdef USE_NEW_DYNAREC -void -x87_to_mmxreg(uint16_t reg) -#else -void -x87_to_mmxreg(void) -#endif -{ -#ifndef USE_NEW_DYNAREC - uint32_t reg = cpu_state.TOP & 7; -#endif - double val = cpu_state.ST[reg & 7]; -#ifdef X87_INLINE_ASM - unsigned char buffer[10]; -#else - x87_conv_t test; -#endif - -#ifdef X87_INLINE_ASM - __asm volatile("" - : - : - : "memory"); - - __asm volatile("fldl %1\n" - "fstpt %0\n" : "=m"(buffer) : "m"(val)); - - cpu_state.MM[reg & 7].q = (*(uint64_t*)buffer); -#else - x87_to80(val, &test); - cpu_state.MM[reg & 7].q = test.eind.ll; -#endif -} - #ifdef USE_DYNAREC int32_t cycles_main = 0; static int32_t cycles_old = 0; From 6bb2b447fda8011e992472fa0959d3ee4cd8deed Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Fri, 28 Feb 2025 16:51:33 +0600 Subject: [PATCH 09/22] Revert "x87: Fix Final Reality discolored screen for all dynarecs" This reverts commit 03dd94f361259b102fad8cfce2592fa81b801ee8. --- src/codegen/codegen_ops_x86-64.h | 14 --------- src/codegen/codegen_ops_x86.h | 7 ----- src/codegen_new/codegen_ops_helpers.h | 3 -- src/cpu/386_dynarec.c | 45 --------------------------- src/cpu/cpu.h | 6 ---- 5 files changed, 75 deletions(-) diff --git a/src/codegen/codegen_ops_x86-64.h b/src/codegen/codegen_ops_x86-64.h index 01fb292cf..e46f55a05 100644 --- a/src/codegen/codegen_ops_x86-64.h +++ b/src/codegen/codegen_ops_x86-64.h @@ -3571,8 +3571,6 @@ FP_FLD(int reg) addbyte(0x89); /*MOV [TOP], EBX*/ addbyte(0x5d); addbyte((uint8_t) cpu_state_offset(TOP)); - - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -3690,8 +3688,6 @@ FP_LOAD_S(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); - - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_D(void) @@ -3721,8 +3717,6 @@ FP_LOAD_D(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); - - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -3760,8 +3754,6 @@ FP_LOAD_IW(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); - - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IL(void) @@ -3795,8 +3787,6 @@ FP_LOAD_IL(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); - - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IQ(void) @@ -3841,8 +3831,6 @@ FP_LOAD_IQ(void) addbyte(0x44); addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); - - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -3875,8 +3863,6 @@ FP_LOAD_IMM_Q(uint64_t v) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag)); addbyte(v ? 0 : 1); - - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void diff --git a/src/codegen/codegen_ops_x86.h b/src/codegen/codegen_ops_x86.h index 3b47d81a9..c48324c2a 100644 --- a/src/codegen/codegen_ops_x86.h +++ b/src/codegen/codegen_ops_x86.h @@ -1794,7 +1794,6 @@ FP_FLD(int reg) addbyte(0x5d); addbyte((uint8_t) cpu_state_offset(TOP)); } - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -2038,7 +2037,6 @@ FP_LOAD_S(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_D(void) @@ -2098,7 +2096,6 @@ FP_LOAD_D(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IW(void) @@ -2157,7 +2154,6 @@ FP_LOAD_IW(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IL(void) @@ -2214,7 +2210,6 @@ FP_LOAD_IL(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void FP_LOAD_IQ(void) @@ -2290,7 +2285,6 @@ FP_LOAD_IQ(void) addbyte(0x1d); addbyte((uint8_t) cpu_state_offset(tag[0])); } - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline void @@ -2342,7 +2336,6 @@ FP_LOAD_IMM_Q(uint64_t v) addbyte(0x5d); addbyte((uint8_t) cpu_state_offset(TOP)); } - CALL_FUNC((uintptr_t) x87_to_mmxreg); } static __inline int diff --git a/src/codegen_new/codegen_ops_helpers.h b/src/codegen_new/codegen_ops_helpers.h index 2304aa542..92b721099 100644 --- a/src/codegen_new/codegen_ops_helpers.h +++ b/src/codegen_new/codegen_ops_helpers.h @@ -64,9 +64,6 @@ fpu_POP2(codeblock_t *block, ir_data_t *ir) static inline void fpu_PUSH(codeblock_t *block, ir_data_t *ir) { - uop_LOAD_FUNC_ARG_IMM(ir, 0, ((uint16_t)cpu_state.TOP - 1)); - uop_CALL_FUNC(ir, x87_to_mmxreg); - if (block->flags & CODEBLOCK_STATIC_TOP) uop_MOV_IMM(ir, IREG_FPU_TOP, cpu_state.TOP - 1); else diff --git a/src/cpu/386_dynarec.c b/src/cpu/386_dynarec.c index 5cb385122..5f41c416a 100644 --- a/src/cpu/386_dynarec.c +++ b/src/cpu/386_dynarec.c @@ -74,51 +74,6 @@ x386_dynarec_log(const char *fmt, ...) # define x386_dynarec_log(fmt, ...) #endif -/* Deliberately stashed here; this function is only relevant for dynamic recompilers. */ -#if defined(_MSC_VER) && !defined(__clang__) -# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 -# define X87_INLINE_ASM -# endif -#else -# if defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined _M_IX86 || defined _M_X64 || defined __amd64__ -# define X87_INLINE_ASM -# endif -#endif - -#ifdef USE_NEW_DYNAREC -void -x87_to_mmxreg(uint16_t reg) -#else -void -x87_to_mmxreg(void) -#endif -{ -#ifndef USE_NEW_DYNAREC - uint32_t reg = cpu_state.TOP & 7; -#endif - double val = cpu_state.ST[reg & 7]; -#ifdef X87_INLINE_ASM - unsigned char buffer[10]; -#else - x87_conv_t test; -#endif - -#ifdef X87_INLINE_ASM - __asm volatile("" - : - : - : "memory"); - - __asm volatile("fldl %1\n" - "fstpt %0\n" : "=m"(buffer) : "m"(val)); - - cpu_state.MM[reg & 7].q = (*(uint64_t*)buffer); -#else - x87_to80(val, &test); - cpu_state.MM[reg & 7].q = test.eind.ll; -#endif -} - static __inline void fetch_ea_32_long(uint32_t rmdat) { diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h index 98c6f4938..1d003ddc9 100644 --- a/src/cpu/cpu.h +++ b/src/cpu/cpu.h @@ -829,12 +829,6 @@ extern uint16_t prefetch_queue_get_ip(void); extern int prefetch_queue_get_prefetching(void); extern int prefetch_queue_get_size(void); -#ifdef USE_NEW_DYNAREC -extern void x87_to_mmxreg(uint16_t reg); -#else -extern void x87_to_mmxreg(void); -#endif - #define prefetch_queue_set_suspended(s) prefetch_queue_set_prefetching(!s) #define prefetch_queue_get_suspended !prefetch_queue_get_prefetching From 23b89d88c4e4fee4914d4c20b92a741e25e7c0ca Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Fri, 28 Feb 2025 16:51:48 +0600 Subject: [PATCH 10/22] Revert "x87: Fix Final Reality discolored screen for interpreter" This reverts commit 6d3816df64aac6cb0e604bab30239c3b4b888265. --- src/cpu/x87_ops.h | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/src/cpu/x87_ops.h b/src/cpu/x87_ops.h index 276c32876..f1362bf76 100644 --- a/src/cpu/x87_ops.h +++ b/src/cpu/x87_ops.h @@ -113,33 +113,12 @@ typedef union { static __inline void x87_push(double i) { -#ifdef X87_INLINE_ASM - unsigned char buffer[10]; -#else - x87_conv_t test; -#endif #ifdef USE_NEW_DYNAREC cpu_state.TOP--; #else cpu_state.TOP = (cpu_state.TOP - 1) & 7; #endif cpu_state.ST[cpu_state.TOP & 7] = i; - -#ifdef X87_INLINE_ASM - __asm volatile("" - : - : - : "memory"); - - __asm volatile("fldl %1\n" - "fstpt %0\n" : "=m"(buffer) : "m"(i)); - - cpu_state.MM[cpu_state.TOP & 7].q = (*(uint64_t*)buffer); -#else - x87_to80(i, &test); - cpu_state.MM[cpu_state.TOP & 7].q = test.eind.ll; -#endif - #ifdef USE_NEW_DYNAREC cpu_state.tag[cpu_state.TOP & 7] = TAG_VALID; #else @@ -150,11 +129,6 @@ x87_push(double i) static __inline void x87_push_u64(uint64_t i) { -#ifdef X87_INLINE_ASM - unsigned char buffer[10]; -#else - x87_conv_t test; -#endif union { double d; uint64_t ll; @@ -168,21 +142,6 @@ x87_push_u64(uint64_t i) cpu_state.TOP = (cpu_state.TOP - 1) & 7; #endif cpu_state.ST[cpu_state.TOP & 7] = td.d; - -#ifdef X87_INLINE_ASM - __asm volatile("" - : - : - : "memory"); - - __asm volatile("fldl %1\n" - "fstpt %0\n" : "=m"(buffer) : "m"(td.d)); - - cpu_state.MM[cpu_state.TOP & 7].q = (*(uint64_t*)buffer); -#else - x87_to80(td.d, &test); - cpu_state.MM[cpu_state.TOP & 7].q = test.eind.ll; -#endif #ifdef USE_NEW_DYNAREC cpu_state.tag[cpu_state.TOP & 7] = TAG_VALID; #else From 843dee57076a28d4028b9105c87ead95e64d7933 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Sat, 1 Mar 2025 15:05:39 +0600 Subject: [PATCH 11/22] x64 NDR: Properly address the entire cpu_state struct All missing edge cases are now handled where possible --- src/codegen_new/codegen_backend_x86-64_ops.c | 23 ++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/codegen_new/codegen_backend_x86-64_ops.c b/src/codegen_new/codegen_backend_x86-64_ops.c index 236a86ce7..2f65b46e3 100644 --- a/src/codegen_new/codegen_backend_x86-64_ops.c +++ b/src/codegen_new/codegen_backend_x86-64_ops.c @@ -509,6 +509,11 @@ host_x86_MOV8_ABS_IMM(codeblock_t *block, void *p, uint32_t imm_data) codegen_alloc_bytes(block, 4); codegen_addbyte3(block, 0xc6, 0x45, offset); /*MOVB offset[RBP], imm_data*/ codegen_addbyte(block, imm_data); + } else if (offset < (1ULL << 32)) { + codegen_alloc_bytes(block, 7); + codegen_addbyte2(block, 0xc6, 0x85); /*MOVB offset[RBP], imm_data*/ + codegen_addlong(block, offset); + codegen_addbyte(block, imm_data); } else { if ((uintptr_t) p >> 32) fatal("host_x86_MOV8_ABS_IMM - out of range %p\n", p); @@ -527,6 +532,11 @@ host_x86_MOV16_ABS_IMM(codeblock_t *block, void *p, uint16_t imm_data) codegen_alloc_bytes(block, 6); codegen_addbyte4(block, 0x66, 0xc7, 0x45, offset); /*MOV offset[RBP], imm_data*/ codegen_addword(block, imm_data); + } else if (offset < (1ULL << 32)) { + codegen_alloc_bytes(block, 8); + codegen_addbyte3(block, 0x66, 0xc7, 0x85); /*MOV offset[RBP], imm_data*/ + codegen_addlong(block, offset); + codegen_addword(block, imm_data); } else { if ((uintptr_t) p >> 32) fatal("host_x86_MOV32_ABS_IMM - out of range %p\n", p); @@ -545,6 +555,11 @@ host_x86_MOV32_ABS_IMM(codeblock_t *block, void *p, uint32_t imm_data) codegen_alloc_bytes(block, 7); codegen_addbyte3(block, 0xc7, 0x45, offset); /*MOV offset[RBP], imm_data*/ codegen_addlong(block, imm_data); + } else if (offset < (1ULL << 32)) { + codegen_alloc_bytes(block, 10); + codegen_addbyte2(block, 0xc7, 0x85); /*MOV offset[RBP], imm_data*/ + codegen_addlong(block, offset); + codegen_addlong(block, imm_data); } else { if ((uintptr_t) p >> 32) fatal("host_x86_MOV32_ABS_IMM - out of range %p\n", p); @@ -566,6 +581,10 @@ host_x86_MOV8_ABS_REG(codeblock_t *block, void *p, int src_reg) if (offset >= -128 && offset < 127) { codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x88, 0x45 | ((src_reg & 7) << 3), offset); /*MOVB offset[RBP], src_reg*/ + } else if (offset < (1ULL << 32)) { + codegen_alloc_bytes(block, 6); + codegen_addbyte2(block, 0x88, 0x85 | ((src_reg & 7) << 3)); /*MOVB offset[RBP], src_reg*/ + codegen_addlong(block, offset); } else { if ((uintptr_t) p >> 32) fatal("host_x86_MOV8_ABS_REG - out of range %p\n", p); @@ -630,6 +649,10 @@ host_x86_MOV64_ABS_REG(codeblock_t *block, void *p, int src_reg) if (offset >= -128 && offset < 127) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x48, 0x89, 0x45 | ((src_reg & 7) << 3), offset); /*MOV offset[RBP], src_reg*/ + } else if (offset < (1ULL << 32)) { + codegen_alloc_bytes(block, 7); + codegen_addbyte3(block, 0x48, 0x89, 0x85 | ((src_reg & 7) << 3)); /*MOV offset[RBP], src_reg*/ + codegen_addlong(block, offset); } else { if ((uintptr_t) p >> 32) fatal("host_x86_MOV64_ABS_REG - out of range %p\n", p); From d15def050aba3e2652355c718918fb78e9c79b53 Mon Sep 17 00:00:00 2001 From: OBattler Date: Sat, 1 Mar 2025 10:11:45 +0100 Subject: [PATCH 12/22] Fix an unclosed comment in device/keyboard_at.c. --- src/device/keyboard_at.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/device/keyboard_at.c b/src/device/keyboard_at.c index d04ab07e9..e61b8547a 100644 --- a/src/device/keyboard_at.c +++ b/src/device/keyboard_at.c @@ -2526,7 +2526,7 @@ static const scancode scancode_set82[512] = { { .mk = {0xe0, 0x17, 0 }, .brk = { 0xe0, 0xF0, 0x17, 0 } }, /* 15a */ { .mk = { 0x67, 0 }, .brk = { 0xf0, 0x67, 0 } }, /* 15b 0x33 LGUI->Muhenkan (in emulator only) */ { .mk = { 0x64, 0 }, .brk = { 0xf0, 0x64, 0 } }, /* 15c 0x35 RGUI->Henkan (in emulator only) */ - { .mk = {0xe0, 0x11, 0 }, .brk = { 0xe0, 0xf0, 0x11, 0 } }, /* 15d 0x36 APPLICATION->Kana (in emulator + { .mk = {0xe0, 0x11, 0 }, .brk = { 0xe0, 0xf0, 0x11, 0 } }, /* 15d 0x36 APPLICATION->Kana (in emulator only) */ { .mk = {0xe0, 0x37, 0 }, .brk = { 0xe0, 0xF0, 0x37, 0 } }, /* 15e */ { .mk = {0xe0, 0x3F, 0 }, .brk = { 0xe0, 0xF0, 0x3F, 0 } }, /* 15f */ { .mk = { 0 }, .brk = { 0 } }, /* 160 */ From f290cc017309494ec6c7bc952b63b2de1e8b1935 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Sat, 1 Mar 2025 22:31:10 +0600 Subject: [PATCH 13/22] x64 NDR: Fix crashes/undefined behaviour with 2+ GB of allocated RAM --- src/codegen_new/codegen_backend_x86-64.c | 2 +- src/codegen_new/codegen_backend_x86-64_defs.h | 2 +- src/codegen_new/codegen_backend_x86-64_ops.c | 64 +++++++++---------- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/codegen_new/codegen_backend_x86-64.c b/src/codegen_new/codegen_backend_x86-64.c index 3cbca28f8..f4a5d024a 100644 --- a/src/codegen_new/codegen_backend_x86-64.c +++ b/src/codegen_new/codegen_backend_x86-64.c @@ -360,7 +360,7 @@ codegen_backend_prologue(codeblock_t *block) host_x86_MOV32_BASE_OFFSET_REG(block, REG_RSP, IREG_TOP_diff_stack_offset, REG_EAX); } if (block->flags & CODEBLOCK_NO_IMMEDIATES) - host_x86_MOV64_REG_IMM(block, REG_R12, (uintptr_t) ram); + host_x86_MOV64_REG_IMM(block, REG_R12, ((uintptr_t) ram) + 2147483648ULL); } void diff --git a/src/codegen_new/codegen_backend_x86-64_defs.h b/src/codegen_new/codegen_backend_x86-64_defs.h index 12f05f01c..7c58fec31 100644 --- a/src/codegen_new/codegen_backend_x86-64_defs.h +++ b/src/codegen_new/codegen_backend_x86-64_defs.h @@ -1,5 +1,5 @@ /*RBP = cpu_state + 128 - R12 = ram (if block->flags & CODEBLOCK_NO_IMMEDIATES)*/ + R12 = ram + 2147483648 (if block->flags & CODEBLOCK_NO_IMMEDIATES)*/ #define REG_AX 0 #define REG_CX 1 #define REG_DX 2 diff --git a/src/codegen_new/codegen_backend_x86-64_ops.c b/src/codegen_new/codegen_backend_x86-64_ops.c index 2f65b46e3..e53c58083 100644 --- a/src/codegen_new/codegen_backend_x86-64_ops.c +++ b/src/codegen_new/codegen_backend_x86-64_ops.c @@ -505,7 +505,7 @@ host_x86_MOV8_ABS_IMM(codeblock_t *block, void *p, uint32_t imm_data) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); codegen_addbyte3(block, 0xc6, 0x45, offset); /*MOVB offset[RBP], imm_data*/ codegen_addbyte(block, imm_data); @@ -528,7 +528,7 @@ host_x86_MOV16_ABS_IMM(codeblock_t *block, void *p, uint16_t imm_data) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 6); codegen_addbyte4(block, 0x66, 0xc7, 0x45, offset); /*MOV offset[RBP], imm_data*/ codegen_addword(block, imm_data); @@ -551,7 +551,7 @@ host_x86_MOV32_ABS_IMM(codeblock_t *block, void *p, uint32_t imm_data) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 7); codegen_addbyte3(block, 0xc7, 0x45, offset); /*MOV offset[RBP], imm_data*/ codegen_addlong(block, imm_data); @@ -578,7 +578,7 @@ host_x86_MOV8_ABS_REG(codeblock_t *block, void *p, int src_reg) if (src_reg & 8) fatal("host_x86_MOV8_ABS_REG - bad reg\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x88, 0x45 | ((src_reg & 7) << 3), offset); /*MOVB offset[RBP], src_reg*/ } else if (offset < (1ULL << 32)) { @@ -602,7 +602,7 @@ host_x86_MOV16_ABS_REG(codeblock_t *block, void *p, int src_reg) if (src_reg & 8) fatal("host_x86_MOV16_ABS_REG - bad reg\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x66, 0x89, 0x45 | ((src_reg & 7) << 3), offset); /*MOV offset[RBP], src_reg*/ } else if (offset < (1ULL << 32)) { @@ -622,7 +622,7 @@ host_x86_MOV32_ABS_REG(codeblock_t *block, void *p, int src_reg) if (src_reg & 8) fatal("host_x86_MOV32_ABS_REG - bad reg\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x89, 0x45 | ((src_reg & 7) << 3), offset); /*MOV offset[RBP], src_reg*/ } else if (offset < (1ULL << 32)) { @@ -646,7 +646,7 @@ host_x86_MOV64_ABS_REG(codeblock_t *block, void *p, int src_reg) if (src_reg & 8) fatal("host_x86_MOV64_ABS_REG - bad reg\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x48, 0x89, 0x45 | ((src_reg & 7) << 3), offset); /*MOV offset[RBP], src_reg*/ } else if (offset < (1ULL << 32)) { @@ -706,19 +706,19 @@ void host_x86_MOV8_REG_ABS(codeblock_t *block, int dst_reg, void *p) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); - int64_t ram_offset = (uintptr_t) p - (uintptr_t) ram; + int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); if (dst_reg & 8) fatal("host_x86_MOV8_REG_ABS reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x8a, 0x45 | ((dst_reg & 7) << 3), offset); /*MOV dst_reg, offset[RBP]*/ } else if (offset < (1ULL << 32)) { codegen_alloc_bytes(block, 6); codegen_addbyte2(block, 0x8a, 0x85 | ((dst_reg & 7) << 3)); /*MOV dst_reg, offset[RBP]*/ codegen_addlong(block, offset); - } else if ((ram_offset < (1ULL << 32)) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { + } else if ((ram_offset >= -2147483648LL) && (ram_offset <= 2147483647LL) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { codegen_alloc_bytes(block, 8); codegen_addbyte4(block, 0x41, 0x8a, 0x84 | ((dst_reg & 7) << 3), 0x24); /*MOV dst_reg, ram_offset[R12]*/ codegen_addlong(block, ram_offset); @@ -730,19 +730,19 @@ void host_x86_MOV16_REG_ABS(codeblock_t *block, int dst_reg, void *p) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); - int64_t ram_offset = (uintptr_t) p - (uintptr_t) ram; + int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); if (dst_reg & 8) fatal("host_x86_MOV16_REG_ABS reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x66, 0x8b, 0x45 | ((dst_reg & 7) << 3), offset); /*MOV dst_reg, offset[RBP]*/ } else if (offset < (1ULL << 32)) { codegen_alloc_bytes(block, 7); codegen_addbyte3(block, 0x66, 0x8b, 0x85 | ((dst_reg & 7) << 3)); /*MOV dst_reg, offset[RBP]*/ codegen_addlong(block, offset); - } else if ((ram_offset < (1ULL << 32)) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { + } else if ((ram_offset >= -2147483648LL) && (ram_offset <= 2147483647LL) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { codegen_alloc_bytes(block, 9); codegen_addbyte4(block, 0x66, 0x41, 0x8b, 0x84 | ((dst_reg & 7) << 3)); /*MOV dst_reg, ram_offset[R12]*/ codegen_addbyte(block, 0x24); @@ -760,19 +760,19 @@ void host_x86_MOV32_REG_ABS(codeblock_t *block, int dst_reg, void *p) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); - int64_t ram_offset = (uintptr_t) p - (uintptr_t) ram; + int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); if (dst_reg & 8) fatal("host_x86_MOV32_REG_ABS reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 3); codegen_addbyte3(block, 0x8b, 0x45 | ((dst_reg & 7) << 3), offset); /*MOV dst_reg, offset[RBP]*/ } else if (offset < (1ULL << 32)) { codegen_alloc_bytes(block, 6); codegen_addbyte2(block, 0x8b, 0x85 | ((dst_reg & 7) << 3)); /*MOV dst_reg, offset[RBP]*/ codegen_addlong(block, offset); - } else if ((ram_offset < (1ULL << 32)) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { + } else if ((ram_offset >= -2147483648LL) && (ram_offset <= 2147483647LL) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { codegen_alloc_bytes(block, 8); codegen_addbyte4(block, 0x41, 0x8b, 0x84 | ((dst_reg & 7) << 3), 0x24); /*MOV dst_reg, ram_offset[R12]*/ codegen_addlong(block, ram_offset); @@ -792,7 +792,7 @@ host_x86_MOV64_REG_ABS(codeblock_t *block, int dst_reg, void *p) if (dst_reg & 8) fatal("host_x86_MOV64_REG_ABS reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x48, 0x8b, 0x45 | ((dst_reg & 7) << 3), offset); /*MOV dst_reg, offset[RBP]*/ } else if (offset < (1ULL << 32)) { @@ -845,7 +845,7 @@ host_x86_MOV16_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, in if ((dst_reg & 8) || (base_reg & 8)) fatal("host_x86_MOV16_REG_BASE_OFFSET reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 5); codegen_addbyte(block, 0x66); @@ -863,7 +863,7 @@ host_x86_MOV32_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, in if ((dst_reg & 8) || (base_reg & 8)) fatal("host_x86_MOV32_REG_BASE_OFFSET reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x8b, 0x40 | base_reg | (dst_reg << 3), 0x24, offset); @@ -880,7 +880,7 @@ host_x86_MOV64_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, in if ((dst_reg & 8) || (base_reg & 8)) fatal("host_x86_MOV64_REG_BASE_OFFSET reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 5); codegen_addbyte(block, 0x48); @@ -899,7 +899,7 @@ host_x86_MOV32_BASE_OFFSET_REG(codeblock_t *block, int base_reg, int offset, int if ((src_reg & 8) || (base_reg & 8)) fatal("host_x86_MOV32_BASE_OFFSET_REG reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x89, 0x40 | base_reg | (src_reg << 3), 0x24, offset); @@ -916,7 +916,7 @@ host_x86_MOV64_BASE_OFFSET_REG(codeblock_t *block, int base_reg, int offset, int if ((src_reg & 8) || (base_reg & 8)) fatal("host_x86_MOV64_BASE_OFFSET_REG reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 5); codegen_addbyte(block, 0x48); @@ -935,7 +935,7 @@ host_x86_MOV32_BASE_OFFSET_IMM(codeblock_t *block, int base_reg, int offset, uin if (base_reg & 8) fatal("host_x86_MOV32_BASE_OFFSET_IMM reg & 8\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 8); codegen_addbyte4(block, 0xc7, 0x40 | base_reg, 0x24, offset); @@ -1107,16 +1107,16 @@ void host_x86_MOVZX_REG_ABS_16_8(codeblock_t *block, int dst_reg, void *p) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); - int64_t ram_offset = (uintptr_t) p - (uintptr_t) ram; + int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); if (dst_reg & 8) fatal("host_x86_MOVZX_REG_ABS_16_8 - bad reg\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 5); codegen_addbyte(block, 0x66); codegen_addbyte4(block, 0x0f, 0xb6, 0x45 | ((dst_reg & 7) << 3), offset); /*MOVZX dst_reg, offset[RBP]*/ - } else if ((ram_offset < (1ULL << 32)) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { + } else if ((ram_offset >= -2147483648LL) && (ram_offset <= 2147483647LL) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { codegen_alloc_bytes(block, 10); codegen_addbyte2(block, 0x66, 0x41); codegen_addbyte4(block, 0x0f, 0xb6, 0x84 | ((dst_reg & 7) << 3), 0x24); /*MOVZX dst_reg, ram_offset[R12]*/ @@ -1134,14 +1134,14 @@ void host_x86_MOVZX_REG_ABS_32_8(codeblock_t *block, int dst_reg, void *p) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); - int64_t ram_offset = (uintptr_t) p - (uintptr_t) ram; + int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); #if 0 if (dst_reg & 8) fatal("host_x86_MOVZX_REG_ABS_32_8 - bad reg\n"); #endif - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { if (dst_reg & 8) { codegen_alloc_bytes(block, 5); codegen_addbyte(block, 0x44); @@ -1150,7 +1150,7 @@ host_x86_MOVZX_REG_ABS_32_8(codeblock_t *block, int dst_reg, void *p) codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x0f, 0xb6, 0x45 | ((dst_reg & 7) << 3), offset); /*MOVZX dst_reg, offset[RBP]*/ } - } else if ((ram_offset < (1ULL << 32)) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { + } else if ((ram_offset >= -2147483648LL) && (ram_offset <= 2147483647LL) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { if (dst_reg & 8) fatal("host_x86_MOVZX_REG_ABS_32_8 - bad reg\n"); @@ -1173,15 +1173,15 @@ void host_x86_MOVZX_REG_ABS_32_16(codeblock_t *block, int dst_reg, void *p) { int64_t offset = (uintptr_t) p - (((uintptr_t) &cpu_state) + 128); - int64_t ram_offset = (uintptr_t) p - (uintptr_t) ram; + int64_t ram_offset = (uintptr_t) p - (((uintptr_t) ram) + 2147483648ULL); if (dst_reg & 8) fatal("host_x86_MOVZX_REG_ABS_32_16 - bad reg\n"); - if (offset >= -128 && offset < 127) { + if (offset >= -128 && offset <= 127) { codegen_alloc_bytes(block, 4); codegen_addbyte4(block, 0x0f, 0xb7, 0x45 | ((dst_reg & 7) << 3), offset); /*MOVZX dst_reg, offset[RBP]*/ - } else if ((ram_offset < (1ULL << 32)) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { + } else if ((ram_offset >= -2147483648LL) && (ram_offset <= 2147483647LL) && (block->flags & CODEBLOCK_NO_IMMEDIATES)) { codegen_alloc_bytes(block, 9); codegen_addbyte(block, 0x41); codegen_addbyte4(block, 0x0f, 0xb7, 0x84 | ((dst_reg & 7) << 3), 0x24); /*MOVZX dst_reg, ram_offset[R12]*/ From 6b55b2383a3fd6096f0e39a4c749272ecddc5650 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Sun, 2 Mar 2025 23:14:10 +0600 Subject: [PATCH 14/22] Misc NDR changes Force NDR on ARMv7/ARM64 Make GCC/Clang not use anything in the red zone Compile with `-fno-omit-frame-pointer` --- CMakeLists.txt | 7 +++++- src/CMakeLists.txt | 3 +++ src/codegen_new/codegen_backend_x86-64.c | 18 +++++++++++++++ src/codegen_new/codegen_backend_x86-64_ops.c | 24 ++++++++++---------- 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa687222a..5c33bb3e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,13 +132,18 @@ option(RTMIDI "RtMidi" option(FLUIDSYNTH "FluidSynth" ON) option(MUNT "MUNT" ON) option(VNC "VNC renderer" OFF) -option(NEW_DYNAREC "Use the PCem v15 (\"new\") dynamic recompiler" OFF) option(MINITRACE "Enable Chrome tracing using the modified minitrace library" OFF) option(GDBSTUB "Enable GDB stub server for debugging" OFF) option(DEV_BRANCH "Development branch" OFF) option(DISCORD "Discord Rich Presence support" ON) option(DEBUGREGS486 "Enable debug register opeartion on 486+ CPUs" OFF) +if((ARCH STREQUAL "arm64") OR (ARCH STREQUAL "arm")) + set(NEW_DYNAREC ON) +else() + option(NEW_DYNAREC "Use the PCem v15 (\"new\") dynamic recompiler" OFF) +endif() + if(WIN32) set(QT ON) option(CPPTHREADS "C++11 threads" OFF) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7d6daff9c..ee17a7a21 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -223,6 +223,9 @@ elseif(APPLE AND NOT QT) COMPONENT Runtime) endif() +if(NEW_DYNAREC AND (ARCH STREQUAL "x86_64") AND (NOT WIN32)) + add_compile_options(-mno-red-zone -fno-omit-frame-pointer) +endif() # Install the PDB file on Windows builds if(MSVC) diff --git a/src/codegen_new/codegen_backend_x86-64.c b/src/codegen_new/codegen_backend_x86-64.c index f4a5d024a..6242ea40b 100644 --- a/src/codegen_new/codegen_backend_x86-64.c +++ b/src/codegen_new/codegen_backend_x86-64.c @@ -315,13 +315,19 @@ codegen_backend_init(void) # endif host_x86_CALL(block, (void *) x86gpf); codegen_exit_rout = &codeblock[block_current].data[block_pos]; +#ifdef _WIN64 host_x86_ADD64_REG_IMM(block, REG_RSP, 0x38); +#else + host_x86_ADD64_REG_IMM(block, REG_RSP, 0x48); +#endif host_x86_POP(block, REG_R15); host_x86_POP(block, REG_R14); host_x86_POP(block, REG_R13); host_x86_POP(block, REG_R12); +#ifdef _WIN64 host_x86_POP(block, REG_RDI); host_x86_POP(block, REG_RSI); +#endif host_x86_POP(block, REG_RBP); host_x86_POP(block, REG_RDX); host_x86_RET(block); @@ -346,13 +352,19 @@ codegen_backend_prologue(codeblock_t *block) block_pos = BLOCK_START; /*Entry code*/ host_x86_PUSH(block, REG_RBX); host_x86_PUSH(block, REG_RBP); +#ifdef _WIN64 host_x86_PUSH(block, REG_RSI); host_x86_PUSH(block, REG_RDI); +#endif host_x86_PUSH(block, REG_R12); host_x86_PUSH(block, REG_R13); host_x86_PUSH(block, REG_R14); host_x86_PUSH(block, REG_R15); +#ifdef _WIN64 host_x86_SUB64_REG_IMM(block, REG_RSP, 0x38); +#else + host_x86_SUB64_REG_IMM(block, REG_RSP, 0x48); +#endif host_x86_MOV64_REG_IMM(block, REG_RBP, ((uintptr_t) &cpu_state) + 128); if (block->flags & CODEBLOCK_HAS_FPU) { host_x86_MOV32_REG_ABS(block, REG_EAX, &cpu_state.TOP); @@ -366,13 +378,19 @@ codegen_backend_prologue(codeblock_t *block) void codegen_backend_epilogue(codeblock_t *block) { +#ifdef _WIN64 host_x86_ADD64_REG_IMM(block, REG_RSP, 0x38); +#else + host_x86_ADD64_REG_IMM(block, REG_RSP, 0x48); +#endif host_x86_POP(block, REG_R15); host_x86_POP(block, REG_R14); host_x86_POP(block, REG_R13); host_x86_POP(block, REG_R12); +#ifdef _WIN64 host_x86_POP(block, REG_RDI); host_x86_POP(block, REG_RSI); +#endif host_x86_POP(block, REG_RBP); host_x86_POP(block, REG_RDX); host_x86_RET(block); diff --git a/src/codegen_new/codegen_backend_x86-64_ops.c b/src/codegen_new/codegen_backend_x86-64_ops.c index e53c58083..39173505b 100644 --- a/src/codegen_new/codegen_backend_x86-64_ops.c +++ b/src/codegen_new/codegen_backend_x86-64_ops.c @@ -848,11 +848,11 @@ host_x86_MOV16_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, in if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 5); - codegen_addbyte(block, 0x66); + codegen_addbyte(block, 0x66); /* MOV dst_reg, [RSP + offset] */ codegen_addbyte4(block, 0x8b, 0x40 | base_reg | (dst_reg << 3), 0x24, offset); } else { codegen_alloc_bytes(block, 4); - codegen_addbyte4(block, 0x66, 0x8b, 0x40 | base_reg | (dst_reg << 3), offset); + codegen_addbyte4(block, 0x66, 0x8b, 0x40 | base_reg | (dst_reg << 3), offset); /* MOV dst_reg, [base_reg + offset] */ } } else fatal("MOV16_REG_BASE_OFFSET - offset %i\n", offset); @@ -866,10 +866,10 @@ host_x86_MOV32_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, in if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 4); - codegen_addbyte4(block, 0x8b, 0x40 | base_reg | (dst_reg << 3), 0x24, offset); + codegen_addbyte4(block, 0x8b, 0x40 | base_reg | (dst_reg << 3), 0x24, offset); /* MOV dst_reg, [RSP + offset] */ } else { codegen_alloc_bytes(block, 3); - codegen_addbyte3(block, 0x8b, 0x40 | base_reg | (dst_reg << 3), offset); + codegen_addbyte3(block, 0x8b, 0x40 | base_reg | (dst_reg << 3), offset); /* MOV dst_reg, [base_reg + offset] */ } } else fatal("MOV32_REG_BASE_OFFSET - offset %i\n", offset); @@ -883,11 +883,11 @@ host_x86_MOV64_REG_BASE_OFFSET(codeblock_t *block, int dst_reg, int base_reg, in if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 5); - codegen_addbyte(block, 0x48); + codegen_addbyte(block, 0x48); /* MOV dst_reg, [RSP + offset] */ codegen_addbyte4(block, 0x8b, 0x40 | base_reg | (dst_reg << 3), 0x24, offset); } else { codegen_alloc_bytes(block, 4); - codegen_addbyte4(block, 0x48, 0x8b, 0x40 | base_reg | (dst_reg << 3), offset); + codegen_addbyte4(block, 0x48, 0x8b, 0x40 | base_reg | (dst_reg << 3), offset); /* MOV dst_reg, [base_reg + offset] */ } } else fatal("MOV32_REG_BASE_OFFSET - offset %i\n", offset); @@ -901,11 +901,11 @@ host_x86_MOV32_BASE_OFFSET_REG(codeblock_t *block, int base_reg, int offset, int if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { - codegen_alloc_bytes(block, 4); + codegen_alloc_bytes(block, 4); /* MOV [RSP + offset], src_reg*/ codegen_addbyte4(block, 0x89, 0x40 | base_reg | (src_reg << 3), 0x24, offset); } else { codegen_alloc_bytes(block, 3); - codegen_addbyte3(block, 0x89, 0x40 | base_reg | (src_reg << 3), offset); + codegen_addbyte3(block, 0x89, 0x40 | base_reg | (src_reg << 3), offset); /* MOV [base_reg + offset], src_reg*/ } } else fatal("MOV32_BASE_OFFSET_REG - offset %i\n", offset); @@ -919,11 +919,11 @@ host_x86_MOV64_BASE_OFFSET_REG(codeblock_t *block, int base_reg, int offset, int if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 5); - codegen_addbyte(block, 0x48); + codegen_addbyte(block, 0x48); /* MOV [RSP + offset], src_reg*/ codegen_addbyte4(block, 0x89, 0x40 | base_reg | (src_reg << 3), 0x24, offset); } else { codegen_alloc_bytes(block, 4); - codegen_addbyte4(block, 0x48, 0x89, 0x40 | base_reg | (src_reg << 3), offset); + codegen_addbyte4(block, 0x48, 0x89, 0x40 | base_reg | (src_reg << 3), offset); /* MOV [base_reg + offset], src_reg*/ } } else fatal("MOV64_BASE_OFFSET_REG - offset %i\n", offset); @@ -938,11 +938,11 @@ host_x86_MOV32_BASE_OFFSET_IMM(codeblock_t *block, int base_reg, int offset, uin if (offset >= -128 && offset <= 127) { if (base_reg == REG_RSP) { codegen_alloc_bytes(block, 8); - codegen_addbyte4(block, 0xc7, 0x40 | base_reg, 0x24, offset); + codegen_addbyte4(block, 0xc7, 0x40 | base_reg, 0x24, offset); /* MOV [RSP + offset], imm_data */ codegen_addlong(block, imm_data); } else { codegen_alloc_bytes(block, 7); - codegen_addbyte3(block, 0xc7, 0x40 | base_reg, offset); + codegen_addbyte3(block, 0xc7, 0x40 | base_reg, offset); /* MOV [base_reg + offset], src_reg*/ codegen_addlong(block, imm_data); } } else From 8d6087a016ba1eb034c9c6d6d99533d124cb418e Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Mon, 3 Mar 2025 01:06:03 +0600 Subject: [PATCH 15/22] Revert some unnecessary changes for NDR --- src/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ee17a7a21..fb3bf9f50 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -223,10 +223,6 @@ elseif(APPLE AND NOT QT) COMPONENT Runtime) endif() -if(NEW_DYNAREC AND (ARCH STREQUAL "x86_64") AND (NOT WIN32)) - add_compile_options(-mno-red-zone -fno-omit-frame-pointer) -endif() - # Install the PDB file on Windows builds if(MSVC) # CMake fully supports PDB files on MSVC-compatible compilers From 54650963f02a995264d2b4cb3fb103c9bc0dac52 Mon Sep 17 00:00:00 2001 From: The Dax Date: Sun, 2 Mar 2025 14:12:25 -0500 Subject: [PATCH 16/22] Fix Acer 100T machine not showing up on Linux due to case-sensitivity. --- src/machine/m_at_286_386sx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/machine/m_at_286_386sx.c b/src/machine/m_at_286_386sx.c index 24cb88f09..9086f7c8a 100644 --- a/src/machine/m_at_286_386sx.c +++ b/src/machine/m_at_286_386sx.c @@ -742,7 +742,7 @@ machine_at_acer100t_init(const machine_t *model) { int ret; - ret = bios_load_linear("roms/machines/acer100t/acer386.bin", + ret = bios_load_linear("roms/machines/acer100t/acer386.BIN", 0x000f0000, 65536, 0); if (bios_only || !ret) From fae26729f103010ddc7e621a37083bc4ae6d5736 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Mon, 3 Mar 2025 22:03:52 +0600 Subject: [PATCH 17/22] S3 ViRGE: Make IRQs happen in main thread FIFO is also no longer immediately woken up upon writes --- src/video/vid_s3_virge.c | 57 +++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index 91b186240..05cd42459 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -338,6 +338,7 @@ typedef struct virge_t { event_t * fifo_not_full_event; atomic_int virge_busy; + atomic_uint virge_irq_req; uint8_t subsys_stat; uint8_t subsys_cntl; @@ -373,6 +374,9 @@ typedef struct virge_t { uint32_t dma_mmio_addr; uint32_t dma_data_type; + pc_timer_t wake_timer; + pc_timer_t irq_timer; + int pci; int is_agp; } virge_t; @@ -383,6 +387,28 @@ wake_fifo_thread(virge_t *virge) { thread_set_event(virge->wake_fifo_thread); } +void +s3_virge_wake_fifo_timer(void* priv) +{ + virge_t *virge = (virge_t *) priv; + + thread_set_event(virge->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ +} + +void +s3_virge_wake_fifo_thread(virge_t *virge) +{ + if (!timer_is_enabled(&virge->wake_timer)) { + /*Don't wake FIFO thread immediately - if we do that it will probably + process one word and go back to sleep, requiring it to be woken on + almost every write. Instead, wait a short while so that the CPU + emulation writes more data so we have more batched-up work.*/ + timer_on_auto(&virge->wake_timer, 100.0); + } + if (!timer_is_enabled(&virge->irq_timer)) + timer_on_auto(&virge->irq_timer, 100.0); +} + static virge_t *reset_state = NULL; static video_timings_t timing_diamond_stealth3d_2000_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; @@ -471,6 +497,19 @@ s3_virge_update_irqs(virge_t *virge) pci_clear_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); } +void +s3_virge_update_irq_timer(void* priv) +{ + virge_t *virge = (virge_t *) priv; + + if (virge->virge_irq_req) { + virge->virge_irq_req--; + s3_virge_update_irqs(virge); + } + + timer_on_auto(&virge->irq_timer, 100.); +} + static void s3_virge_out(uint16_t addr, uint8_t val, void *priv) { @@ -1101,6 +1140,9 @@ static void s3_virge_vblank_start(svga_t *svga) { virge_t *virge = (virge_t *) svga->priv; + if (virge->virge_irq_req) + virge->virge_irq_req--; + virge->subsys_stat |= INT_VSY; s3_virge_update_irqs(virge); } @@ -1795,7 +1837,8 @@ fifo_thread(void *param) if (virge->cmd_dma) virge->subsys_stat |= (INT_HOST_DONE | INT_CMD_DONE); - s3_virge_update_irqs(virge); + //s3_virge_update_irqs(virge); + virge->virge_irq_req++; } } @@ -1835,10 +1878,8 @@ s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) virge->fifo_write_idx++; - if (FIFO_ENTRIES > 0xe000) - wake_fifo_thread(virge); - if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 8) - wake_fifo_thread(virge); + if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 16) + s3_virge_wake_fifo_thread(virge); } static void @@ -3642,7 +3683,8 @@ render_thread(void *param) } virge->s3d_busy = 0; virge->subsys_stat |= INT_S3D_DONE; - s3_virge_update_irqs(virge); + //s3_virge_update_irqs(virge); + virge->virge_irq_req++; } } @@ -4571,6 +4613,9 @@ s3_virge_init(const device_t *info) virge->fifo_not_full_event = thread_create_event(); virge->fifo_thread = thread_create(fifo_thread, virge); + timer_add(&virge->wake_timer, s3_virge_wake_fifo_timer, virge, 0); + timer_add(&virge->irq_timer, s3_virge_update_irq_timer, virge, 0); + virge->local = info->local; *reset_state = *virge; From e24c9d51633730b165aa7c332600df8aee140c79 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Mon, 3 Mar 2025 23:22:24 +0600 Subject: [PATCH 18/22] S3 ViRGE: Use a significantly faster version of the ROP code DirectDraw tests now run significantly faster, and Direct3D tests also now run smoothly. --- src/video/vid_s3_virge.c | 788 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 775 insertions(+), 13 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index 05cd42459..62ffcff0c 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -2216,19 +2216,781 @@ s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) } \ } while (0) -#define MIX() \ - do { \ - int c; \ - for (c = 0; c < 24; c++) { \ - int d = (dest & (1 << c)) ? 1 : 0; \ - if (source & (1 << c)) \ - d |= 2; \ - if (pattern & (1 << c)) \ - d |= 4; \ - if (virge->s3d.rop & (1 << d)) \ - out |= (1 << c); \ - } \ - } while (0) +#define ROPMIX(R, D, P, S, out) \ + { \ + switch (R) { \ + case 0x00: \ + out = 0; \ + break; \ + case 0x01: \ + out = ~(D | (P | S)); \ + break; \ + case 0x02: \ + out = D & ~(P | S); \ + break; \ + case 0x03: \ + out = ~(P | S); \ + break; \ + case 0x04: \ + out = S & ~(D | P); \ + break; \ + case 0x05: \ + out = ~(D | P); \ + break; \ + case 0x06: \ + out = ~(P | ~(D ^ S)); \ + break; \ + case 0x07: \ + out = ~(P | (D & S)); \ + break; \ + case 0x08: \ + out = S & (D & ~P); \ + break; \ + case 0x09: \ + out = ~(P | (D ^ S)); \ + break; \ + case 0x0a: \ + out = D & ~P; \ + break; \ + case 0x0b: \ + out = ~(P | (S & ~D)); \ + break; \ + case 0x0c: \ + out = S & ~P; \ + break; \ + case 0x0d: \ + out = ~(P | (D & ~S)); \ + break; \ + case 0x0e: \ + out = ~(P | ~(D | S)); \ + break; \ + case 0x0f: \ + out = ~P; \ + break; \ + case 0x10: \ + out = P & ~(D | S); \ + break; \ + case 0x11: \ + out = ~(D | S); \ + break; \ + case 0x12: \ + out = ~(S | ~(D ^ P)); \ + break; \ + case 0x13: \ + out = ~(S | (D & P)); \ + break; \ + case 0x14: \ + out = ~(D | ~(P ^ S)); \ + break; \ + case 0x15: \ + out = ~(D | (P & S)); \ + break; \ + case 0x16: \ + out = P ^ (S ^ (D & ~(P & S))); \ + break; \ + case 0x17: \ + out = ~(S ^ ((S ^ P) & (D ^ S))); \ + break; \ + case 0x18: \ + out = (S ^ P) & (P ^ D); \ + break; \ + case 0x19: \ + out = ~(S ^ (D & ~(P & S))); \ + break; \ + case 0x1a: \ + out = P ^ (D | (S & P)); \ + break; \ + case 0x1b: \ + out = ~(S ^ (D & (P ^ S))); \ + break; \ + case 0x1c: \ + out = P ^ (S | (D & P)); \ + break; \ + case 0x1d: \ + out = ~(D ^ (S & (P ^ D))); \ + break; \ + case 0x1e: \ + out = P ^ (D | S); \ + break; \ + case 0x1f: \ + out = ~(P & (D | S)); \ + break; \ + case 0x20: \ + out = D & (P & ~S); \ + break; \ + case 0x21: \ + out = ~(S | (D ^ P)); \ + break; \ + case 0x22: \ + out = D & ~S; \ + break; \ + case 0x23: \ + out = ~(S | (P & ~D)); \ + break; \ + case 0x24: \ + out = (S ^ P) & (D ^ S); \ + break; \ + case 0x25: \ + out = ~(P ^ (D & ~(S & P))); \ + break; \ + case 0x26: \ + out = S ^ (D | (P & S)); \ + break; \ + case 0x27: \ + out = S ^ (D | ~(P ^ S)); \ + break; \ + case 0x28: \ + out = D & (P ^ S); \ + break; \ + case 0x29: \ + out = ~(P ^ (S ^ (D | (P & S)))); \ + break; \ + case 0x2a: \ + out = D & ~(P & S); \ + break; \ + case 0x2b: \ + out = ~(S ^ ((S ^ P) & (P ^ D))); \ + break; \ + case 0x2c: \ + out = S ^ (P & (D | S)); \ + break; \ + case 0x2d: \ + out = P ^ (S | ~D); \ + break; \ + case 0x2e: \ + out = P ^ (S | (D ^ P)); \ + break; \ + case 0x2f: \ + out = ~(P & (S | ~D)); \ + break; \ + case 0x30: \ + out = P & ~S; \ + break; \ + case 0x31: \ + out = ~(S | (D & ~P)); \ + break; \ + case 0x32: \ + out = S ^ (D | (P | S)); \ + break; \ + case 0x33: \ + out = ~S; \ + break; \ + case 0x34: \ + out = S ^ (P | (D & S)); \ + break; \ + case 0x35: \ + out = S ^ (P | ~(D ^ S)); \ + break; \ + case 0x36: \ + out = S ^ (D | P); \ + break; \ + case 0x37: \ + out = ~(S & (D | P)); \ + break; \ + case 0x38: \ + out = P ^ (S & (D | P)); \ + break; \ + case 0x39: \ + out = S ^ (P | ~D); \ + break; \ + case 0x3a: \ + out = S ^ (P | (D ^ S)); \ + break; \ + case 0x3b: \ + out = ~(S & (P | ~D)); \ + break; \ + case 0x3c: \ + out = P ^ S; \ + break; \ + case 0x3d: \ + out = S ^ (P | ~(D | S)); \ + break; \ + case 0x3e: \ + out = S ^ (P | (D & ~S)); \ + break; \ + case 0x3f: \ + out = ~(P & S); \ + break; \ + case 0x40: \ + out = P & (S & ~D); \ + break; \ + case 0x41: \ + out = ~(D | (P ^ S)); \ + break; \ + case 0x42: \ + out = (S ^ D) & (P ^ D); \ + break; \ + case 0x43: \ + out = ~(S ^ (P & ~(D & S))); \ + break; \ + case 0x44: \ + out = S & ~D; \ + break; \ + case 0x45: \ + out = ~(D | (P & ~S)); \ + break; \ + case 0x46: \ + out = D ^ (S | (P & D)); \ + break; \ + case 0x47: \ + out = ~(P ^ (S & (D ^ P))); \ + break; \ + case 0x48: \ + out = S & (D ^ P); \ + break; \ + case 0x49: \ + out = ~(P ^ (D ^ (S | (P & D)))); \ + break; \ + case 0x4a: \ + out = D ^ (P & (S | D)); \ + break; \ + case 0x4b: \ + out = P ^ (D | ~S); \ + break; \ + case 0x4c: \ + out = S & ~(D & P); \ + break; \ + case 0x4d: \ + out = ~(S ^ ((S ^ P) | (D ^ S))); \ + break; \ + case 0x4e: \ + out = P ^ (D | (S ^ P)); \ + break; \ + case 0x4f: \ + out = ~(P & (D | ~S)); \ + break; \ + case 0x50: \ + out = P & ~D; \ + break; \ + case 0x51: \ + out = ~(D | (S & ~P)); \ + break; \ + case 0x52: \ + out = D ^ (P | (S & D)); \ + break; \ + case 0x53: \ + out = ~(S ^ (P & (D ^ S))); \ + break; \ + case 0x54: \ + out = ~(D | ~(P | S)); \ + break; \ + case 0x55: \ + out = ~D; \ + break; \ + case 0x56: \ + out = D ^ (P | S); \ + break; \ + case 0x57: \ + out = ~(D & (P | S)); \ + break; \ + case 0x58: \ + out = P ^ (D & (S | P)); \ + break; \ + case 0x59: \ + out = D ^ (P | ~S); \ + break; \ + case 0x5a: \ + out = D ^ P; \ + break; \ + case 0x5b: \ + out = D ^ (P | ~(S | D)); \ + break; \ + case 0x5c: \ + out = D ^ (P | (S ^ D)); \ + break; \ + case 0x5d: \ + out = ~(D & (P | ~S)); \ + break; \ + case 0x5e: \ + out = D ^ (P | (S & ~D)); \ + break; \ + case 0x5f: \ + out = ~(D & P); \ + break; \ + case 0x60: \ + out = P & (D ^ S); \ + break; \ + case 0x61: \ + out = ~(D ^ (S ^ (P | (D & S)))); \ + break; \ + case 0x62: \ + out = D ^ (S & (P | D)); \ + break; \ + case 0x63: \ + out = S ^ (D | ~P); \ + break; \ + case 0x64: \ + out = S ^ (D & (P | S)); \ + break; \ + case 0x65: \ + out = D ^ (S | ~P); \ + break; \ + case 0x66: \ + out = D ^ S; \ + break; \ + case 0x67: \ + out = S ^ (D | ~(P | S)); \ + break; \ + case 0x68: \ + out = ~(D ^ (S ^ (P | ~(D | S)))); \ + break; \ + case 0x69: \ + out = ~(P ^ (D ^ S)); \ + break; \ + case 0x6a: \ + out = D ^ (P & S); \ + break; \ + case 0x6b: \ + out = ~(P ^ (S ^ (D & (P | S)))); \ + break; \ + case 0x6c: \ + out = S ^ (D & P); \ + break; \ + case 0x6d: \ + out = ~(P ^ (D ^ (S & (P | D)))); \ + break; \ + case 0x6e: \ + out = S ^ (D & (P | ~S)); \ + break; \ + case 0x6f: \ + out = ~(P & ~(D ^ S)); \ + break; \ + case 0x70: \ + out = P & ~(D & S); \ + break; \ + case 0x71: \ + out = ~(S ^ ((S ^ D) & (P ^ D))); \ + break; \ + case 0x72: \ + out = S ^ (D | (P ^ S)); \ + break; \ + case 0x73: \ + out = ~(S & (D | ~P)); \ + break; \ + case 0x74: \ + out = D ^ (S | (P ^ D)); \ + break; \ + case 0x75: \ + out = ~(D & (S | ~P)); \ + break; \ + case 0x76: \ + out = S ^ (D | (P & ~S)); \ + break; \ + case 0x77: \ + out = ~(D & S); \ + break; \ + case 0x78: \ + out = P ^ (D & S); \ + break; \ + case 0x79: \ + out = ~(D ^ (S ^ (P & (D | S)))); \ + break; \ + case 0x7a: \ + out = D ^ (P & (S | ~D)); \ + break; \ + case 0x7b: \ + out = ~(S & ~(D ^ P)); \ + break; \ + case 0x7c: \ + out = S ^ (P & (D | ~S)); \ + break; \ + case 0x7d: \ + out = ~(D & ~(P ^ S)); \ + break; \ + case 0x7e: \ + out = (S ^ P) | (D ^ S); \ + break; \ + case 0x7f: \ + out = ~(D & (P & S)); \ + break; \ + case 0x80: \ + out = D & (P & S); \ + break; \ + case 0x81: \ + out = ~((S ^ P) | (D ^ S)); \ + break; \ + case 0x82: \ + out = D & ~(P ^ S); \ + break; \ + case 0x83: \ + out = ~(S ^ (P & (D | ~S))); \ + break; \ + case 0x84: \ + out = S & ~(D ^ P); \ + break; \ + case 0x85: \ + out = ~(P ^ (D & (S | ~P))); \ + break; \ + case 0x86: \ + out = D ^ (S ^ (P & (D | S))); \ + break; \ + case 0x87: \ + out = ~(P ^ (D & S)); \ + break; \ + case 0x88: \ + out = D & S; \ + break; \ + case 0x89: \ + out = ~(S ^ (D | (P & ~S))); \ + break; \ + case 0x8a: \ + out = D & (S | ~P); \ + break; \ + case 0x8b: \ + out = ~(D ^ (S | (P ^ D))); \ + break; \ + case 0x8c: \ + out = S & (D | ~P); \ + break; \ + case 0x8d: \ + out = ~(S ^ (D | (P ^ S))); \ + break; \ + case 0x8e: \ + out = S ^ ((S ^ D) & (P ^ D)); \ + break; \ + case 0x8f: \ + out = ~(P & ~(D & S)); \ + break; \ + case 0x90: \ + out = P & ~(D ^ S); \ + break; \ + case 0x91: \ + out = ~(S ^ (D & (P | ~S))); \ + break; \ + case 0x92: \ + out = D ^ (P ^ (S & (D | P))); \ + break; \ + case 0x93: \ + out = ~(S ^ (P & D)); \ + break; \ + case 0x94: \ + out = P ^ (S ^ (D & (P | S))); \ + break; \ + case 0x95: \ + out = ~(D ^ (P & S)); \ + break; \ + case 0x96: \ + out = D ^ (P ^ S); \ + break; \ + case 0x97: \ + out = P ^ (S ^ (D | ~(P | S))); \ + break; \ + case 0x98: \ + out = ~(S ^ (D | ~(P | S))); \ + break; \ + case 0x99: \ + out = ~(D ^ S); \ + break; \ + case 0x9a: \ + out = D ^ (P & ~S); \ + break; \ + case 0x9b: \ + out = ~(S ^ (D & (P | S))); \ + break; \ + case 0x9c: \ + out = S ^ (P & ~D); \ + break; \ + case 0x9d: \ + out = ~(D ^ (S & (P | D))); \ + break; \ + case 0x9e: \ + out = D ^ (S ^ (P | (D & S))); \ + break; \ + case 0x9f: \ + out = ~(P & (D ^ S)); \ + break; \ + case 0xa0: \ + out = D & P; \ + break; \ + case 0xa1: \ + out = ~(P ^ (D | (S & ~P))); \ + break; \ + case 0xa2: \ + out = D & (P | ~S); \ + break; \ + case 0xa3: \ + out = ~(D ^ (P | (S ^ D))); \ + break; \ + case 0xa4: \ + out = ~(P ^ (D | ~(S | P))); \ + break; \ + case 0xa5: \ + out = ~(P ^ D); \ + break; \ + case 0xa6: \ + out = D ^ (S & ~P); \ + break; \ + case 0xa7: \ + out = ~(P ^ (D & (S | P))); \ + break; \ + case 0xa8: \ + out = D & (P | S); \ + break; \ + case 0xa9: \ + out = ~(D ^ (P | S)); \ + break; \ + case 0xaa: \ + out = D; \ + break; \ + case 0xab: \ + out = D | ~(P | S); \ + break; \ + case 0xac: \ + out = S ^ (P & (D ^ S)); \ + break; \ + case 0xad: \ + out = ~(D ^ (P | (S & D))); \ + break; \ + case 0xae: \ + out = D | (S & ~P); \ + break; \ + case 0xaf: \ + out = D | ~P; \ + break; \ + case 0xb0: \ + out = P & (D | ~S); \ + break; \ + case 0xb1: \ + out = ~(P ^ (D | (S ^ P))); \ + break; \ + case 0xb2: \ + out = S ^ ((S ^ P) | (D ^ S)); \ + break; \ + case 0xb3: \ + out = ~(S & ~(D & P)); \ + break; \ + case 0xb4: \ + out = P ^ (S & ~D); \ + break; \ + case 0xb5: \ + out = ~(D ^ (P & (S | D))); \ + break; \ + case 0xb6: \ + out = D ^ (P ^ (S | (D & P))); \ + break; \ + case 0xb7: \ + out = ~(S & (D ^ P)); \ + break; \ + case 0xb8: \ + out = P ^ (S & (D ^ P)); \ + break; \ + case 0xb9: \ + out = ~(D ^ (S | (P & D))); \ + break; \ + case 0xba: \ + out = D | (P & ~S); \ + break; \ + case 0xbb: \ + out = D | ~S; \ + break; \ + case 0xbc: \ + out = S ^ (P & ~(D & S)); \ + break; \ + case 0xbd: \ + out = ~((S ^ D) & (P ^ D)); \ + break; \ + case 0xbe: \ + out = D | (P ^ S); \ + break; \ + case 0xbf: \ + out = D | ~(P & S); \ + break; \ + case 0xc0: \ + out = P & S; \ + break; \ + case 0xc1: \ + out = ~(S ^ (P | (D & ~S))); \ + break; \ + case 0xc2: \ + out = ~(S ^ (P | ~(D | S))); \ + break; \ + case 0xc3: \ + out = ~(P ^ S); \ + break; \ + case 0xc4: \ + out = S & (P | ~D); \ + break; \ + case 0xc5: \ + out = ~(S ^ (P | (D ^ S))); \ + break; \ + case 0xc6: \ + out = S ^ (D & ~P); \ + break; \ + case 0xc7: \ + out = ~(P ^ (S & (D | P))); \ + break; \ + case 0xc8: \ + out = S & (D | P); \ + break; \ + case 0xc9: \ + out = ~(S ^ (P | D)); \ + break; \ + case 0xca: \ + out = D ^ (P & (S ^ D)); \ + break; \ + case 0xcb: \ + out = ~(S ^ (P | (D & S))); \ + break; \ + case 0xcc: \ + out = S; \ + break; \ + case 0xcd: \ + out = S | ~(D | P); \ + break; \ + case 0xce: \ + out = S | (D & ~P); \ + break; \ + case 0xcf: \ + out = S | ~P; \ + break; \ + case 0xd0: \ + out = P & (S | ~D); \ + break; \ + case 0xd1: \ + out = ~(P ^ (S | (D ^ P))); \ + break; \ + case 0xd2: \ + out = P ^ (D & ~S); \ + break; \ + case 0xd3: \ + out = ~(S ^ (P & (D | S))); \ + break; \ + case 0xd4: \ + out = S ^ ((S ^ P) & (P ^ D)); \ + break; \ + case 0xd5: \ + out = ~(D & ~(P & S)); \ + break; \ + case 0xd6: \ + out = P ^ (S ^ (D | (P & S))); \ + break; \ + case 0xd7: \ + out = ~(D & (P ^ S)); \ + break; \ + case 0xd8: \ + out = P ^ (D & (S ^ P)); \ + break; \ + case 0xd9: \ + out = ~(S ^ (D | (P & S))); \ + break; \ + case 0xda: \ + out = D ^ (P & ~(S & D)); \ + break; \ + case 0xdb: \ + out = ~((S ^ P) & (D ^ S)); \ + break; \ + case 0xdc: \ + out = S | (P & ~D); \ + break; \ + case 0xdd: \ + out = S | ~D; \ + break; \ + case 0xde: \ + out = S | (D ^ P); \ + break; \ + case 0xdf: \ + out = S | ~(D & P); \ + break; \ + case 0xe0: \ + out = P & (D | S); \ + break; \ + case 0xe1: \ + out = ~(P ^ (D | S)); \ + break; \ + case 0xe2: \ + out = D ^ (S & (P ^ D)); \ + break; \ + case 0xe3: \ + out = ~(P ^ (S | (D & P))); \ + break; \ + case 0xe4: \ + out = S ^ (D & (P ^ S)); \ + break; \ + case 0xe5: \ + out = ~(P ^ (D | (S & P))); \ + break; \ + case 0xe6: \ + out = S ^ (D & ~(P & S)); \ + break; \ + case 0xe7: \ + out = ~((S ^ P) & (P ^ D)); \ + break; \ + case 0xe8: \ + out = S ^ ((S ^ P) & (D ^ S)); \ + break; \ + case 0xe9: \ + out = ~(D ^ (S ^ (P & ~(D & S)))); \ + break; \ + case 0xea: \ + out = D | (P & S); \ + break; \ + case 0xeb: \ + out = D | ~(P ^ S); \ + break; \ + case 0xec: \ + out = S | (D & P); \ + break; \ + case 0xed: \ + out = S | ~(D ^ P); \ + break; \ + case 0xee: \ + out = D | S; \ + break; \ + case 0xef: \ + out = S | (D | ~P); \ + break; \ + case 0xf0: \ + out = P; \ + break; \ + case 0xf1: \ + out = P | ~(D | S); \ + break; \ + case 0xf2: \ + out = P | (D & ~S); \ + break; \ + case 0xf3: \ + out = P | ~S; \ + break; \ + case 0xf4: \ + out = P | (S & ~D); \ + break; \ + case 0xf5: \ + out = P | ~D; \ + break; \ + case 0xf6: \ + out = P | (D ^ S); \ + break; \ + case 0xf7: \ + out = P | ~(D & S); \ + break; \ + case 0xf8: \ + out = P | (D & S); \ + break; \ + case 0xf9: \ + out = P | ~(D ^ S); \ + break; \ + case 0xfa: \ + out = D | P; \ + break; \ + case 0xfb: \ + out = D | (P | ~S); \ + break; \ + case 0xfc: \ + out = P | S; \ + break; \ + case 0xfd: \ + out = P | (S | ~D); \ + break; \ + case 0xfe: \ + out = D | (P | S); \ + break; \ + case 0xff: \ + out = ~0; \ + break; \ + } \ + } + +#define MIX() do { ROPMIX(virge->s3d.rop & 0xFF, dest, pattern, source, out); out &= 0xFFFFFF; } while (0) #define WRITE(addr, val) \ do { \ From 21f5415328cd5c101d020f3d1c601ba16322c53f Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Tue, 4 Mar 2025 01:31:19 +0600 Subject: [PATCH 19/22] Trident TGUI: Use a significantly faster version of the ROP code At least 30%-40% performance improvement seen --- src/video/vid_tgui9440.c | 790 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 778 insertions(+), 12 deletions(-) diff --git a/src/video/vid_tgui9440.c b/src/video/vid_tgui9440.c index d1391f1d1..c6edabac5 100644 --- a/src/video/vid_tgui9440.c +++ b/src/video/vid_tgui9440.c @@ -1457,18 +1457,784 @@ enum { else \ dat = vram_l[(addr) & (tgui->vram_mask >> 2)]; -#define MIX() \ - do { \ - out = 0; \ - for (c = 0; c < 32; c++) { \ - d = (dst_dat & (1 << c)) ? 1 : 0; \ - if (src_dat & (1 << c)) \ - d |= 2; \ - if (pat_dat & (1 << c)) \ - d |= 4; \ - if (tgui->accel.rop & (1 << d)) \ - out |= (1 << c); \ - } \ +#define ROPMIX(R, D, P, S, out) \ + { \ + switch (R) { \ + case 0x00: \ + out = 0; \ + break; \ + case 0x01: \ + out = ~(D | (P | S)); \ + break; \ + case 0x02: \ + out = D & ~(P | S); \ + break; \ + case 0x03: \ + out = ~(P | S); \ + break; \ + case 0x04: \ + out = S & ~(D | P); \ + break; \ + case 0x05: \ + out = ~(D | P); \ + break; \ + case 0x06: \ + out = ~(P | ~(D ^ S)); \ + break; \ + case 0x07: \ + out = ~(P | (D & S)); \ + break; \ + case 0x08: \ + out = S & (D & ~P); \ + break; \ + case 0x09: \ + out = ~(P | (D ^ S)); \ + break; \ + case 0x0a: \ + out = D & ~P; \ + break; \ + case 0x0b: \ + out = ~(P | (S & ~D)); \ + break; \ + case 0x0c: \ + out = S & ~P; \ + break; \ + case 0x0d: \ + out = ~(P | (D & ~S)); \ + break; \ + case 0x0e: \ + out = ~(P | ~(D | S)); \ + break; \ + case 0x0f: \ + out = ~P; \ + break; \ + case 0x10: \ + out = P & ~(D | S); \ + break; \ + case 0x11: \ + out = ~(D | S); \ + break; \ + case 0x12: \ + out = ~(S | ~(D ^ P)); \ + break; \ + case 0x13: \ + out = ~(S | (D & P)); \ + break; \ + case 0x14: \ + out = ~(D | ~(P ^ S)); \ + break; \ + case 0x15: \ + out = ~(D | (P & S)); \ + break; \ + case 0x16: \ + out = P ^ (S ^ (D & ~(P & S))); \ + break; \ + case 0x17: \ + out = ~(S ^ ((S ^ P) & (D ^ S))); \ + break; \ + case 0x18: \ + out = (S ^ P) & (P ^ D); \ + break; \ + case 0x19: \ + out = ~(S ^ (D & ~(P & S))); \ + break; \ + case 0x1a: \ + out = P ^ (D | (S & P)); \ + break; \ + case 0x1b: \ + out = ~(S ^ (D & (P ^ S))); \ + break; \ + case 0x1c: \ + out = P ^ (S | (D & P)); \ + break; \ + case 0x1d: \ + out = ~(D ^ (S & (P ^ D))); \ + break; \ + case 0x1e: \ + out = P ^ (D | S); \ + break; \ + case 0x1f: \ + out = ~(P & (D | S)); \ + break; \ + case 0x20: \ + out = D & (P & ~S); \ + break; \ + case 0x21: \ + out = ~(S | (D ^ P)); \ + break; \ + case 0x22: \ + out = D & ~S; \ + break; \ + case 0x23: \ + out = ~(S | (P & ~D)); \ + break; \ + case 0x24: \ + out = (S ^ P) & (D ^ S); \ + break; \ + case 0x25: \ + out = ~(P ^ (D & ~(S & P))); \ + break; \ + case 0x26: \ + out = S ^ (D | (P & S)); \ + break; \ + case 0x27: \ + out = S ^ (D | ~(P ^ S)); \ + break; \ + case 0x28: \ + out = D & (P ^ S); \ + break; \ + case 0x29: \ + out = ~(P ^ (S ^ (D | (P & S)))); \ + break; \ + case 0x2a: \ + out = D & ~(P & S); \ + break; \ + case 0x2b: \ + out = ~(S ^ ((S ^ P) & (P ^ D))); \ + break; \ + case 0x2c: \ + out = S ^ (P & (D | S)); \ + break; \ + case 0x2d: \ + out = P ^ (S | ~D); \ + break; \ + case 0x2e: \ + out = P ^ (S | (D ^ P)); \ + break; \ + case 0x2f: \ + out = ~(P & (S | ~D)); \ + break; \ + case 0x30: \ + out = P & ~S; \ + break; \ + case 0x31: \ + out = ~(S | (D & ~P)); \ + break; \ + case 0x32: \ + out = S ^ (D | (P | S)); \ + break; \ + case 0x33: \ + out = ~S; \ + break; \ + case 0x34: \ + out = S ^ (P | (D & S)); \ + break; \ + case 0x35: \ + out = S ^ (P | ~(D ^ S)); \ + break; \ + case 0x36: \ + out = S ^ (D | P); \ + break; \ + case 0x37: \ + out = ~(S & (D | P)); \ + break; \ + case 0x38: \ + out = P ^ (S & (D | P)); \ + break; \ + case 0x39: \ + out = S ^ (P | ~D); \ + break; \ + case 0x3a: \ + out = S ^ (P | (D ^ S)); \ + break; \ + case 0x3b: \ + out = ~(S & (P | ~D)); \ + break; \ + case 0x3c: \ + out = P ^ S; \ + break; \ + case 0x3d: \ + out = S ^ (P | ~(D | S)); \ + break; \ + case 0x3e: \ + out = S ^ (P | (D & ~S)); \ + break; \ + case 0x3f: \ + out = ~(P & S); \ + break; \ + case 0x40: \ + out = P & (S & ~D); \ + break; \ + case 0x41: \ + out = ~(D | (P ^ S)); \ + break; \ + case 0x42: \ + out = (S ^ D) & (P ^ D); \ + break; \ + case 0x43: \ + out = ~(S ^ (P & ~(D & S))); \ + break; \ + case 0x44: \ + out = S & ~D; \ + break; \ + case 0x45: \ + out = ~(D | (P & ~S)); \ + break; \ + case 0x46: \ + out = D ^ (S | (P & D)); \ + break; \ + case 0x47: \ + out = ~(P ^ (S & (D ^ P))); \ + break; \ + case 0x48: \ + out = S & (D ^ P); \ + break; \ + case 0x49: \ + out = ~(P ^ (D ^ (S | (P & D)))); \ + break; \ + case 0x4a: \ + out = D ^ (P & (S | D)); \ + break; \ + case 0x4b: \ + out = P ^ (D | ~S); \ + break; \ + case 0x4c: \ + out = S & ~(D & P); \ + break; \ + case 0x4d: \ + out = ~(S ^ ((S ^ P) | (D ^ S))); \ + break; \ + case 0x4e: \ + out = P ^ (D | (S ^ P)); \ + break; \ + case 0x4f: \ + out = ~(P & (D | ~S)); \ + break; \ + case 0x50: \ + out = P & ~D; \ + break; \ + case 0x51: \ + out = ~(D | (S & ~P)); \ + break; \ + case 0x52: \ + out = D ^ (P | (S & D)); \ + break; \ + case 0x53: \ + out = ~(S ^ (P & (D ^ S))); \ + break; \ + case 0x54: \ + out = ~(D | ~(P | S)); \ + break; \ + case 0x55: \ + out = ~D; \ + break; \ + case 0x56: \ + out = D ^ (P | S); \ + break; \ + case 0x57: \ + out = ~(D & (P | S)); \ + break; \ + case 0x58: \ + out = P ^ (D & (S | P)); \ + break; \ + case 0x59: \ + out = D ^ (P | ~S); \ + break; \ + case 0x5a: \ + out = D ^ P; \ + break; \ + case 0x5b: \ + out = D ^ (P | ~(S | D)); \ + break; \ + case 0x5c: \ + out = D ^ (P | (S ^ D)); \ + break; \ + case 0x5d: \ + out = ~(D & (P | ~S)); \ + break; \ + case 0x5e: \ + out = D ^ (P | (S & ~D)); \ + break; \ + case 0x5f: \ + out = ~(D & P); \ + break; \ + case 0x60: \ + out = P & (D ^ S); \ + break; \ + case 0x61: \ + out = ~(D ^ (S ^ (P | (D & S)))); \ + break; \ + case 0x62: \ + out = D ^ (S & (P | D)); \ + break; \ + case 0x63: \ + out = S ^ (D | ~P); \ + break; \ + case 0x64: \ + out = S ^ (D & (P | S)); \ + break; \ + case 0x65: \ + out = D ^ (S | ~P); \ + break; \ + case 0x66: \ + out = D ^ S; \ + break; \ + case 0x67: \ + out = S ^ (D | ~(P | S)); \ + break; \ + case 0x68: \ + out = ~(D ^ (S ^ (P | ~(D | S)))); \ + break; \ + case 0x69: \ + out = ~(P ^ (D ^ S)); \ + break; \ + case 0x6a: \ + out = D ^ (P & S); \ + break; \ + case 0x6b: \ + out = ~(P ^ (S ^ (D & (P | S)))); \ + break; \ + case 0x6c: \ + out = S ^ (D & P); \ + break; \ + case 0x6d: \ + out = ~(P ^ (D ^ (S & (P | D)))); \ + break; \ + case 0x6e: \ + out = S ^ (D & (P | ~S)); \ + break; \ + case 0x6f: \ + out = ~(P & ~(D ^ S)); \ + break; \ + case 0x70: \ + out = P & ~(D & S); \ + break; \ + case 0x71: \ + out = ~(S ^ ((S ^ D) & (P ^ D))); \ + break; \ + case 0x72: \ + out = S ^ (D | (P ^ S)); \ + break; \ + case 0x73: \ + out = ~(S & (D | ~P)); \ + break; \ + case 0x74: \ + out = D ^ (S | (P ^ D)); \ + break; \ + case 0x75: \ + out = ~(D & (S | ~P)); \ + break; \ + case 0x76: \ + out = S ^ (D | (P & ~S)); \ + break; \ + case 0x77: \ + out = ~(D & S); \ + break; \ + case 0x78: \ + out = P ^ (D & S); \ + break; \ + case 0x79: \ + out = ~(D ^ (S ^ (P & (D | S)))); \ + break; \ + case 0x7a: \ + out = D ^ (P & (S | ~D)); \ + break; \ + case 0x7b: \ + out = ~(S & ~(D ^ P)); \ + break; \ + case 0x7c: \ + out = S ^ (P & (D | ~S)); \ + break; \ + case 0x7d: \ + out = ~(D & ~(P ^ S)); \ + break; \ + case 0x7e: \ + out = (S ^ P) | (D ^ S); \ + break; \ + case 0x7f: \ + out = ~(D & (P & S)); \ + break; \ + case 0x80: \ + out = D & (P & S); \ + break; \ + case 0x81: \ + out = ~((S ^ P) | (D ^ S)); \ + break; \ + case 0x82: \ + out = D & ~(P ^ S); \ + break; \ + case 0x83: \ + out = ~(S ^ (P & (D | ~S))); \ + break; \ + case 0x84: \ + out = S & ~(D ^ P); \ + break; \ + case 0x85: \ + out = ~(P ^ (D & (S | ~P))); \ + break; \ + case 0x86: \ + out = D ^ (S ^ (P & (D | S))); \ + break; \ + case 0x87: \ + out = ~(P ^ (D & S)); \ + break; \ + case 0x88: \ + out = D & S; \ + break; \ + case 0x89: \ + out = ~(S ^ (D | (P & ~S))); \ + break; \ + case 0x8a: \ + out = D & (S | ~P); \ + break; \ + case 0x8b: \ + out = ~(D ^ (S | (P ^ D))); \ + break; \ + case 0x8c: \ + out = S & (D | ~P); \ + break; \ + case 0x8d: \ + out = ~(S ^ (D | (P ^ S))); \ + break; \ + case 0x8e: \ + out = S ^ ((S ^ D) & (P ^ D)); \ + break; \ + case 0x8f: \ + out = ~(P & ~(D & S)); \ + break; \ + case 0x90: \ + out = P & ~(D ^ S); \ + break; \ + case 0x91: \ + out = ~(S ^ (D & (P | ~S))); \ + break; \ + case 0x92: \ + out = D ^ (P ^ (S & (D | P))); \ + break; \ + case 0x93: \ + out = ~(S ^ (P & D)); \ + break; \ + case 0x94: \ + out = P ^ (S ^ (D & (P | S))); \ + break; \ + case 0x95: \ + out = ~(D ^ (P & S)); \ + break; \ + case 0x96: \ + out = D ^ (P ^ S); \ + break; \ + case 0x97: \ + out = P ^ (S ^ (D | ~(P | S))); \ + break; \ + case 0x98: \ + out = ~(S ^ (D | ~(P | S))); \ + break; \ + case 0x99: \ + out = ~(D ^ S); \ + break; \ + case 0x9a: \ + out = D ^ (P & ~S); \ + break; \ + case 0x9b: \ + out = ~(S ^ (D & (P | S))); \ + break; \ + case 0x9c: \ + out = S ^ (P & ~D); \ + break; \ + case 0x9d: \ + out = ~(D ^ (S & (P | D))); \ + break; \ + case 0x9e: \ + out = D ^ (S ^ (P | (D & S))); \ + break; \ + case 0x9f: \ + out = ~(P & (D ^ S)); \ + break; \ + case 0xa0: \ + out = D & P; \ + break; \ + case 0xa1: \ + out = ~(P ^ (D | (S & ~P))); \ + break; \ + case 0xa2: \ + out = D & (P | ~S); \ + break; \ + case 0xa3: \ + out = ~(D ^ (P | (S ^ D))); \ + break; \ + case 0xa4: \ + out = ~(P ^ (D | ~(S | P))); \ + break; \ + case 0xa5: \ + out = ~(P ^ D); \ + break; \ + case 0xa6: \ + out = D ^ (S & ~P); \ + break; \ + case 0xa7: \ + out = ~(P ^ (D & (S | P))); \ + break; \ + case 0xa8: \ + out = D & (P | S); \ + break; \ + case 0xa9: \ + out = ~(D ^ (P | S)); \ + break; \ + case 0xaa: \ + out = D; \ + break; \ + case 0xab: \ + out = D | ~(P | S); \ + break; \ + case 0xac: \ + out = S ^ (P & (D ^ S)); \ + break; \ + case 0xad: \ + out = ~(D ^ (P | (S & D))); \ + break; \ + case 0xae: \ + out = D | (S & ~P); \ + break; \ + case 0xaf: \ + out = D | ~P; \ + break; \ + case 0xb0: \ + out = P & (D | ~S); \ + break; \ + case 0xb1: \ + out = ~(P ^ (D | (S ^ P))); \ + break; \ + case 0xb2: \ + out = S ^ ((S ^ P) | (D ^ S)); \ + break; \ + case 0xb3: \ + out = ~(S & ~(D & P)); \ + break; \ + case 0xb4: \ + out = P ^ (S & ~D); \ + break; \ + case 0xb5: \ + out = ~(D ^ (P & (S | D))); \ + break; \ + case 0xb6: \ + out = D ^ (P ^ (S | (D & P))); \ + break; \ + case 0xb7: \ + out = ~(S & (D ^ P)); \ + break; \ + case 0xb8: \ + out = P ^ (S & (D ^ P)); \ + break; \ + case 0xb9: \ + out = ~(D ^ (S | (P & D))); \ + break; \ + case 0xba: \ + out = D | (P & ~S); \ + break; \ + case 0xbb: \ + out = D | ~S; \ + break; \ + case 0xbc: \ + out = S ^ (P & ~(D & S)); \ + break; \ + case 0xbd: \ + out = ~((S ^ D) & (P ^ D)); \ + break; \ + case 0xbe: \ + out = D | (P ^ S); \ + break; \ + case 0xbf: \ + out = D | ~(P & S); \ + break; \ + case 0xc0: \ + out = P & S; \ + break; \ + case 0xc1: \ + out = ~(S ^ (P | (D & ~S))); \ + break; \ + case 0xc2: \ + out = ~(S ^ (P | ~(D | S))); \ + break; \ + case 0xc3: \ + out = ~(P ^ S); \ + break; \ + case 0xc4: \ + out = S & (P | ~D); \ + break; \ + case 0xc5: \ + out = ~(S ^ (P | (D ^ S))); \ + break; \ + case 0xc6: \ + out = S ^ (D & ~P); \ + break; \ + case 0xc7: \ + out = ~(P ^ (S & (D | P))); \ + break; \ + case 0xc8: \ + out = S & (D | P); \ + break; \ + case 0xc9: \ + out = ~(S ^ (P | D)); \ + break; \ + case 0xca: \ + out = D ^ (P & (S ^ D)); \ + break; \ + case 0xcb: \ + out = ~(S ^ (P | (D & S))); \ + break; \ + case 0xcc: \ + out = S; \ + break; \ + case 0xcd: \ + out = S | ~(D | P); \ + break; \ + case 0xce: \ + out = S | (D & ~P); \ + break; \ + case 0xcf: \ + out = S | ~P; \ + break; \ + case 0xd0: \ + out = P & (S | ~D); \ + break; \ + case 0xd1: \ + out = ~(P ^ (S | (D ^ P))); \ + break; \ + case 0xd2: \ + out = P ^ (D & ~S); \ + break; \ + case 0xd3: \ + out = ~(S ^ (P & (D | S))); \ + break; \ + case 0xd4: \ + out = S ^ ((S ^ P) & (P ^ D)); \ + break; \ + case 0xd5: \ + out = ~(D & ~(P & S)); \ + break; \ + case 0xd6: \ + out = P ^ (S ^ (D | (P & S))); \ + break; \ + case 0xd7: \ + out = ~(D & (P ^ S)); \ + break; \ + case 0xd8: \ + out = P ^ (D & (S ^ P)); \ + break; \ + case 0xd9: \ + out = ~(S ^ (D | (P & S))); \ + break; \ + case 0xda: \ + out = D ^ (P & ~(S & D)); \ + break; \ + case 0xdb: \ + out = ~((S ^ P) & (D ^ S)); \ + break; \ + case 0xdc: \ + out = S | (P & ~D); \ + break; \ + case 0xdd: \ + out = S | ~D; \ + break; \ + case 0xde: \ + out = S | (D ^ P); \ + break; \ + case 0xdf: \ + out = S | ~(D & P); \ + break; \ + case 0xe0: \ + out = P & (D | S); \ + break; \ + case 0xe1: \ + out = ~(P ^ (D | S)); \ + break; \ + case 0xe2: \ + out = D ^ (S & (P ^ D)); \ + break; \ + case 0xe3: \ + out = ~(P ^ (S | (D & P))); \ + break; \ + case 0xe4: \ + out = S ^ (D & (P ^ S)); \ + break; \ + case 0xe5: \ + out = ~(P ^ (D | (S & P))); \ + break; \ + case 0xe6: \ + out = S ^ (D & ~(P & S)); \ + break; \ + case 0xe7: \ + out = ~((S ^ P) & (P ^ D)); \ + break; \ + case 0xe8: \ + out = S ^ ((S ^ P) & (D ^ S)); \ + break; \ + case 0xe9: \ + out = ~(D ^ (S ^ (P & ~(D & S)))); \ + break; \ + case 0xea: \ + out = D | (P & S); \ + break; \ + case 0xeb: \ + out = D | ~(P ^ S); \ + break; \ + case 0xec: \ + out = S | (D & P); \ + break; \ + case 0xed: \ + out = S | ~(D ^ P); \ + break; \ + case 0xee: \ + out = D | S; \ + break; \ + case 0xef: \ + out = S | (D | ~P); \ + break; \ + case 0xf0: \ + out = P; \ + break; \ + case 0xf1: \ + out = P | ~(D | S); \ + break; \ + case 0xf2: \ + out = P | (D & ~S); \ + break; \ + case 0xf3: \ + out = P | ~S; \ + break; \ + case 0xf4: \ + out = P | (S & ~D); \ + break; \ + case 0xf5: \ + out = P | ~D; \ + break; \ + case 0xf6: \ + out = P | (D ^ S); \ + break; \ + case 0xf7: \ + out = P | ~(D & S); \ + break; \ + case 0xf8: \ + out = P | (D & S); \ + break; \ + case 0xf9: \ + out = P | ~(D ^ S); \ + break; \ + case 0xfa: \ + out = D | P; \ + break; \ + case 0xfb: \ + out = D | (P | ~S); \ + break; \ + case 0xfc: \ + out = P | S; \ + break; \ + case 0xfd: \ + out = P | (S | ~D); \ + break; \ + case 0xfe: \ + out = D | (P | S); \ + break; \ + case 0xff: \ + out = ~0; \ + break; \ + } \ + } + +#define MIX() \ + do { \ + out = 0; \ + ROPMIX(tgui->accel.rop, dst_dat, pat_dat, src_dat, out); \ } while (0) #define WRITE(addr, dat) \ From a9653e5a99393924f09ab6f6c72fb8afc47310bf Mon Sep 17 00:00:00 2001 From: OBattler Date: Mon, 3 Mar 2025 21:21:02 +0100 Subject: [PATCH 20/22] TGUI9440: Fix two newly introduced warnings. --- src/video/vid_tgui9440.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/video/vid_tgui9440.c b/src/video/vid_tgui9440.c index c6edabac5..af203f327 100644 --- a/src/video/vid_tgui9440.c +++ b/src/video/vid_tgui9440.c @@ -2256,8 +2256,6 @@ tgui_accel_command(int count, uint32_t cpu_dat, tgui_t *tgui) const uint32_t *pattern_data; int x; int y; - int c; - int d; uint32_t out; uint32_t src_dat = 0; uint32_t dst_dat; From 73576bb61ed1fb90be962f87028c3a6244b7723d Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Wed, 5 Mar 2025 01:23:37 +0600 Subject: [PATCH 21/22] Revert "S3 ViRGE: Make IRQs happen in main thread" This reverts commit fae26729f103010ddc7e621a37083bc4ae6d5736. --- src/video/vid_s3_virge.c | 57 +++++----------------------------------- 1 file changed, 6 insertions(+), 51 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index 62ffcff0c..af33003cb 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -338,7 +338,6 @@ typedef struct virge_t { event_t * fifo_not_full_event; atomic_int virge_busy; - atomic_uint virge_irq_req; uint8_t subsys_stat; uint8_t subsys_cntl; @@ -374,9 +373,6 @@ typedef struct virge_t { uint32_t dma_mmio_addr; uint32_t dma_data_type; - pc_timer_t wake_timer; - pc_timer_t irq_timer; - int pci; int is_agp; } virge_t; @@ -387,28 +383,6 @@ wake_fifo_thread(virge_t *virge) { thread_set_event(virge->wake_fifo_thread); } -void -s3_virge_wake_fifo_timer(void* priv) -{ - virge_t *virge = (virge_t *) priv; - - thread_set_event(virge->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ -} - -void -s3_virge_wake_fifo_thread(virge_t *virge) -{ - if (!timer_is_enabled(&virge->wake_timer)) { - /*Don't wake FIFO thread immediately - if we do that it will probably - process one word and go back to sleep, requiring it to be woken on - almost every write. Instead, wait a short while so that the CPU - emulation writes more data so we have more batched-up work.*/ - timer_on_auto(&virge->wake_timer, 100.0); - } - if (!timer_is_enabled(&virge->irq_timer)) - timer_on_auto(&virge->irq_timer, 100.0); -} - static virge_t *reset_state = NULL; static video_timings_t timing_diamond_stealth3d_2000_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; @@ -497,19 +471,6 @@ s3_virge_update_irqs(virge_t *virge) pci_clear_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); } -void -s3_virge_update_irq_timer(void* priv) -{ - virge_t *virge = (virge_t *) priv; - - if (virge->virge_irq_req) { - virge->virge_irq_req--; - s3_virge_update_irqs(virge); - } - - timer_on_auto(&virge->irq_timer, 100.); -} - static void s3_virge_out(uint16_t addr, uint8_t val, void *priv) { @@ -1140,9 +1101,6 @@ static void s3_virge_vblank_start(svga_t *svga) { virge_t *virge = (virge_t *) svga->priv; - if (virge->virge_irq_req) - virge->virge_irq_req--; - virge->subsys_stat |= INT_VSY; s3_virge_update_irqs(virge); } @@ -1837,8 +1795,7 @@ fifo_thread(void *param) if (virge->cmd_dma) virge->subsys_stat |= (INT_HOST_DONE | INT_CMD_DONE); - //s3_virge_update_irqs(virge); - virge->virge_irq_req++; + s3_virge_update_irqs(virge); } } @@ -1878,8 +1835,10 @@ s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) virge->fifo_write_idx++; - if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 16) - s3_virge_wake_fifo_thread(virge); + if (FIFO_ENTRIES > 0xe000) + wake_fifo_thread(virge); + if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 8) + wake_fifo_thread(virge); } static void @@ -4445,8 +4404,7 @@ render_thread(void *param) } virge->s3d_busy = 0; virge->subsys_stat |= INT_S3D_DONE; - //s3_virge_update_irqs(virge); - virge->virge_irq_req++; + s3_virge_update_irqs(virge); } } @@ -5375,9 +5333,6 @@ s3_virge_init(const device_t *info) virge->fifo_not_full_event = thread_create_event(); virge->fifo_thread = thread_create(fifo_thread, virge); - timer_add(&virge->wake_timer, s3_virge_wake_fifo_timer, virge, 0); - timer_add(&virge->irq_timer, s3_virge_update_irq_timer, virge, 0); - virge->local = info->local; *reset_state = *virge; From 92d69475f4eea136226842784ffdcf2d862dbb77 Mon Sep 17 00:00:00 2001 From: Cacodemon345 Date: Wed, 5 Mar 2025 02:17:51 +0600 Subject: [PATCH 22/22] Only retain the newer IRQ updating code --- src/video/vid_s3_virge.c | 43 ++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index af33003cb..904d4a4cf 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -338,6 +338,7 @@ typedef struct virge_t { event_t * fifo_not_full_event; atomic_int virge_busy; + atomic_uint irq_pending; uint8_t subsys_stat; uint8_t subsys_cntl; @@ -375,6 +376,8 @@ typedef struct virge_t { int pci; int is_agp; + + pc_timer_t irq_timer; } virge_t; static __inline void @@ -471,6 +474,19 @@ s3_virge_update_irqs(virge_t *virge) pci_clear_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); } +static void +s3_virge_update_irq_timer(void* priv) +{ + virge_t *virge = (virge_t *) priv; + + if (virge->irq_pending) { + virge->irq_pending--; + s3_virge_update_irqs(virge); + } + + timer_on_auto(&virge->irq_timer, 100.); +} + static void s3_virge_out(uint16_t addr, uint8_t val, void *priv) { @@ -1101,6 +1117,8 @@ static void s3_virge_vblank_start(svga_t *svga) { virge_t *virge = (virge_t *) svga->priv; + if (virge->irq_pending) + virge->irq_pending--; virge->subsys_stat |= INT_VSY; s3_virge_update_irqs(virge); } @@ -1784,18 +1802,18 @@ fifo_thread(void *param) virge->fifo_read_idx++; fifo->addr_type = FIFO_INVALID; - if (FIFO_ENTRIES > 0xe000) - thread_set_event(virge->fifo_not_full_event); + if (FIFO_ENTRIES > 0xe000) + thread_set_event(virge->fifo_not_full_event); - end_time = plat_timer_read(); - virge_time += end_time - start_time; - } - virge->virge_busy = 0; - virge->subsys_stat |= (INT_FIFO_EMP | INT_3DF_EMP); - if (virge->cmd_dma) - virge->subsys_stat |= (INT_HOST_DONE | INT_CMD_DONE); + end_time = plat_timer_read(); + virge_time += end_time - start_time; + } + virge->virge_busy = 0; + virge->subsys_stat |= (INT_FIFO_EMP | INT_3DF_EMP); + if (virge->cmd_dma) + virge->subsys_stat |= (INT_HOST_DONE | INT_CMD_DONE); - s3_virge_update_irqs(virge); + virge->irq_pending++; } } @@ -4404,7 +4422,7 @@ render_thread(void *param) } virge->s3d_busy = 0; virge->subsys_stat |= INT_S3D_DONE; - s3_virge_update_irqs(virge); + virge->irq_pending++; } } @@ -5062,6 +5080,7 @@ s3_virge_disable_handlers(virge_t *dev) reset_state->svga.timer = dev->svga.timer; reset_state->svga.timer8514 = dev->svga.timer8514; + reset_state->irq_timer = dev->irq_timer; } static void @@ -5333,6 +5352,8 @@ s3_virge_init(const device_t *info) virge->fifo_not_full_event = thread_create_event(); virge->fifo_thread = thread_create(fifo_thread, virge); + timer_add(&virge->irq_timer, s3_virge_update_irq_timer, virge, 1); + virge->local = info->local; *reset_state = *virge;