From 9bc5f0dd0577899ac23600f31d1f773bf72edff0 Mon Sep 17 00:00:00 2001 From: TC1995 Date: Thu, 9 Jan 2025 22:58:40 +0100 Subject: [PATCH] 8514/A and S3 changes of the late day (January 9th, 2025) S3: 1. Cleaned up the Short Stroke command processing. 2. Proceed to calculate the error term only when it's equal to or greater than the line length (Draw Line, Command 1; also applies to Short Strokes, Command 0). 8514/A compatibles: 1. Reworked the polygon draw type A processing. 2. As with the S3, reworked the way the error term is handled in the processing, and on Command 5 (Draw Polygon Boundary Line). --- src/include/86box/vid_8514a.h | 1 + src/video/vid_8514a.c | 142 ++++++++++++++++++++++++---------- src/video/vid_s3.c | 62 +++++++-------- 3 files changed, 128 insertions(+), 77 deletions(-) diff --git a/src/include/86box/vid_8514a.h b/src/include/86box/vid_8514a.h index 0e6e0965a..57e98cc44 100644 --- a/src/include/86box/vid_8514a.h +++ b/src/include/86box/vid_8514a.h @@ -138,6 +138,7 @@ typedef struct ibm8514_t { int output2; int ssv_len; + int ssv_len_back; uint8_t ssv_dir; uint8_t ssv_draw; int odd_in; diff --git a/src/video/vid_8514a.c b/src/video/vid_8514a.c index 810d403ab..0e51feae4 100644 --- a/src/video/vid_8514a.c +++ b/src/video/vid_8514a.c @@ -43,6 +43,11 @@ #include <86box/vid_ati_mach8.h> #include "cpu.h" +#ifdef CLAMP +# undef CLAMP +#endif + + #define BIOS_MACH8_ROM_PATH "roms/video/mach8/11301113140_4k.BIN" static void ibm8514_accel_outb(uint16_t port, uint8_t val, void *priv); @@ -68,6 +73,17 @@ ibm8514_log(const char *fmt, ...) # define ibm8514_log(fmt, ...) 
#endif +static int16_t +CLAMP(int16_t in, int16_t min, int16_t max) +{ + if (in < min) + return min; + if (in > max) + return max; + + return in; +} + #define WRITE8(addr, var, val) \ switch ((addr) & 1) { \ case 0: \ @@ -416,14 +432,6 @@ ibm8514_accel_out_fifo(svga_t *svga, uint16_t port, uint32_t val, int len) if (len == 2) { dev->accel.short_stroke = val; - dev->accel.cx = dev->accel.cur_x; - if (dev->accel.cur_x >= 0x600) - dev->accel.cx |= ~0x5ff; - - dev->accel.cy = dev->accel.cur_y; - if (dev->accel.cur_y >= 0x600) - dev->accel.cy |= ~0x5ff; - if (dev->accel.cmd & 0x1000) { ibm8514_short_stroke_start(-1, 0, -1, 0, svga, dev->accel.short_stroke & 0xff, len); ibm8514_short_stroke_start(-1, 0, -1, 0, svga, dev->accel.short_stroke >> 8, len); @@ -969,6 +977,7 @@ ibm8514_short_stroke_start(int count, int cpu_input, uint32_t mix_dat, uint32_t dev->accel.ssv_len = ssv & 0x0f; dev->accel.ssv_dir = ssv & 0xe0; dev->accel.ssv_draw = ssv & 0x10; + dev->accel.ssv_len_back = dev->accel.ssv_len; if (ibm8514_cpu_src(svga)) { dev->data_available = 0; @@ -1006,6 +1015,7 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat uint16_t bkgd_color = dev->accel.bkgd_color; uint32_t old_mix_dat; int and3 = dev->accel.cur_x & 3; + int poly_src; if (!dev->bpp) { compare &= 0xff; @@ -1121,13 +1131,24 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat old_mix_dat = mix_dat; + if (cmd == 5 || cmd == 1 || (cmd == 2 && (dev->accel.multifunc[0x0a] & 0x06))) + ibm8514_log("CMD=%d, full=%04x, pixcntl=%d, filling=%02x.\n", cmd, dev->accel.cmd, pixcntl, dev->accel.multifunc[0x0a] & 0x06); + /*Bit 4 of the Command register is the draw yes bit, which enables writing to memory/reading from memory when enabled. When this bit is disabled, no writing to memory/reading from memory is allowed. 
(This bit is almost meaningless on the NOP command)*/ switch (cmd) { case 0: /*NOP (Short Stroke Vectors)*/ - if (dev->accel.ssv_state == 0) + if (dev->accel.ssv_state == 0) { + dev->accel.cx = dev->accel.cur_x; + if (dev->accel.cur_x >= 0x600) + dev->accel.cx |= ~0x5ff; + + dev->accel.cy = dev->accel.cur_y; + if (dev->accel.cur_y >= 0x600) + dev->accel.cy |= ~0x5ff; break; + } if (dev->accel.cmd & 0x08) { while (count-- && dev->accel.ssv_len >= 0) { @@ -1289,7 +1310,7 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat else dev->accel.cy--; - if (dev->accel.err_term >= 0) { + if (dev->accel.err_term >= dev->accel.ssv_len_back) { dev->accel.err_term += dev->accel.destx_distp; if (dev->accel.cmd & 0x20) dev->accel.cx++; @@ -1303,7 +1324,7 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat else dev->accel.cx--; - if (dev->accel.err_term >= 0) { + if (dev->accel.err_term >= dev->accel.ssv_len_back) { dev->accel.err_term += dev->accel.destx_distp; if (dev->accel.cmd & 0x80) dev->accel.cy++; @@ -1454,6 +1475,10 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat if (!dev->accel.sy) { dev->accel.cmd_back = 1; + if (!cpu_input) { + dev->accel.cur_x = dev->accel.cx; + dev->accel.cur_y = dev->accel.cy; + } break; } @@ -1505,8 +1530,6 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat dev->accel.sy--; } - dev->accel.cur_x = dev->accel.cx; - dev->accel.cur_y = dev->accel.cy; dev->accel.x_count = 0; dev->accel.output = 0; } else { /*Bresenham Line*/ @@ -1580,7 +1603,7 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat else dev->accel.cy--; - if (dev->accel.err_term >= 0) { + if (dev->accel.err_term >= dev->accel.maj_axis_pcnt) { dev->accel.err_term += dev->accel.destx_distp; if (dev->accel.cmd & 0x20) dev->accel.cx++; @@ -1594,7 +1617,7 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t 
cpu_dat else dev->accel.cx--; - if (dev->accel.err_term >= 0) { + if (dev->accel.err_term >= dev->accel.maj_axis_pcnt) { dev->accel.err_term += dev->accel.destx_distp; if (dev->accel.cmd & 0x80) dev->accel.cy++; @@ -1673,6 +1696,10 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat if (!dev->accel.sy) { dev->accel.cmd_back = 1; + if (!cpu_input) { + dev->accel.cur_x = dev->accel.cx; + dev->accel.cur_y = dev->accel.cy; + } break; } @@ -1682,7 +1709,7 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat else dev->accel.cy--; - if (dev->accel.err_term >= 0) { + if (dev->accel.err_term >= dev->accel.maj_axis_pcnt) { dev->accel.err_term += dev->accel.destx_distp; if (dev->accel.cmd & 0x20) dev->accel.cx++; @@ -1696,7 +1723,7 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat else dev->accel.cx--; - if (dev->accel.err_term >= 0) { + if (dev->accel.err_term >= dev->accel.maj_axis_pcnt) { dev->accel.err_term += dev->accel.destx_distp; if (dev->accel.cmd & 0x80) dev->accel.cy++; @@ -1709,8 +1736,6 @@ ibm8514_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat dev->accel.sy--; } } - dev->accel.cur_x = dev->accel.cx; - dev->accel.cur_y = dev->accel.cy; } break; @@ -2283,35 +2308,62 @@ skip_nibble_rect_write: } } } else if ((dev->accel.multifunc[0x0a] & 0x06) == 0x04) { /*Polygon Draw Type A*/ + ibm8514_log("Polygon Draw Type A: Clipping: L=%d, R=%d, T=%d, B=%d, C(%d,%d), sx=%d, sy=%d.\n", clip_l, clip_r, clip_t, clip_b, dev->accel.cx, dev->accel.cy, dev->accel.sx, dev->accel.sy); while (count-- && (dev->accel.sy >= 0)) { if ((dev->accel.cx >= clip_l) && (dev->accel.cx <= clip_r) && (dev->accel.cy >= clip_t) && (dev->accel.cy <= clip_b)) { - READ(dev->accel.dest + dev->accel.cx, mix_dat); - if ((mix_dat & rd_mask_polygon) == rd_mask_polygon) + switch ((mix_dat & mix_mask) ? 
frgd_mix : bkgd_mix) { + case 0: + src_dat = bkgd_color; + break; + case 1: + src_dat = frgd_color; + break; + case 2: + case 3: + src_dat = 0; + break; + + default: + break; + } + + READ(dev->accel.dest + dev->accel.cx, poly_src); + if ((poly_src & rd_mask_polygon) == rd_mask_polygon) dev->accel.fill_state ^= 1; READ(dev->accel.dest + dev->accel.cx, dest_dat); + old_dest_dat = dest_dat; if (dev->accel.fill_state) { - if (!(rd_mask_polygon & 0x01) && (wrt_mask & 0x01)) { - MIX(mix_dat ^ rd_mask_polygon, dest_dat, mix_dat); - ibm8514_log("Filling c(%d,%d) without bit 0 of rdmask=%02x, wrtmask=%02x, mixdat=%02x, dest=%02x, old=%02x.\n", dev->accel.cx, dev->accel.cy, rd_mask_polygon, wrt_mask, mix_dat, dest_dat, old_dest_dat); - dest_dat &= ~rd_mask_polygon; - } else if ((rd_mask_polygon & 0x01) && (wrt_mask & 0x01)) { - ibm8514_log("Filling c(%d,%d) with bit 0 of rdmask=%02x, wrtmask=%02x.\n", dev->accel.cx, dev->accel.cy, rd_mask_polygon, wrt_mask); - dest_dat &= ~(rd_mask_polygon & wrt_mask); + if (rd_mask_polygon & 0x01) { + if (wrt_mask & 0x01) { + dest_dat &= ~(rd_mask_polygon & wrt_mask); /*Fill State On, Write Mask 1, Read Mask 1.*/ + dest_dat = (dest_dat & wrt_mask) | (old_dest_dat & ~wrt_mask); + } + } else { + if (wrt_mask & 0x01) { + MIX(mix_dat & mix_mask, dest_dat, src_dat); + dest_dat &= ~rd_mask_polygon; /*Fill State On, Write Mask 1, Read Mask 0.*/ + dest_dat = (dest_dat & wrt_mask) | (old_dest_dat & ~wrt_mask); + } } } else { - if (!(rd_mask_polygon & 0x01) && (wrt_mask & 0x01)) - dest_dat &= ~rd_mask_polygon; - else if ((rd_mask_polygon & 0x01) && (wrt_mask & 0x01)) - dest_dat &= ~(rd_mask_polygon & wrt_mask); + if (rd_mask_polygon & 0x01) { + if (wrt_mask & 0x01) { + dest_dat &= ~(rd_mask_polygon & wrt_mask); /*Fill State Off, Write Mask 1, Read Mask 1.*/ + dest_dat = (dest_dat & wrt_mask) | (old_dest_dat & ~wrt_mask); + } + } else { + if (wrt_mask & 0x01) { + dest_dat &= ~rd_mask_polygon; /*Fill State Off, Write Mask 1, Read Mask 0.*/ + dest_dat = 
(dest_dat & wrt_mask) | (old_dest_dat & ~wrt_mask); + } + } } - dest_dat = (dest_dat & wrt_mask) | (old_dest_dat & ~wrt_mask); - if ((compare_mode == 0) || ((compare_mode == 0x10) && (dest_dat >= compare)) || ((compare_mode == 0x18) && (dest_dat < compare)) || @@ -2322,7 +2374,11 @@ skip_nibble_rect_write: ibm8514_log("Results c(%d,%d):rdmask=%02x, wrtmask=%02x, mix=%02x, destdat=%02x, nowrite=%d.\n", dev->accel.cx, dev->accel.cy, rd_mask_polygon, wrt_mask, mix_dat, dest_dat, dev->accel.cx_back); WRITE(dev->accel.dest + dev->accel.cx, dest_dat); } - } + } else + ibm8514_log("Out of bounds DrawA C(%d,%d).\n", dev->accel.cx, dev->accel.cy); + + mix_dat <<= 1; + mix_dat |= 1; if (dev->accel.cmd & 0x20) dev->accel.cx++; @@ -2356,6 +2412,8 @@ skip_nibble_rect_write: if (dev->accel.sy < 0) { ibm8514_log(".\n"); dev->accel.cmd_back = 1; + dev->accel.cur_x = dev->accel.cx; + dev->accel.cur_y = dev->accel.cy; return; } } @@ -2468,7 +2526,7 @@ skip_nibble_rect_write: else dev->accel.oldcy = dev->accel.cy - 1; - ibm8514_log("Polygon Boundary activated=%04x, len=%d, cur(%d,%d), frgdmix=%02x, err=%d, clipping: l=%d, r=%d, t=%d, b=%d, pixcntl=%02x.\n", dev->accel.cmd, dev->accel.sy, dev->accel.cx, dev->accel.cy, dev->accel.frgd_mix & 0x1f, dev->accel.err_term, dev->accel.multifunc[2], dev->accel.multifunc[4], dev->accel.clip_top, clip_b, dev->accel.multifunc[0x0a]); + ibm8514_log("Polygon Boundary activated=%04x, len=%d, cur(%d,%d), frgdmix=%02x, err=%d, clipping: l=%d, r=%d, t=%d, b=%d, pixcntl=%02x.\n", dev->accel.cmd, dev->accel.sy, dev->accel.cx, dev->accel.cy, dev->accel.frgd_mix & 0x1f, dev->accel.err_term, clip_l, clip_r, clip_t, clip_b, dev->accel.multifunc[0x0a]); if (ibm8514_cpu_src(svga)) { dev->data_available = 0; @@ -2483,8 +2541,7 @@ skip_nibble_rect_write: if (dev->accel.cmd & 0x08) { /*Vectored Boundary Line*/ while (count-- && (dev->accel.sy >= 0)) { - if (dev->accel.cx < clip_l) - dev->accel.cx = clip_l; + dev->accel.cx = CLAMP(dev->accel.cx, clip_l, clip_r); 
if ((dev->accel.cx >= clip_l) && (dev->accel.cx <= clip_r) && @@ -2591,11 +2648,10 @@ skip_nibble_rect_write: } } else { /*Vectored Bresenham*/ while (count-- && (dev->accel.sy >= 0)) { - if (dev->accel.cx < clip_l) - dev->accel.cx = clip_l; + dev->accel.cx = CLAMP(dev->accel.cx, clip_l, clip_r); if ((dev->accel.cx >= clip_l) && - (dev->accel.cx <= clip_r) && + (dev->accel.cx < clip_r) && (dev->accel.cy >= clip_t) && (dev->accel.cy <= clip_b)) { switch ((mix_dat & mix_mask) ? frgd_mix : bkgd_mix) { @@ -2658,7 +2714,7 @@ skip_nibble_rect_write: else dev->accel.cy--; - if (dev->accel.err_term >= 0) { + if (dev->accel.err_term >= dev->accel.maj_axis_pcnt_no_limit) { dev->accel.err_term += dev->accel.destx_distp; if (dev->accel.cmd & 0x20) dev->accel.cx++; @@ -2673,7 +2729,7 @@ skip_nibble_rect_write: dev->accel.cx--; dev->accel.oldcy = dev->accel.cy; - if (dev->accel.err_term >= 0) { + if (dev->accel.err_term >= dev->accel.maj_axis_pcnt_no_limit) { dev->accel.err_term += dev->accel.destx_distp; if (dev->accel.cmd & 0x80) dev->accel.cy++; diff --git a/src/video/vid_s3.c b/src/video/vid_s3.c index abb2d70aa..bb5c283ad 100644 --- a/src/video/vid_s3.c +++ b/src/video/vid_s3.c @@ -273,6 +273,7 @@ typedef struct s3_t { int dat_count; int b2e8_pix, temp_cnt; int ssv_len; + int ssv_len_back; uint8_t ssv_dir; uint8_t ssv_draw; uint8_t dat_buf_16bit; @@ -511,7 +512,7 @@ s3_update_irqs(s3_t *s3) } void s3_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat, void *priv); -void s3_short_stroke_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat, s3_t *s3, uint8_t ssv); +void s3_short_stroke_start(s3_t *s3, uint8_t ssv); static void s3_visionx68_video_engine_op(uint32_t cpu_dat, s3_t *s3); #define WRITE8(addr, var, val) \ @@ -922,15 +923,12 @@ s3_accel_out_fifo(s3_t *s3, uint16_t port, uint8_t val) s3->accel.short_stroke = (s3->accel.short_stroke & 0xff) | (val << 8); s3->accel.ssv_state = 1; - s3->accel.cx = s3->accel.cur_x & 0xfff; - 
s3->accel.cy = s3->accel.cur_y & 0xfff; - if (s3->accel.cmd & 0x1000) { - s3_short_stroke_start(-1, 0, 0xffffffff, 0, s3, s3->accel.short_stroke & 0xff); - s3_short_stroke_start(-1, 0, 0xffffffff, 0, s3, s3->accel.short_stroke >> 8); + s3_short_stroke_start(s3, s3->accel.short_stroke & 0xff); + s3_short_stroke_start(s3, s3->accel.short_stroke >> 8); } else { - s3_short_stroke_start(-1, 0, 0xffffffff, 0, s3, s3->accel.short_stroke >> 8); - s3_short_stroke_start(-1, 0, 0xffffffff, 0, s3, s3->accel.short_stroke & 0xff); + s3_short_stroke_start(s3, s3->accel.short_stroke >> 8); + s3_short_stroke_start(s3, s3->accel.short_stroke & 0xff); } break; @@ -1787,15 +1785,12 @@ s3_accel_out_fifo_w(s3_t *s3, uint16_t port, uint16_t val) s3->accel.short_stroke = val; s3->accel.ssv_state = 1; - s3->accel.cx = s3->accel.cur_x & 0xfff; - s3->accel.cy = s3->accel.cur_y & 0xfff; - if (s3->accel.cmd & 0x1000) { - s3_short_stroke_start(-1, 0, 0xffffffff, 0, s3, s3->accel.short_stroke & 0xff); - s3_short_stroke_start(-1, 0, 0xffffffff, 0, s3, s3->accel.short_stroke >> 8); + s3_short_stroke_start(s3, s3->accel.short_stroke & 0xff); + s3_short_stroke_start(s3, s3->accel.short_stroke >> 8); } else { - s3_short_stroke_start(-1, 0, 0xffffffff, 0, s3, s3->accel.short_stroke >> 8); - s3_short_stroke_start(-1, 0, 0xffffffff, 0, s3, s3->accel.short_stroke & 0xff); + s3_short_stroke_start(s3, s3->accel.short_stroke >> 8); + s3_short_stroke_start(s3, s3->accel.short_stroke & 0xff); } } } @@ -7823,19 +7818,16 @@ s3_visionx68_video_engine_op(uint32_t cpu_dat, s3_t *s3) } void -s3_short_stroke_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat, s3_t *s3, uint8_t ssv) +s3_short_stroke_start(s3_t *s3, uint8_t ssv) { - if (!cpu_input) { - s3->accel.ssv_len = ssv & 0x0f; - s3->accel.ssv_dir = ssv & 0xe0; - s3->accel.ssv_draw = ssv & 0x10; + s3->accel.ssv_len = ssv & 0x0f; + s3->accel.ssv_dir = ssv & 0xe0; + s3->accel.ssv_draw = !!(ssv & 0x10); - if (s3_cpu_src(s3)) { - return; /*Wait for 
data from CPU*/ - } - } + if (s3_cpu_src(s3)) + return; /*Wait for data from CPU*/ - s3->accel_start(count, cpu_input, mix_dat, cpu_dat, s3); + s3->accel_start(-1, 0, -1, 0, s3); } void @@ -7978,11 +7970,13 @@ s3_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat, voi switch (cmd) { case 0: /*NOP (Short Stroke Vectors)*/ - if (s3->accel.ssv_state == 0) + if (s3->accel.ssv_state == 0) { + s3->accel.cx = s3->accel.cur_x & 0xfff; + s3->accel.cy = s3->accel.cur_y & 0xfff; break; + } - if (s3->accel.cmd & 0x08) /*Radial*/ - { + if (s3->accel.cmd & 0x08) { /*Radial*/ while (count-- && s3->accel.ssv_len >= 0) { if ((s3->accel.cx & 0xfff) >= clip_l && (s3->accel.cx & 0xfff) <= clip_r && (s3->accel.cy & 0xfff) >= clip_t && (s3->accel.cy & 0xfff) <= clip_b) { switch ((mix_dat & mix_mask) ? frgd_mix : bkgd_mix) { @@ -8036,8 +8030,11 @@ s3_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat, voi else cpu_dat >>= 16; - if (!s3->accel.ssv_len) + if (!s3->accel.ssv_len) { + s3->accel.cur_x = s3->accel.cx & 0xfff; + s3->accel.cur_y = s3->accel.cy & 0xfff; break; + } switch (s3->accel.ssv_dir & 0xe0) { case 0x00: @@ -8077,9 +8074,6 @@ s3_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat, voi s3->accel.cx &= 0xfff; s3->accel.cy &= 0xfff; } - - s3->accel.cur_x = s3->accel.cx & 0xfff; - s3->accel.cur_y = s3->accel.cy & 0xfff; } break; @@ -8270,7 +8264,7 @@ s3_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat, voi else s3->accel.cy--; - if (s3->accel.err_term >= 0) { + if (s3->accel.err_term >= s3->accel.maj_axis_pcnt) { s3->accel.err_term += s3->accel.destx_distp; if (s3->accel.cmd & 0x20) s3->accel.cx++; @@ -8284,7 +8278,7 @@ s3_accel_start(int count, int cpu_input, uint32_t mix_dat, uint32_t cpu_dat, voi else s3->accel.cx--; - if (s3->accel.err_term >= 0) { + if (s3->accel.err_term >= s3->accel.maj_axis_pcnt) { s3->accel.err_term += s3->accel.destx_distp; if (s3->accel.cmd & 0x80) s3->accel.cy++;