diff --git a/src/86box.c b/src/86box.c
index 3c96d88a4..73f8a67ee 100644
--- a/src/86box.c
+++ b/src/86box.c
@@ -180,6 +180,7 @@ uint32_t isa_mem_size = 0; /* (C) memory
 int cpu_use_dynarec = 0; /* (C) cpu uses/needs Dyna */
 int cpu = 0; /* (C) cpu type */
 int fpu_type = 0; /* (C) fpu type */
+int fpu_softfloat = 0; /* (C) fpu uses softfloat */
 int time_sync = 0; /* (C) enable time sync */
 int confirm_reset = 1; /* (C) enable reset confirmation */
 int confirm_exit = 1; /* (C) enable exit confirmation */
diff --git a/src/config.c b/src/config.c
index 7641c637c..a83512bde 100644
--- a/src/config.c
+++ b/src/config.c
@@ -504,6 +504,7 @@ load_machine(void)
         mem_size = machine_get_max_ram(machine);
 
     cpu_use_dynarec = !!ini_section_get_int(cat, "cpu_use_dynarec", 0);
+    fpu_softfloat   = !!ini_section_get_int(cat, "fpu_softfloat", 0);
 
     p = ini_section_get_string(cat, "time_sync", NULL);
     if (p != NULL) {
@@ -2244,6 +2245,7 @@ save_machine(void)
     ini_section_set_int(cat, "mem_size", mem_size);
     ini_section_set_int(cat, "cpu_use_dynarec", cpu_use_dynarec);
+    ini_section_set_int(cat, "fpu_softfloat", fpu_softfloat);
 
     if (time_sync & TIME_SYNC_ENABLED)
         if (time_sync & TIME_SYNC_UTC)
diff --git a/src/cpu/808x.c b/src/cpu/808x.c
index 1a842dd9c..d7e50b0b5 100644
--- a/src/cpu/808x.c
+++ b/src/cpu/808x.c
@@ -3178,33 +3178,63 @@ execx86(int cycs)
             tempw = cpu_state.pc;
             if (!hasfpu)
                 geteaw();
-            else
-                switch (opcode) {
-                    case 0xD8:
-                        ops_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
-                        break;
-                    case 0xD9:
-                        ops_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat);
-                        break;
-                    case 0xDA:
-                        ops_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat);
-                        break;
-                    case 0xDB:
-                        ops_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat);
-                        break;
-                    case 0xDC:
-                        ops_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
-                        break;
-                    case 0xDD:
-                        ops_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat);
-                        break;
-                    case 0xDE:
-                        ops_fpu_8087_de[rmdat & 0xff]((uint32_t) rmdat);
-                        break;
-                    case 0xDF:
-                        ops_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat);
-                        break;
+            else {
+                if (fpu_softfloat) {
+                    switch (opcode) {
+                        case 0xD8:
+                            ops_sf_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
+                            break;
+                        case 0xD9:
+                            ops_sf_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDA:
+                            ops_sf_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDB:
+                            ops_sf_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDC:
+                            ops_sf_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
+                            break;
+                        case 0xDD:
+                            ops_sf_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDE:
+                            ops_sf_fpu_8087_de[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDF:
+                            ops_sf_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                    }
+                } else {
+                    switch (opcode) {
+                        case 0xD8:
+                            ops_fpu_8087_d8[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
+                            break;
+                        case 0xD9:
+                            ops_fpu_8087_d9[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDA:
+                            ops_fpu_8087_da[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDB:
+                            ops_fpu_8087_db[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDC:
+                            ops_fpu_8087_dc[(rmdat >> 3) & 0x1f]((uint32_t) rmdat);
+                            break;
+                        case 0xDD:
+                            ops_fpu_8087_dd[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDE:
+                            ops_fpu_8087_de[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                        case 0xDF:
+                            ops_fpu_8087_df[rmdat & 0xff]((uint32_t) rmdat);
+                            break;
+                    }
                 }
+            }
             cpu_state.pc = tempw; /* Do this as the x87 code advances it, which is needed on the 286+ core, but not here. */
             wait(1, 0);
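A note on the new setting: fpu_softfloat is persisted next to cpu_use_dynarec, so it can also be toggled by hand in the configuration file. A minimal sketch of the resulting 86box.cfg fragment, assuming the machine settings live in the [Machine] section that load_machine()/save_machine() operate on:

    [Machine]
    cpu_use_dynarec = 0
    fpu_softfloat = 1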
diff --git a/src/cpu/CMakeLists.txt b/src/cpu/CMakeLists.txt
index 68baaf293..18aa06023 100644
--- a/src/cpu/CMakeLists.txt
+++ b/src/cpu/CMakeLists.txt
@@ -32,3 +32,6 @@ if(DYNAREC)
         codegen_timing_pentium.c codegen_timing_p6.c
         codegen_timing_winchip.c codegen_timing_winchip2.c)
 endif()
+
+add_subdirectory(softfloat)
+target_link_libraries(86Box softfloat)
diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c
index 102f9f56c..f70c2155a 100644
--- a/src/cpu/cpu.c
+++ b/src/cpu/cpu.c
@@ -74,6 +74,7 @@ enum {
 /* Make sure this is as low as possible. */
 cpu_state_t cpu_state;
+fpu_state_t fpu_state;
 
 /* Place this immediately after. */
 uint32_t abrt_error;
@@ -201,6 +202,8 @@ void
 cpu_set_edx(void)
 {
     EDX = cpu_s->edx_reset;
+    if (!cpu_use_dynarec && fpu_softfloat)
+        SF_FPU_reset();
 }
 
 cpu_family_t *
@@ -344,6 +347,23 @@ cpu_family_is_eligible(const cpu_family_t *cpu_family, int machine)
     return 0;
 }
 
+void
+SF_FPU_reset(void)
+{
+    if (fpu_type != FPU_NONE) {
+        fpu_state.cwd = 0x0040;
+        fpu_state.swd = 0;
+        fpu_state.tos = 0;
+        fpu_state.tag = 0x5555;
+        fpu_state.foo = 0;
+        fpu_state.fip = 0;
+        fpu_state.fcs = 0;
+        fpu_state.fds = 0;
+        fpu_state.fdp = 0;
+        memset(fpu_state.st_space, 0, sizeof(floatx80)*8);
+    }
+}
+
 void
 cpu_set(void)
 {
@@ -454,22 +474,41 @@ cpu_set(void)
             x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_df_a16;
             x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_df_a32;
 #endif
-            x86_opcodes_d8_a16 = ops_fpu_d8_a16;
-            x86_opcodes_d8_a32 = ops_fpu_d8_a32;
-            x86_opcodes_d9_a16 = ops_fpu_d9_a16;
-            x86_opcodes_d9_a32 = ops_fpu_d9_a32;
-            x86_opcodes_da_a16 = ops_fpu_da_a16;
-            x86_opcodes_da_a32 = ops_fpu_da_a32;
-            x86_opcodes_db_a16 = ops_fpu_db_a16;
-            x86_opcodes_db_a32 = ops_fpu_db_a32;
-            x86_opcodes_dc_a16 = ops_fpu_dc_a16;
-            x86_opcodes_dc_a32 = ops_fpu_dc_a32;
-            x86_opcodes_dd_a16 = ops_fpu_dd_a16;
-            x86_opcodes_dd_a32 = ops_fpu_dd_a32;
-            x86_opcodes_de_a16 = ops_fpu_de_a16;
-            x86_opcodes_de_a32 = ops_fpu_de_a32;
-            x86_opcodes_df_a16 = ops_fpu_df_a16;
-            x86_opcodes_df_a32 = ops_fpu_df_a32;
+            if (fpu_softfloat) {
+                x86_opcodes_d8_a16 = ops_sf_fpu_d8_a16;
+                x86_opcodes_d8_a32 = ops_sf_fpu_d8_a32;
+                x86_opcodes_d9_a16 = ops_sf_fpu_d9_a16;
+                x86_opcodes_d9_a32 = ops_sf_fpu_d9_a32;
+                x86_opcodes_da_a16 = ops_sf_fpu_da_a16;
+                x86_opcodes_da_a32 = ops_sf_fpu_da_a32;
+                x86_opcodes_db_a16 = ops_sf_fpu_db_a16;
+                x86_opcodes_db_a32 = ops_sf_fpu_db_a32;
+                x86_opcodes_dc_a16 = ops_sf_fpu_dc_a16;
+                x86_opcodes_dc_a32 = ops_sf_fpu_dc_a32;
+                x86_opcodes_dd_a16 = ops_sf_fpu_dd_a16;
+                x86_opcodes_dd_a32 = ops_sf_fpu_dd_a32;
+                x86_opcodes_de_a16 = ops_sf_fpu_de_a16;
+                x86_opcodes_de_a32 = ops_sf_fpu_de_a32;
+                x86_opcodes_df_a16 = ops_sf_fpu_df_a16;
+                x86_opcodes_df_a32 = ops_sf_fpu_df_a32;
+            } else {
+                x86_opcodes_d8_a16 = ops_fpu_d8_a16;
+                x86_opcodes_d8_a32 = ops_fpu_d8_a32;
+                x86_opcodes_d9_a16 = ops_fpu_d9_a16;
+                x86_opcodes_d9_a32 = ops_fpu_d9_a32;
+                x86_opcodes_da_a16 = ops_fpu_da_a16;
+                x86_opcodes_da_a32 = ops_fpu_da_a32;
+                x86_opcodes_db_a16 = ops_fpu_db_a16;
+                x86_opcodes_db_a32 = ops_fpu_db_a32;
+                x86_opcodes_dc_a16 = ops_fpu_dc_a16;
+                x86_opcodes_dc_a32 = ops_fpu_dc_a32;
+                x86_opcodes_dd_a16 = ops_fpu_dd_a16;
+                x86_opcodes_dd_a32 = ops_fpu_dd_a32;
+                x86_opcodes_de_a16 = ops_fpu_de_a16;
+                x86_opcodes_de_a32 = ops_fpu_de_a32;
+                x86_opcodes_df_a16 = ops_fpu_df_a16;
+                x86_opcodes_df_a32 = ops_fpu_df_a32;
+            }
         } else {
 #ifdef USE_DYNAREC
             x86_dynarec_opcodes_d8_a16 = dynarec_ops_nofpu_a16;
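The interpreter dispatches the x87 escape opcodes through per-opcode function-pointer tables, and cpu_set() simply repoints those tables at either the native-FPU or the softfloat implementations. A self-contained sketch of the same selection pattern (hypothetical names, not the emulator's own):

    #include <stdint.h>
    #include <stdio.h>

    typedef void (*fpu_op_t)(uint32_t fetchdat);

    static void native_fadd(uint32_t fetchdat) { (void) fetchdat; puts("native FADD"); }
    static void sf_fadd(uint32_t fetchdat)     { (void) fetchdat; puts("softfloat FADD"); }

    static fpu_op_t ops_native[1] = { native_fadd };
    static fpu_op_t ops_sf[1]     = { sf_fadd };

    static fpu_op_t *x86_opcodes_d8; /* table pointer, as in cpu_set() */

    int main(void)
    {
        int fpu_softfloat = 1;
        x86_opcodes_d8 = fpu_softfloat ? ops_sf : ops_native;
        x86_opcodes_d8[0](0); /* prints "softfloat FADD" */
        return 0;
    }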
@@ -557,20 +596,37 @@ cpu_set(void)
             x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_287_df_a16;
             x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_287_df_a32;
 #endif
-            x86_opcodes_d9_a16 = ops_fpu_287_d9_a16;
-            x86_opcodes_d9_a32 = ops_fpu_287_d9_a32;
-            x86_opcodes_da_a16 = ops_fpu_287_da_a16;
-            x86_opcodes_da_a32 = ops_fpu_287_da_a32;
-            x86_opcodes_db_a16 = ops_fpu_287_db_a16;
-            x86_opcodes_db_a32 = ops_fpu_287_db_a32;
-            x86_opcodes_dc_a16 = ops_fpu_287_dc_a16;
-            x86_opcodes_dc_a32 = ops_fpu_287_dc_a32;
-            x86_opcodes_dd_a16 = ops_fpu_287_dd_a16;
-            x86_opcodes_dd_a32 = ops_fpu_287_dd_a32;
-            x86_opcodes_de_a16 = ops_fpu_287_de_a16;
-            x86_opcodes_de_a32 = ops_fpu_287_de_a32;
-            x86_opcodes_df_a16 = ops_fpu_287_df_a16;
-            x86_opcodes_df_a32 = ops_fpu_287_df_a32;
+            if (fpu_softfloat) {
+                x86_opcodes_d9_a16 = ops_sf_fpu_287_d9_a16;
+                x86_opcodes_d9_a32 = ops_sf_fpu_287_d9_a32;
+                x86_opcodes_da_a16 = ops_sf_fpu_287_da_a16;
+                x86_opcodes_da_a32 = ops_sf_fpu_287_da_a32;
+                x86_opcodes_db_a16 = ops_sf_fpu_287_db_a16;
+                x86_opcodes_db_a32 = ops_sf_fpu_287_db_a32;
+                x86_opcodes_dc_a16 = ops_sf_fpu_287_dc_a16;
+                x86_opcodes_dc_a32 = ops_sf_fpu_287_dc_a32;
+                x86_opcodes_dd_a16 = ops_sf_fpu_287_dd_a16;
+                x86_opcodes_dd_a32 = ops_sf_fpu_287_dd_a32;
+                x86_opcodes_de_a16 = ops_sf_fpu_287_de_a16;
+                x86_opcodes_de_a32 = ops_sf_fpu_287_de_a32;
+                x86_opcodes_df_a16 = ops_sf_fpu_287_df_a16;
+                x86_opcodes_df_a32 = ops_sf_fpu_287_df_a32;
+            } else {
+                x86_opcodes_d9_a16 = ops_fpu_287_d9_a16;
+                x86_opcodes_d9_a32 = ops_fpu_287_d9_a32;
+                x86_opcodes_da_a16 = ops_fpu_287_da_a16;
+                x86_opcodes_da_a32 = ops_fpu_287_da_a32;
+                x86_opcodes_db_a16 = ops_fpu_287_db_a16;
+                x86_opcodes_db_a32 = ops_fpu_287_db_a32;
+                x86_opcodes_dc_a16 = ops_fpu_287_dc_a16;
+                x86_opcodes_dc_a32 = ops_fpu_287_dc_a32;
+                x86_opcodes_dd_a16 = ops_fpu_287_dd_a16;
+                x86_opcodes_dd_a32 = ops_fpu_287_dd_a32;
+                x86_opcodes_de_a16 = ops_fpu_287_de_a16;
+                x86_opcodes_de_a32 = ops_fpu_287_de_a32;
+                x86_opcodes_df_a16 = ops_fpu_287_df_a16;
+                x86_opcodes_df_a32 = ops_fpu_287_df_a32;
+            }
         }
 
         timing_rr = 2; /* register dest - register src */
@@ -633,20 +689,37 @@ cpu_set(void)
             x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_287_df_a16;
             x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_287_df_a32;
 #endif
-            x86_opcodes_d9_a16 = ops_fpu_287_d9_a16;
-            x86_opcodes_d9_a32 = ops_fpu_287_d9_a32;
-            x86_opcodes_da_a16 = ops_fpu_287_da_a16;
-            x86_opcodes_da_a32 = ops_fpu_287_da_a32;
-            x86_opcodes_db_a16 = ops_fpu_287_db_a16;
-            x86_opcodes_db_a32 = ops_fpu_287_db_a32;
-            x86_opcodes_dc_a16 = ops_fpu_287_dc_a16;
-            x86_opcodes_dc_a32 = ops_fpu_287_dc_a32;
-            x86_opcodes_dd_a16 = ops_fpu_287_dd_a16;
-            x86_opcodes_dd_a32 = ops_fpu_287_dd_a32;
-            x86_opcodes_de_a16 = ops_fpu_287_de_a16;
-            x86_opcodes_de_a32 = ops_fpu_287_de_a32;
-            x86_opcodes_df_a16 = ops_fpu_287_df_a16;
-            x86_opcodes_df_a32 = ops_fpu_287_df_a32;
+            if (fpu_softfloat) {
+                x86_opcodes_d9_a16 = ops_sf_fpu_287_d9_a16;
+                x86_opcodes_d9_a32 = ops_sf_fpu_287_d9_a32;
+                x86_opcodes_da_a16 = ops_sf_fpu_287_da_a16;
+                x86_opcodes_da_a32 = ops_sf_fpu_287_da_a32;
+                x86_opcodes_db_a16 = ops_sf_fpu_287_db_a16;
+                x86_opcodes_db_a32 = ops_sf_fpu_287_db_a32;
+                x86_opcodes_dc_a16 = ops_sf_fpu_287_dc_a16;
+                x86_opcodes_dc_a32 = ops_sf_fpu_287_dc_a32;
+                x86_opcodes_dd_a16 = ops_sf_fpu_287_dd_a16;
+                x86_opcodes_dd_a32 = ops_sf_fpu_287_dd_a32;
+                x86_opcodes_de_a16 = ops_sf_fpu_287_de_a16;
+                x86_opcodes_de_a32 = ops_sf_fpu_287_de_a32;
+                x86_opcodes_df_a16 = ops_sf_fpu_287_df_a16;
+                x86_opcodes_df_a32 = ops_sf_fpu_287_df_a32;
+            } else {
+                x86_opcodes_d9_a16 = ops_fpu_287_d9_a16;
+                x86_opcodes_d9_a32 = ops_fpu_287_d9_a32;
+                x86_opcodes_da_a16 = ops_fpu_287_da_a16;
+                x86_opcodes_da_a32 = ops_fpu_287_da_a32;
+                x86_opcodes_db_a16 = ops_fpu_287_db_a16;
+                x86_opcodes_db_a32 = ops_fpu_287_db_a32;
+                x86_opcodes_dc_a16 = ops_fpu_287_dc_a16;
+                x86_opcodes_dc_a32 = ops_fpu_287_dc_a32;
+                x86_opcodes_dd_a16 = ops_fpu_287_dd_a16;
+                x86_opcodes_dd_a32 = ops_fpu_287_dd_a32;
+                x86_opcodes_de_a16 = ops_fpu_287_de_a16;
+                x86_opcodes_de_a32 = ops_fpu_287_de_a32;
+                x86_opcodes_df_a16 = ops_fpu_287_df_a16;
+                x86_opcodes_df_a32 = ops_fpu_287_df_a32;
+            }
         }
 
         timing_rr = 2; /* register dest - register src */
@@ -1048,12 +1121,21 @@ cpu_set(void)
             x86_dynarec_opcodes_df_a16 = dynarec_ops_fpu_686_df_a16;
             x86_dynarec_opcodes_df_a32 = dynarec_ops_fpu_686_df_a32;
 # endif
-            x86_opcodes_da_a16 = ops_fpu_686_da_a16;
-            x86_opcodes_da_a32 = ops_fpu_686_da_a32;
-            x86_opcodes_db_a16 = ops_fpu_686_db_a16;
-            x86_opcodes_db_a32 = ops_fpu_686_db_a32;
-            x86_opcodes_df_a16 = ops_fpu_686_df_a16;
-            x86_opcodes_df_a32 = ops_fpu_686_df_a32;
+            if (fpu_softfloat) {
+                x86_opcodes_da_a16 = ops_sf_fpu_686_da_a16;
+                x86_opcodes_da_a32 = ops_sf_fpu_686_da_a32;
+                x86_opcodes_db_a16 = ops_sf_fpu_686_db_a16;
+                x86_opcodes_db_a32 = ops_sf_fpu_686_db_a32;
+                x86_opcodes_df_a16 = ops_sf_fpu_686_df_a16;
+                x86_opcodes_df_a32 = ops_sf_fpu_686_df_a32;
+            } else {
+                x86_opcodes_da_a16 = ops_fpu_686_da_a16;
+                x86_opcodes_da_a32 = ops_fpu_686_da_a32;
+                x86_opcodes_db_a16 = ops_fpu_686_db_a16;
+                x86_opcodes_db_a32 = ops_fpu_686_db_a32;
+                x86_opcodes_df_a16 = ops_fpu_686_df_a16;
+                x86_opcodes_df_a32 = ops_fpu_686_df_a32;
+            }
         }
 
 # ifdef USE_DYNAREC
@@ -1256,12 +1338,21 @@ cpu_set(void)
         else
             x86_setopcodes(ops_386, ops_pentium2_0f);
 #endif
-            x86_opcodes_da_a16 = ops_fpu_686_da_a16;
-            x86_opcodes_da_a32 = ops_fpu_686_da_a32;
-            x86_opcodes_db_a16 = ops_fpu_686_db_a16;
-            x86_opcodes_db_a32 = ops_fpu_686_db_a32;
-            x86_opcodes_df_a16 = ops_fpu_686_df_a16;
-            x86_opcodes_df_a32 = ops_fpu_686_df_a32;
+            if (fpu_softfloat) {
+                x86_opcodes_da_a16 = ops_sf_fpu_686_da_a16;
+                x86_opcodes_da_a32 = ops_sf_fpu_686_da_a32;
+                x86_opcodes_db_a16 = ops_sf_fpu_686_db_a16;
+                x86_opcodes_db_a32 = ops_sf_fpu_686_db_a32;
+                x86_opcodes_df_a16 = ops_sf_fpu_686_df_a16;
+                x86_opcodes_df_a32 = ops_sf_fpu_686_df_a32;
+            } else {
+                x86_opcodes_da_a16 = ops_fpu_686_da_a16;
+                x86_opcodes_da_a32 = ops_fpu_686_da_a32;
+                x86_opcodes_db_a16 = ops_fpu_686_db_a16;
+                x86_opcodes_db_a32 = ops_fpu_686_db_a32;
+                x86_opcodes_df_a16 = ops_fpu_686_df_a16;
+                x86_opcodes_df_a32 = ops_fpu_686_df_a32;
+            }
 
         timing_rr = 1; /* register dest - register src */
         timing_rm = 2; /* register dest - memory src */
diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h
index f8ffaced3..b93e6aa2e 100644
--- a/src/cpu/cpu.h
+++ b/src/cpu/cpu.h
@@ -21,6 +21,8 @@
 #ifndef EMU_CPU_H
 #define EMU_CPU_H
 
+#include "softfloat/softfloat.h"
+
 enum {
     FPU_NONE,
     FPU_8087,
@@ -404,6 +406,20 @@ typedef struct {
     uint8_t inside_emulation_mode;
 } cpu_state_t;
 
+typedef struct {
+    uint16_t cwd;
+    uint16_t swd;
+    uint16_t tag;
+    uint16_t foo;
+    uint32_t fip;
+    uint32_t fdp;
+    uint16_t fcs;
+    uint16_t fds;
+    floatx80 st_space[8];
+    unsigned char tos;
+    unsigned char align1, align2, align3;
+} fpu_state_t;
+
 #define in_smm   cpu_state._in_smm
 #define smi_line cpu_state._smi_line
 
@@ -484,6 +500,7 @@ COMPILE_TIME_ASSERT(sizeof(cpu_state_t) <= 128)
 
 /* Global variables. */
 extern cpu_state_t cpu_state;
+extern fpu_state_t fpu_state;
 
 extern const cpu_family_t cpu_families[];
 extern const cpu_legacy_machine_t cpu_legacy_table[];
@@ -738,6 +755,8 @@ extern uint32_t custom_nmi_vector;
 
 extern void (*cpu_exec)(int cycs);
 extern uint8_t do_translate, do_translate2;
 
+extern void SF_FPU_reset(void);
+
 extern void reset_808x(int hard);
 extern void interrupt_808x(uint16_t addr);
diff --git a/src/cpu/softfloat/CMakeLists.txt b/src/cpu/softfloat/CMakeLists.txt
new file mode 100644
index 000000000..62458aada
--- /dev/null
+++ b/src/cpu/softfloat/CMakeLists.txt
@@ -0,0 +1,17 @@
+#
+# 86Box    A hypervisor and IBM PC system emulator that specializes in
+#          running old operating systems and software designed for IBM
+#          PC systems and compatibles from 1981 through fairly recent
+#          system designs based on the PCI bus.
+#
+#          This file is part of the 86Box distribution.
+#
+#          CMake build script.
+#
+# Authors: David Hrdlička,
+#
+#          Copyright 2020-2021 David Hrdlička.
+#
+
+add_library(softfloat OBJECT f2xm1.cc fpatan.cc fprem.cc fsincos.cc fyl2x.cc poly.cc softfloat.cc softfloat16.cc
+    softfloat-muladd.cc softfloat-round-pack.cc softfloat-specialize.cc softfloatx80.cc)
diff --git a/src/cpu/softfloat/config.h b/src/cpu/softfloat/config.h
new file mode 100644
index 000000000..3889b5c02
--- /dev/null
+++ b/src/cpu/softfloat/config.h
@@ -0,0 +1,46 @@
+#include <stdint.h>
+
+typedef int8_t flag;
+typedef uint8_t uint8;
+typedef int8_t int8;
+typedef uint16_t uint16;
+typedef int16_t int16;
+typedef uint32_t uint32;
+typedef int32_t int32;
+typedef uint64_t uint64;
+typedef int64_t int64;
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines a type that holds integers
+| of _exactly_ the number of bits specified.  For instance, for most
+| implementation of C, `bits16' and `sbits16' should be `typedef'ed to
+| `unsigned short int' and `signed short int' (or `short int'), respectively.
+*----------------------------------------------------------------------------*/
+typedef uint8_t bits8;
+typedef int8_t sbits8;
+typedef uint16_t bits16;
+typedef int16_t sbits16;
+typedef uint32_t bits32;
+typedef int32_t sbits32;
+typedef uint64_t bits64;
+typedef int64_t sbits64;
+
+typedef uint8_t Bit8u;
+typedef int8_t Bit8s;
+typedef uint16_t Bit16u;
+typedef int16_t Bit16s;
+typedef uint32_t Bit32u;
+typedef int32_t Bit32s;
+typedef uint64_t Bit64u;
+typedef int64_t Bit64s;
+
+/*----------------------------------------------------------------------------
+| The `LIT64' macro takes as its argument a textual integer literal and
+| if necessary ``marks'' the literal as having a 64-bit integer type.
+| For example, the GNU C Compiler (`gcc') requires that 64-bit literals be
+| appended with the letters `LL' standing for `long long', which is `gcc's
+| name for the 64-bit integer type.  Some compilers may allow `LIT64' to be
+| defined as the identity macro:  `#define LIT64( a ) a'.
+*----------------------------------------------------------------------------*/
+#define BX_CONST64(a) a##LL
+#define BX_CPP_INLINE static __inline
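These Bochs-style typedefs and macros let the imported sources compile unmodified. BX_CONST64 marks 64-bit literals; a small sketch of why the suffix matters on pre-C99 compilers, using the header above:

    #include <stdio.h>
    #include "config.h" /* the softfloat config.h above */

    int main(void)
    {
        /* 0xb17217f7d1cf79ab needs 64 bits; the LL suffix appended by
           BX_CONST64 keeps the literal 64-bit even on old compilers. */
        Bit64u ln2_hi = BX_CONST64(0xb17217f7d1cf79ab);
        printf("%llx\n", (unsigned long long) ln2_hi);
        return 0;
    }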
+*----------------------------------------------------------------------------*/ +#define BX_CONST64(a) a##LL +#define BX_CPP_INLINE static __inline diff --git a/src/cpu/softfloat/f2xm1.cc b/src/cpu/softfloat/f2xm1.cc new file mode 100644 index 000000000..ed4af1d12 --- /dev/null +++ b/src/cpu/softfloat/f2xm1.cc @@ -0,0 +1,182 @@ +/*============================================================================ +This source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) +floating point emulation. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +/*============================================================================ + * Written for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#define FLOAT128 + +#include "softfloatx80.h" +#include "softfloat-round-pack.h" + +static const floatx80 floatx80_negone = packFloatx80(1, 0x3fff, BX_CONST64(0x8000000000000000)); +static const floatx80 floatx80_neghalf = packFloatx80(1, 0x3ffe, BX_CONST64(0x8000000000000000)); +static const float128 float128_ln2 = + packFloat128(BX_CONST64(0x3ffe62e42fefa39e), BX_CONST64(0xf35793c7673007e6)); + +#ifdef BETTER_THAN_PENTIUM + +#define LN2_SIG_HI BX_CONST64(0xb17217f7d1cf79ab) +#define LN2_SIG_LO BX_CONST64(0xc9e3b39800000000) /* 96 bit precision */ + +#else + +#define LN2_SIG_HI BX_CONST64(0xb17217f7d1cf79ab) +#define LN2_SIG_LO BX_CONST64(0xc000000000000000) /* 67-bit precision */ + +#endif + +#define EXP_ARR_SIZE 15 + +static float128 exp_arr[EXP_ARR_SIZE] = +{ + PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */ + PACK_FLOAT_128(0x3ffe000000000000, 0x0000000000000000), /* 2 */ + PACK_FLOAT_128(0x3ffc555555555555, 0x5555555555555555), /* 3 */ + PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */ + PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */ + PACK_FLOAT_128(0x3ff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */ + PACK_FLOAT_128(0x3ff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */ + PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */ + PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */ + PACK_FLOAT_128(0x3fe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */ + PACK_FLOAT_128(0x3fe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */ + PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */ + PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */ + PACK_FLOAT_128(0x3fda93974a8c07c9, 
+    PACK_FLOAT_128(0x3fd6ae7f3e733b81, 0xf11d8656b0ee8cb0)  /* 15 */
+};
+
+extern float128 EvalPoly(float128 x, float128 *arr, int n, struct float_status_t *status);
+
+/* required -1 < x < 1 */
+static float128 poly_exp(float128 x, struct float_status_t *status)
+{
+/*
+    //                 2     3     4     5     6     7     8     9
+    //  x             x     x     x     x     x     x     x     x
+    // e  - 1 ~ x  + --- + --- + --- + --- + --- + --- + --- + --- + ...
+    //                2!    3!    4!    5!    6!    7!    8!    9!
+    //
+    //                      2     3     4     5     6     7     8
+    //                     x     x     x     x     x     x     x
+    //     = x [ 1 + --- + --- + --- + --- + --- + --- + --- + --- + ... ]
+    //                2!    3!    4!    5!    6!    7!    8!    9!
+    //
+    //            8                          8
+    //           --       2k                --       2k+1
+    //  p(x) =   >  C   * x         q(x) =  >  C   * x
+    //           --  2k                     --  2k+1
+    //           k=0                        k=0
+    //
+    //    x                             2
+    //   e  - 1 ~ x * [ p(x) + x * q(x) ]
+    //
+*/
+    float128 t = EvalPoly(x, exp_arr, EXP_ARR_SIZE, status);
+    return float128_mul(t, x, status);
+}
+
+// =================================================
+//                                x
+// FX2M1                 Compute 2  - 1
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//      x    x*ln(2)
+//     2  = e
+//
+// 2. ----------------------------------------------------------
+//                    2     3     4     5           n
+//      x      x     x     x     x     x           x
+//     e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
+//             1!    2!    3!    4!    5!          n!
+//
+
+floatx80 f2xm1(floatx80 a, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+    Bit64u zSig0, zSig1, zSig2;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1))
+            return propagateFloatx80NaNOne(a, status);
+
+        return (aSign) ? floatx80_negone : a;
+    }
+
+    if (aExp == 0) {
+        if (aSig == 0) return a;
+        float_raise(status, float_flag_denormal | float_flag_inexact);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+
+    tiny_argument:
+        mul128By64To192(LN2_SIG_HI, LN2_SIG_LO, aSig, &zSig0, &zSig1, &zSig2);
+        if (0 < (Bit64s) zSig0) {
+            shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+            --aExp;
+        }
+        return
+            roundAndPackFloatx80(80, aSign, aExp, zSig0, zSig1, status);
+    }
+
+    float_raise(status, float_flag_inexact);
+
+    if (aExp < 0x3FFF)
+    {
+        if (aExp < FLOATX80_EXP_BIAS-68)
+            goto tiny_argument;
+
+        /* ******************************** */
+        /* using float128 for approximation */
+        /* ******************************** */
+
+        float128 x = floatx80_to_float128(a, status);
+        x = float128_mul(x, float128_ln2, status);
+        x = poly_exp(x, status);
+        return float128_to_floatx80(x, status);
+    }
+    else
+    {
+        if (a.exp == 0xBFFF && ! (aSig<<1))
+            return floatx80_neghalf;
+
+        return a;
+    }
+}
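For reference, the identity the routine implements (2^x - 1 = e^(x*ln2) - 1) can be checked against the host libm in double precision; a quick sketch using host math, not the emulator path (M_LN2 assumed available from <math.h>):

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        double x = 0.5;
        /* expm1() keeps precision near x = 0, which is why f2xm1()
           above special-cases tiny arguments separately. */
        printf("%.17g\n", expm1(x * M_LN2));
        printf("%.17g\n", pow(2.0, x) - 1.0); /* same value, less accurate near 0 */
        return 0;
    }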
diff --git a/src/cpu/softfloat/fpatan.cc b/src/cpu/softfloat/fpatan.cc
new file mode 100644
index 000000000..f33a3ff66
--- /dev/null
+++ b/src/cpu/softfloat/fpatan.cc
@@ -0,0 +1,288 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "fpu_constant.h"
+
+#define FPATAN_ARR_SIZE 11
+
+static const float128 float128_one =
+    packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000));
+static const float128 float128_sqrt3 =
+    packFloat128(BX_CONST64(0x3fffbb67ae8584ca), BX_CONST64(0xa73b25742d7078b8));
+static const floatx80 floatx80_pi =
+    packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235));
+
+static const float128 float128_pi2 =
+    packFloat128(BX_CONST64(0x3fff921fb54442d1), BX_CONST64(0x8469898CC5170416));
+static const float128 float128_pi4 =
+    packFloat128(BX_CONST64(0x3ffe921fb54442d1), BX_CONST64(0x8469898CC5170416));
+static const float128 float128_pi6 =
+    packFloat128(BX_CONST64(0x3ffe0c152382d736), BX_CONST64(0x58465BB32E0F580F));
+
+static float128 atan_arr[FPATAN_ARR_SIZE] =
+{
+    PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+    PACK_FLOAT_128(0xbffd555555555555, 0x5555555555555555), /*  3 */
+    PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
+    PACK_FLOAT_128(0xbffc249249249249, 0x2492492492492492), /*  7 */
+    PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
+    PACK_FLOAT_128(0xbffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
+    PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
+    PACK_FLOAT_128(0xbffb111111111111, 0x1111111111111111), /* 15 */
+    PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2), /* 17 */
+    PACK_FLOAT_128(0xbffaaf286bca1af2, 0x86bca1af286bca1b), /* 19 */
+    PACK_FLOAT_128(0x3ffa861861861861, 0x8618618618618618)  /* 21 */
+};
+
+extern float128 OddPoly(float128 x, float128 *arr, int n, struct float_status_t *status);
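OddPoly evaluates a series containing only odd powers: with coefficients C_k over x^(2k+1), the sum factors as x * P(x^2). A plain-double sketch of that evaluation scheme for the atan series above (Horner form, illustration only, not the float128 code path):

    #include <stdio.h>

    /* atan(x) ~ x - x^3/3 + x^5/5 - ..., evaluated as x * P(x^2) */
    static double odd_poly(double x, const double *c, int n)
    {
        double x2 = x * x, p = c[n - 1];
        for (int k = n - 2; k >= 0; k--)
            p = p * x2 + c[k]; /* Horner step in the variable x^2 */
        return x * p;
    }

    int main(void)
    {
        const double c[] = { 1.0, -1.0 / 3, 1.0 / 5, -1.0 / 7, 1.0 / 9 };
        printf("%f\n", odd_poly(0.2, c, 5)); /* ~atan(0.2) for small x */
        return 0;
    }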
+/* |x| < 1/4 */
+static float128 poly_atan(float128 x1, struct float_status_t *status)
+{
+/*
+    //                 3     5     7     9     11     13     15     17
+    //                x     x     x     x     x      x      x      x
+    // atan(x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- + ----
+    //                3     5     7     9     11     13     15     17
+    //
+    //                     2     4     6     8     10     12     14     16
+    //                    x     x     x     x     x      x      x      x
+    //     = x * [ 1 -   --- + --- - --- + --- - ---- + ---- - ---- + ---- ]
+    //                    3     5     7     9     11     13     15     17
+    //
+    //            5                          5
+    //           --       4k                --       4k+2
+    //  p(x) =   >  C   * x         q(x) =  >  C   * x
+    //           --  2k                     --  2k+1
+    //           k=0                        k=0
+    //
+    //                            2
+    //  atan(x) ~ x * [ p(x) + x * q(x) ]
+    //
+*/
+    return OddPoly(x1, atan_arr, FPATAN_ARR_SIZE, status);
+}
+
+// =================================================
+// FPATAN                Compute atan(y/x)
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//
+//   atan(-x) = -atan(x)
+//
+// 2. ----------------------------------------------------------
+//
+//                             x + y
+//   atan(x) + atan(y) = atan -------, xy < 1
+//                             1-xy
+//
+//                             x + y
+//   atan(x) + atan(y) = atan ------- + PI, x > 0, xy > 1
+//                             1-xy
+//
+//                             x + y
+//   atan(x) + atan(y) = atan ------- - PI, x < 0, xy > 1
+//                             1-xy
+//
+// 3. ----------------------------------------------------------
+//
+//   atan(x) = atan(INF) + atan(- 1/x)
+//
+//                           x-1
+//   atan(x) = PI/4 + atan( ----- )
+//                           x+1
+//
+//                           x * sqrt(3) - 1
+//   atan(x) = PI/6 + atan( ----------------- )
+//                           x + sqrt(3)
+//
+// 4. ----------------------------------------------------------
+//                   3     5     7     9                 2n+1
+//                  x     x     x     x        n        x
+//   atan(x) = x - --- + --- - --- + --- - ... + (-1)  ------ + ...
+//                  3     5     7     9                 2n+1
+//
+
+floatx80 fpatan(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) { + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + Bit64u aSig = extractFloatx80Frac(a); + Bit32s aExp = extractFloatx80Exp(a); + int aSign = extractFloatx80Sign(a); + Bit64u bSig = extractFloatx80Frac(b); + Bit32s bExp = extractFloatx80Exp(b); + int bSign = extractFloatx80Sign(b); + + int zSign = aSign ^ bSign; + + if (bExp == 0x7FFF) + { + if ((Bit64u) (bSig<<1)) + return propagateFloatx80NaN(a, b, status); + + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig<<1)) + return propagateFloatx80NaN(a, b, status); + + if (aSign) { /* return 3PI/4 */ + return roundAndPackFloatx80(80, bSign, + FLOATX80_3PI4_EXP, FLOAT_3PI4_HI, FLOAT_3PI4_LO, status); + } + else { /* return PI/4 */ + return roundAndPackFloatx80(80, bSign, + FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status); + } + } + + if (aSig && (aExp == 0)) + float_raise(status, float_flag_denormal); + + /* return PI/2 */ + return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status); + } + if (aExp == 0x7FFF) + { + if ((Bit64u) (aSig<<1)) + return propagateFloatx80NaN(a, b, status); + + if (bSig && (bExp == 0)) + float_raise(status, float_flag_denormal); + +return_PI_or_ZERO: + + if (aSign) { /* return PI */ + return roundAndPackFloatx80(80, bSign, FLOATX80_PI_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status); + } else { /* return 0 */ + return packFloatx80(bSign, 0, 0); + } + } + if (bExp == 0) + { + if (bSig == 0) { + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + goto return_PI_or_ZERO; + } + + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + if (aExp == 0) + { + if (aSig == 0) /* return PI/2 */ + return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status); + + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + float_raise(status, float_flag_inexact); + + /* |a| = |b| ==> return PI/4 */ + if (aSig == bSig && aExp == bExp) + return roundAndPackFloatx80(80, bSign, FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status); + + /* ******************************** */ + /* using float128 for approximation */ + /* ******************************** */ + + float128 a128 = normalizeRoundAndPackFloat128(0, aExp-0x10, aSig, 0, status); + float128 b128 = normalizeRoundAndPackFloat128(0, bExp-0x10, bSig, 0, status); + float128 x; + int swap = 0, add_pi6 = 0, add_pi4 = 0; + + if (aExp > bExp || (aExp == bExp && aSig > bSig)) + { + x = float128_div(b128, a128, status); + } + else { + x = float128_div(a128, b128, status); + swap = 1; + } + + Bit32s xExp = extractFloat128Exp(x); + + if (xExp <= FLOATX80_EXP_BIAS-40) + goto approximation_completed; + + if (x.hi >= BX_CONST64(0x3ffe800000000000)) // 3/4 < x < 1 + { + /* + arctan(x) = arctan((x-1)/(x+1)) + pi/4 + */ + float128 t1 = float128_sub(x, float128_one, status); + float128 t2 = float128_add(x, float128_one, status); + x = float128_div(t1, t2, status); + add_pi4 = 1; + } + else + { + /* argument correction */ + if (xExp >= 0x3FFD) // 1/4 < x < 3/4 + { + /* + arctan(x) = arctan((x*sqrt(3)-1)/(x+sqrt(3))) + pi/6 + */ + float128 t1 = float128_mul(x, float128_sqrt3, status); + float128 t2 = 
diff --git a/src/cpu/softfloat/fprem.cc b/src/cpu/softfloat/fprem.cc
new file mode 100644
index 000000000..26637c5c5
--- /dev/null
+++ b/src/cpu/softfloat/fprem.cc
@@ -0,0 +1,196 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#define USE_estimateDiv128To64
+#include "softfloat-macros.h"
+
+/* executes single exponent reduction cycle */
+static Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1)
+{
+    Bit64u term0, term1;
+    Bit64u aSig1 = 0;
+
+    shortShift128Left(aSig1, aSig0, expDiff, &aSig1, &aSig0);
+    Bit64u q = estimateDiv128To64(aSig1, aSig0, bSig);
+    mul64To128(bSig, q, &term0, &term1);
+    sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
+    while ((Bit64s)(*zSig1) < 0) {
+        --q;
+        add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0);
+    }
+    return q;
+}
+
+static int do_fprem(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, int rounding_mode, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + Bit32s aExp, bExp, zExp, expDiff; + Bit64u aSig0, aSig1, bSig; + int aSign; + *q = 0; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) + { + float_raise(status, float_flag_invalid); + *r = floatx80_default_nan; + return -1; + } + + aSig0 = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig0<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) { + *r = propagateFloatx80NaN(a, b, status); + return -1; + } + float_raise(status, float_flag_invalid); + *r = floatx80_default_nan; + return -1; + } + if (bExp == 0x7FFF) { + if ((Bit64u) (bSig<<1)) { + *r = propagateFloatx80NaN(a, b, status); + return -1; + } + if (aExp == 0 && aSig0) { + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); + *r = (a.fraction & BX_CONST64(0x8000000000000000)) ? + packFloatx80(aSign, aExp, aSig0) : a; + return 0; + } + *r = a; + return 0; + + } + if (bExp == 0) { + if (bSig == 0) { + float_raise(status, float_flag_invalid); + *r = floatx80_default_nan; + return -1; + } + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + if (aExp == 0) { + if (aSig0 == 0) { + *r = a; + return 0; + } + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); + } + expDiff = aExp - bExp; + aSig1 = 0; + + Bit32u overflow = 0; + + if (expDiff >= 64) { + int n = (expDiff & 0x1f) | 0x20; + remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1); + zExp = aExp - n; + overflow = 1; + } + else { + zExp = bExp; + + if (expDiff < 0) { + if (expDiff < -1) { + *r = (a.fraction & BX_CONST64(0x8000000000000000)) ? + packFloatx80(aSign, aExp, aSig0) : a; + return 0; + } + shift128Right(aSig0, 0, 1, &aSig0, &aSig1); + expDiff = 0; + } + + if (expDiff > 0) { + *q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1); + } + else { + if (bSig <= aSig0) { + aSig0 -= bSig; + *q = 1; + } + } + + if (rounding_mode == float_round_nearest_even) + { + Bit64u term0, term1; + shift128Right(bSig, 0, 1, &term0, &term1); + + if (! lt128(aSig0, aSig1, term0, term1)) + { + int lt = lt128(term0, term1, aSig0, aSig1); + int eq = eq128(aSig0, aSig1, term0, term1); + + if ((eq && ((*q) & 1)) || lt) { + aSign = !aSign; + ++(*q); + } + if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1); + } + } + } + + *r = normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1, status); + return overflow; +} + +/*---------------------------------------------------------------------------- +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +int floatx80_ieee754_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status) +{ + return do_fprem(a, b, r, q, float_round_nearest_even, status); +} + +/*---------------------------------------------------------------------------- +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b'. Unlike previous function +| the function does not compute the remainder specified in the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. This function operates +| differently from the previous function in the way that it rounds the +| quotient of 'a' divided by 'b' to an integer. +*----------------------------------------------------------------------------*/ + +int floatx80_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status) +{ + return do_fprem(a, b, r, q, float_round_to_zero, status); +} diff --git a/src/cpu/softfloat/fpu_constant.h b/src/cpu/softfloat/fpu_constant.h new file mode 100644 index 000000000..7a7fc6f1a --- /dev/null +++ b/src/cpu/softfloat/fpu_constant.h @@ -0,0 +1,82 @@ +/*============================================================================ +This source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) +floating point emulation. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. 
+=============================================================================*/
+
+#ifndef _FPU_CONSTANTS_H_
+#define _FPU_CONSTANTS_H_
+
+#include "config.h"
+
+// Pentium CPU uses only 68-bit precision M_PI approximation
+//#define BETTER_THAN_PENTIUM
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+//////////////////////////////
+// PI, PI/2, PI/4 constants
+//////////////////////////////
+
+#define FLOATX80_PI_EXP  (0x4000)
+
+// 128-bit PI fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_PI_HI (BX_CONST64(0xc90fdaa22168c234))
+#define FLOAT_PI_LO (BX_CONST64(0xc4c6628b80dc1cd1))
+#else
+#define FLOAT_PI_HI (BX_CONST64(0xc90fdaa22168c234))
+#define FLOAT_PI_LO (BX_CONST64(0xC000000000000000))
+#endif
+
+#define FLOATX80_PI2_EXP  (0x3FFF)
+#define FLOATX80_PI4_EXP  (0x3FFE)
+
+//////////////////////////////
+// 3PI/4 constant
+//////////////////////////////
+
+#define FLOATX80_3PI4_EXP (0x4000)
+
+// 128-bit 3PI/4 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_3PI4_HI (BX_CONST64(0x96cbe3f9990e91a7))
+#define FLOAT_3PI4_LO (BX_CONST64(0x9394c9e8a0a5159c))
+#else
+#define FLOAT_3PI4_HI (BX_CONST64(0x96cbe3f9990e91a7))
+#define FLOAT_3PI4_LO (BX_CONST64(0x9000000000000000))
+#endif
+
+//////////////////////////////
+// 1/LN2 constant
+//////////////////////////////
+
+#define FLOAT_LN2INV_EXP  (0x3FFF)
+
+// 128-bit 1/LN2 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_LN2INV_HI (BX_CONST64(0xb8aa3b295c17f0bb))
+#define FLOAT_LN2INV_LO (BX_CONST64(0xbe87fed0691d3e89))
+#else
+#define FLOAT_LN2INV_HI (BX_CONST64(0xb8aa3b295c17f0bb))
+#define FLOAT_LN2INV_LO (BX_CONST64(0xC000000000000000))
+#endif
+
+#endif
diff --git a/src/cpu/softfloat/fsincos.cc b/src/cpu/softfloat/fsincos.cc
new file mode 100644
index 000000000..f5b33a823
--- /dev/null
+++ b/src/cpu/softfloat/fsincos.cc
@@ -0,0 +1,441 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#define USE_estimateDiv128To64
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "fpu_constant.h"
+
+static const floatx80 floatx80_one = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
+
+/* reduce trigonometric function argument using 128-bit precision
+   M_PI approximation */
+static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
+{
+    Bit64u term0, term1, term2;
+    Bit64u aSig1 = 0;
+
+    shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
+    Bit64u q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
+    mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
+    sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
+    while ((Bit64s)(*zSig1) < 0) {
+        --q;
+        add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
+    }
+    *zSig1 = term2;
+    return q;
+}
+
+static int reduce_trig_arg(int expDiff, int *zSign, Bit64u *aSig0, Bit64u *aSig1)
+{
+    Bit64u term0, term1, q = 0;
+
+    if (expDiff < 0) {
+        shift128Right(*aSig0, 0, 1, aSig0, aSig1);
+        expDiff = 0;
+    }
+    if (expDiff > 0) {
+        q = argument_reduction_kernel(*aSig0, expDiff, aSig0, aSig1);
+    }
+    else {
+        if (FLOAT_PI_HI <= *aSig0) {
+            *aSig0 -= FLOAT_PI_HI;
+            q = 1;
+        }
+    }
+
+    shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
+    if (! lt128(*aSig0, *aSig1, term0, term1))
+    {
+        int lt = lt128(term0, term1, *aSig0, *aSig1);
+        int eq = eq128(*aSig0, *aSig1, term0, term1);
+
+        if ((eq && (q & 1)) || lt) {
+            *zSign = !(*zSign);
+            ++q;
+        }
+        if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, *aSig0, *aSig1, aSig0, aSig1);
+    }
+
+    return (int)(q & 3);
+}
+
+#define SIN_ARR_SIZE 11
+#define COS_ARR_SIZE 11
+
+static float128 sin_arr[SIN_ARR_SIZE] =
+{
+    PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+    PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /*  3 */
+    PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
+    PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
+    PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
+    PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
+    PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
+    PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
+    PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
+    PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
+    PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45)  /* 21 */
+};
+
+static float128 cos_arr[COS_ARR_SIZE] =
+{
+    PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  0 */
+    PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /*  2 */
+    PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
+    PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
+    PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
+    PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
+    PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
+    PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
+    PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
+    PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
+    PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb)  /* 20 */
+};
+
+extern float128 OddPoly (float128 x, float128 *arr, int n, struct float_status_t *status);
+
+/* 0 <= x <= pi/4 */
+BX_CPP_INLINE float128 poly_sin(float128 x, struct float_status_t *status)
+{
+    //                 3     5     7     9     11     13     15
+    //                x     x     x     x     x      x      x
+    // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ----  =
+    //                3!    5!    7!    9!    11!    13!    15!
+    //
+    //                     2     4     6     8     10     12     14
+    //                    x     x     x     x     x      x      x
+    //     = x * [ 1 -   --- + --- - --- + --- - ---- + ---- - ---- ]
+    //                    3!    5!    7!    9!    11!    13!    15!
+    //
+    //            3                          3
+    //           --       4k                --       4k+2
+    //  p(x) =   >  C   * x    > 0   q(x) = >  C   * x     < 0
+    //           --  2k                     --  2k+1
+    //           k=0                        k=0
+    //
+    //                            2
+    //  sin(x) ~ x * [ p(x) + x * q(x) ]
+    //
+
+    return OddPoly(x, sin_arr, SIN_ARR_SIZE, status);
+}
+
+extern float128 EvenPoly(float128 x, float128 *arr, int n, struct float_status_t *status);
+
+/* 0 <= x <= pi/4 */
+BX_CPP_INLINE float128 poly_cos(float128 x, struct float_status_t *status)
+{
+    //                 2     4     6     8     10     12     14
+    //                x     x     x     x     x      x      x
+    // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
+    //                2!    4!    6!    8!    10!    12!    14!
+    //
+    //            3                          3
+    //           --       4k                --       4k+2
+    //  p(x) =   >  C   * x    > 0   q(x) = >  C   * x     < 0
+    //           --  2k                     --  2k+1
+    //           k=0                        k=0
+    //
+    //                         2
+    //  cos(x) ~ [ p(x) + x * q(x) ]
+    //
+
+    return EvenPoly(x, cos_arr, COS_ARR_SIZE, status);
+}
+
+BX_CPP_INLINE void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+    if (sin_a) *sin_a = a;
+    if (cos_a) *cos_a = a;
+}
+
+BX_CPP_INLINE void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+    if (sin_a) *sin_a = a;
+    if (cos_a) *cos_a = floatx80_one;
+}
+
+static floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient, struct float_status_t *status)
+{
+    if (quotient & 0x1) {
+        r = poly_cos(r, status);
+        neg = 0;
+    } else {
+        r = poly_sin(r, status);
+    }
+
+    floatx80 result = float128_to_floatx80(r, status);
+    if (quotient & 0x2)
+        neg = ! neg;
+
+    if (neg)
+        floatx80_chs(result);
+
+    return result;
+}
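sincos_approximation() picks the series and the sign from the low two bits of the reduction quotient, i.e. from the quadrant of the reduced argument. The ideal-math mapping behind those (quotient & 1) / (quotient & 2) tests, sketched in plain doubles:

    #include <math.h>
    #include <stdio.h>

    /* sin(r + q*pi/2): quadrant q selects sin/cos and the sign */
    static double sin_by_quadrant(double r, int q)
    {
        double v = (q & 1) ? cos(r) : sin(r);
        return (q & 2) ? -v : v;
    }

    int main(void)
    {
        double x = 2.0, r = x - M_PI_2; /* one reduction step, q = 1 */
        printf("%f %f\n", sin_by_quadrant(r, 1), sin(x)); /* both ~0.909297 */
        return 0;
    }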
+*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + Bit64u aSig0, aSig1 = 0; + Bit32s aExp, zExp, expDiff; + int aSign, zSign; + int q = 0; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a)) { + goto invalid; + } + + aSig0 = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + /* invalid argument */ + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig0<<1)) { + sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOne(a, status)); + return 0; + } + + invalid: + float_raise(status, float_flag_invalid); + sincos_invalid(sin_a, cos_a, floatx80_default_nan); + return 0; + } + + if (aExp == 0) { + if (aSig0 == 0) { + sincos_tiny_argument(sin_a, cos_a, a); + return 0; + } + + float_raise(status, float_flag_denormal); + + /* handle pseudo denormals */ + if (! (aSig0 & BX_CONST64(0x8000000000000000))) + { + float_raise(status, float_flag_inexact); + if (sin_a) + float_raise(status, float_flag_underflow); + sincos_tiny_argument(sin_a, cos_a, a); + return 0; + } + + normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); + } + + zSign = aSign; + zExp = FLOATX80_EXP_BIAS; + expDiff = aExp - zExp; + + /* argument is out-of-range */ + if (expDiff >= 63) + return -1; + + float_raise(status, float_flag_inexact); + + if (expDiff < -1) { // doesn't require reduction + if (expDiff <= -68) { + a = packFloatx80(aSign, aExp, aSig0); + sincos_tiny_argument(sin_a, cos_a, a); + return 0; + } + zExp = aExp; + } + else { + q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1); + } + + /* **************************** */ + /* argument reduction completed */ + /* **************************** */ + + /* using float128 for approximation */ + float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status); + + if (aSign) q = -q; + if (sin_a) *sin_a = sincos_approximation(zSign, r, q, status); + if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1, status); + + return 0; +} + +int fsin(floatx80 *a, struct float_status_t *status) +{ + return fsincos(*a, a, 0, status); +} + +int fcos(floatx80 *a, struct float_status_t *status) +{ + return fsincos(*a, 0, a, status); +} + +// ================================================= +// FPTAN Compute tan(x) +// ================================================= + +// +// Uses the following identities: +// +// 1. ---------------------------------------------------------- +// +// sin(-x) = -sin(x) +// cos(-x) = cos(x) +// +// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y) +// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y) +// +// sin(x+ pi/2) = cos(x) +// sin(x+ pi) = -sin(x) +// sin(x+3pi/2) = -cos(x) +// sin(x+2pi) = sin(x) +// +// 2. ---------------------------------------------------------- +// +// sin(x) +// tan(x) = ------ +// cos(x) +// + +int ftan(floatx80 *a, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. 
+*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + Bit64u aSig0, aSig1 = 0; + Bit32s aExp, zExp, expDiff; + int aSign, zSign; + int q = 0; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(*a)) { + goto invalid; + } + + aSig0 = extractFloatx80Frac(*a); + aExp = extractFloatx80Exp(*a); + aSign = extractFloatx80Sign(*a); + + /* invalid argument */ + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig0<<1)) + { + *a = propagateFloatx80NaNOne(*a, status); + return 0; + } + + invalid: + float_raise(status, float_flag_invalid); + *a = floatx80_default_nan; + return 0; + } + + if (aExp == 0) { + if (aSig0 == 0) return 0; + float_raise(status, float_flag_denormal); + /* handle pseudo denormals */ + if (! (aSig0 & BX_CONST64(0x8000000000000000))) + { + float_raise(status, float_flag_inexact | float_flag_underflow); + return 0; + } + normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); + } + + zSign = aSign; + zExp = FLOATX80_EXP_BIAS; + expDiff = aExp - zExp; + + /* argument is out-of-range */ + if (expDiff >= 63) + return -1; + + float_raise(status, float_flag_inexact); + + if (expDiff < -1) { // doesn't require reduction + if (expDiff <= -68) { + *a = packFloatx80(aSign, aExp, aSig0); + return 0; + } + zExp = aExp; + } + else { + q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1); + } + + /* **************************** */ + /* argument reduction completed */ + /* **************************** */ + + /* using float128 for approximation */ + float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status); + + float128 sin_r = poly_sin(r, status); + float128 cos_r = poly_cos(r, status); + + if (q & 0x1) { + r = float128_div(cos_r, sin_r, status); + zSign = ! zSign; + } else { + r = float128_div(sin_r, cos_r, status); + } + + *a = float128_to_floatx80(r, status); + if (zSign) + floatx80_chs(*a); + + return 0; +} diff --git a/src/cpu/softfloat/fyl2x.cc b/src/cpu/softfloat/fyl2x.cc new file mode 100644 index 000000000..875f866a9 --- /dev/null +++ b/src/cpu/softfloat/fyl2x.cc @@ -0,0 +1,363 @@ +/*============================================================================ +This source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) +floating point emulation. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. 
+=============================================================================*/ + +/*============================================================================ + * Written for Bochs (x86 architecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#define FLOAT128 + +#include "softfloatx80.h" +#include "softfloat-round-pack.h" +#include "fpu_constant.h" + +static const floatx80 floatx80_one = + packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000)); + +static const float128 float128_one = + packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000)); +static const float128 float128_two = + packFloat128(BX_CONST64(0x4000000000000000), BX_CONST64(0x0000000000000000)); + +static const float128 float128_ln2inv2 = + packFloat128(BX_CONST64(0x400071547652b82f), BX_CONST64(0xe1777d0ffda0d23a)); + +#define SQRT2_HALF_SIG BX_CONST64(0xb504f333f9de6484) + +extern float128 OddPoly(float128 x, float128 *arr, int n, struct float_status_t *status); + +#define L2_ARR_SIZE 9 + +static float128 ln_arr[L2_ARR_SIZE] = +{ + PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */ + PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /* 3 */ + PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 5 */ + PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /* 7 */ + PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 9 */ + PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */ + PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */ + PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */ + PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2) /* 17 */ +}; + +static float128 poly_ln(float128 x1, struct float_status_t *status) +{ +/* + // + // 3 5 7 9 11 13 15 + // 1+u u u u u u u u + // 1/2 ln --- ~ u + --- + --- + --- + --- + ---- + ---- + ---- = + // 1-u 3 5 7 9 11 13 15 + // + // 2 4 6 8 10 12 14 + // u u u u u u u + // = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] = + // 3 5 7 9 11 13 15 + // + // 3 3 + // -- 4k -- 4k+2 + // p(u) = > C * u q(u) = > C * u + // -- 2k -- 2k+1 + // k=0 k=0 + // + // 1+u 2 + // 1/2 ln --- ~ u * [ p(u) + u * q(u) ] + // 1-u + // +*/ + return OddPoly(x1, ln_arr, L2_ARR_SIZE, status); +} + +/* required sqrt(2)/2 < x < sqrt(2) */ +static float128 poly_l2(float128 x, struct float_status_t *status) +{ + /* using float128 for approximation */ + float128 x_p1 = float128_add(x, float128_one, status); + float128 x_m1 = float128_sub(x, float128_one, status); + x = float128_div(x_m1, x_p1, status); + x = poly_ln(x, status); + x = float128_mul(x, float128_ln2inv2, status); + return x; +} + +static float128 poly_l2p1(float128 x, struct float_status_t *status) +{ + /* using float128 for approximation */ + float128 x_p2 = float128_add(x, float128_two, status); + x = float128_div(x, x_p2, status); + x = poly_ln(x, status); + x = float128_mul(x, float128_ln2inv2, status); + return x; +} + +// ================================================= +// FYL2X Compute y * log (x) +// 2 +// ================================================= + +// +// Uses the following identities: +// +// 1. ---------------------------------------------------------- +// ln(x) +// log (x) = -------, ln (x*y) = ln(x) + ln(y) +// 2 ln(2) +// +// 2. ---------------------------------------------------------- +// 1+u x-1 +// ln (x) = ln -----, when u = ----- +// 1-u x+1 +// +// 3.
---------------------------------------------------------- +// 3 5 7 2n+1 +// 1+u u u u u +// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ] +// 1-u 3 5 7 2n+1 +// + +floatx80 fyl2x(floatx80 a, floatx80 b, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) { +invalid: + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + Bit64u aSig = extractFloatx80Frac(a); + Bit32s aExp = extractFloatx80Exp(a); + int aSign = extractFloatx80Sign(a); + Bit64u bSig = extractFloatx80Frac(b); + Bit32s bExp = extractFloatx80Exp(b); + int bSign = extractFloatx80Sign(b); + + int zSign = bSign ^ 1; + + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig<<1) + || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) + { + return propagateFloatx80NaN(a, b, status); + } + if (aSign) goto invalid; + else { + if (bExp == 0) { + if (bSig == 0) goto invalid; + float_raise(status, float_flag_denormal); + } + return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + } + if (bExp == 0x7FFF) + { + if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status); + if (aSign && (Bit64u)(aExp | aSig)) goto invalid; + if (aSig && (aExp == 0)) + float_raise(status, float_flag_denormal); + if (aExp < 0x3FFF) { + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) goto invalid; + return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (aExp == 0) { + if (aSig == 0) { + if ((bExp | bSig) == 0) goto invalid; + float_raise(status, float_flag_divbyzero); + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (aSign) goto invalid; + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + if (aSign) goto invalid; + if (bExp == 0) { + if (bSig == 0) { + if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0); + return packFloatx80(bSign, 0, 0); + } + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) + return packFloatx80(bSign, 0, 0); + + float_raise(status, float_flag_inexact); + + int ExpDiff = aExp - 0x3FFF; + aExp = 0; + if (aSig >= SQRT2_HALF_SIG) { + ExpDiff++; + aExp--; + } + + /* ******************************** */ + /* using float128 for approximation */ + /* ******************************** */ + + Bit64u zSig0, zSig1; + shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); + float128 x = packFloat128Four(0, aExp+0x3FFF, zSig0, zSig1); + x = poly_l2(x, status); + x = float128_add(x, int64_to_float128((Bit64s) ExpDiff), status); + return floatx80_128_mul(b, x, status); +} + +// ================================================= +// FYL2XP1 Compute y * log (x + 1) +// 2 +// ================================================= + +// +// Uses the following identities: +// +// 1. ---------------------------------------------------------- +// ln(x) +// log (x) = ------- +// 2 ln(2) +// +// 2. 
---------------------------------------------------------- +// 1+u x +// ln (x+1) = ln -----, when u = ----- +// 1-u x+2 +// +// 3. ---------------------------------------------------------- +// 3 5 7 2n+1 +// 1+u u u u u +// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ] +// 1-u 3 5 7 2n+1 +// + +floatx80 fyl2xp1(floatx80 a, floatx80 b, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + Bit32s aExp, bExp; + Bit64u aSig, bSig, zSig0, zSig1, zSig2; + int aSign, bSign; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) { +invalid: + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + bSign = extractFloatx80Sign(b); + int zSign = aSign ^ bSign; + + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig<<1) + || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) + { + return propagateFloatx80NaN(a, b, status); + } + if (aSign) goto invalid; + else { + if (bExp == 0) { + if (bSig == 0) goto invalid; + float_raise(status, float_flag_denormal); + } + return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + } + if (bExp == 0x7FFF) + { + if ((Bit64u) (bSig<<1)) + return propagateFloatx80NaN(a, b, status); + + if (aExp == 0) { + if (aSig == 0) goto invalid; + float_raise(status, float_flag_denormal); + } + + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (aExp == 0) { + if (aSig == 0) { + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return packFloatx80(zSign, 0, 0); + } + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + if (bSig == 0) return packFloatx80(zSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + + float_raise(status, float_flag_inexact); + + if (aSign && aExp >= 0x3FFF) + return a; + + if (aExp >= 0x3FFC) // big argument + { + return fyl2x(floatx80_add(a, floatx80_one, status), b, status); + } + + // handle tiny argument + if (aExp < FLOATX80_EXP_BIAS-70) + { + // first order approximation, return (a*b)/ln(2) + Bit32s zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE; + + mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2); + if (0 < (Bit64s) zSig0) { + shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); + --zExp; + } + + zExp = zExp + bExp - 0x3FFE; + mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2); + if (0 < (Bit64s) zSig0) { + shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); + --zExp; + } + + return + roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1, status); + } + + /* ******************************** */ + /* using float128 for approximation */ + /* ******************************** */ + + shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); + float128 x = packFloat128Four(aSign, aExp, zSig0, zSig1); + x = poly_l2p1(x, status); + return floatx80_128_mul(b, x, status); +} diff --git a/src/cpu/softfloat/poly.cc b/src/cpu/softfloat/poly.cc new file mode 100644 index 
000000000..5c7079353 --- /dev/null +++ b/src/cpu/softfloat/poly.cc @@ -0,0 +1,89 @@ +/*============================================================================ +This source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator) +floating point emulation. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +/*============================================================================ + * Written for Bochs (x86 architecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#define FLOAT128 + +#include <math.h> +#include "softfloat.h" + +// 2 3 4 n +// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) +// 0 1 2 3 4 n +// +// -- 2k -- 2k+1 +// p(x) = > C * x q(x) = > C * x +// -- 2k -- 2k+1 +// +// f(x) ~ [ p(x) + x * q(x) ] +// + +float128 EvalPoly(float128 x, float128 *arr, int n, struct float_status_t *status) +{ + float128 r = arr[--n]; + + do { + r = float128_mul(r, x, status); + r = float128_add(r, arr[--n], status); + } while (n > 0); + + return r; +} + +// 2 4 6 8 2n +// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) +// 0 1 2 3 4 n +// +// -- 4k -- 4k+2 +// p(x) = > C * x q(x) = > C * x +// -- 2k -- 2k+1 +// +// 2 +// f(x) ~ [ p(x) + x * q(x) ] +// + +float128 EvenPoly(float128 x, float128 *arr, int n, struct float_status_t *status) +{ + return EvalPoly(float128_mul(x, x, status), arr, n, status); +} + +// 3 5 7 9 2n+1 +// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) +// 0 1 2 3 4 n +// 2 4 6 8 2n +// = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) +// 0 1 2 3 4 n +// +// -- 4k -- 4k+2 +// p(x) = > C * x q(x) = > C * x +// -- 2k -- 2k+1 +// +// 2 +// f(x) ~ x * [ p(x) + x * q(x) ] +// + +float128 OddPoly(float128 x, float128 *arr, int n, struct float_status_t *status) +{ + return float128_mul(x, EvenPoly(x, arr, n, status), status); +}
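EvalPoly is a straight Horner evaluation over coefficients stored lowest degree first; EvenPoly and OddPoly reuse it by substituting x*x (OddPoly multiplying once more by x), which is why fyl2x.cc's ln_arr above stores only the odd-power coefficients 1, 1/3, 1/5, ... The same loop rendered with plain doubles, as a sketch for illustration rather than code from the diff:

#include <stdio.h>

static double eval_poly(double x, const double *arr, int n)
{
    double r = arr[--n];
    do {
        r = r * x + arr[--n];  /* Horner step: multiply, add next coefficient */
    } while (n > 0);
    return r;
}

int main(void)
{
    /* coefficients of 1 + x/3 + x^2/5, lowest degree first */
    const double c[3] = { 1.0, 1.0 / 3, 1.0 / 5 };
    printf("%g\n", eval_poly(0.25, c, 3)); /* 1 + 0.25/3 + 0.0625/5 */
    return 0;
}

diff --git a/src/cpu/softfloat/softfloat-compare.h b/src/cpu/softfloat/softfloat-compare.h new file mode 100644 index 000000000..8b9821460 --- /dev/null +++ b/src/cpu/softfloat/softfloat-compare.h @@ -0,0 +1,496 @@ +/*============================================================================ +This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser.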
This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +/*============================================================================ + * Adapted for Bochs (x86 architecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#ifndef _SOFTFLOAT_COMPARE_H_ +#define _SOFTFLOAT_COMPARE_H_ + +#include "softfloat.h" + +// ======= float32 ======= // + +typedef int (*float32_compare_method)(float32, float32, struct float_status_t *status); + +// 0x00 +BX_CPP_INLINE int float32_eq_ordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation == float_relation_equal); +} + +// 0x01 +BX_CPP_INLINE int float32_lt_ordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation == float_relation_less); +} + +// 0x02 +BX_CPP_INLINE int float32_le_ordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation == float_relation_less) || (relation == float_relation_equal); +} + +// 0x03 +BX_CPP_INLINE int float32_unordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation == float_relation_unordered); +} + +// 0x04 +BX_CPP_INLINE int float32_neq_unordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation != float_relation_equal); +} + +// 0x05 +BX_CPP_INLINE int float32_nlt_unordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation != float_relation_less); +} + +// 0x06 +BX_CPP_INLINE int float32_nle_unordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = 
float32_compare_two(a, b, status); + return (relation != float_relation_less) && (relation != float_relation_equal); +} + +// 0x07 +BX_CPP_INLINE int float32_ordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation != float_relation_unordered); +} + +// 0x08 +BX_CPP_INLINE int float32_eq_unordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation == float_relation_equal) || (relation == float_relation_unordered); +} + +// 0x09 +BX_CPP_INLINE int float32_nge_unordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation == float_relation_less) || (relation == float_relation_unordered); +} + +// 0x0a +BX_CPP_INLINE int float32_ngt_unordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation != float_relation_greater); +} + +// 0x0b +BX_CPP_INLINE int float32_false_quiet(float32 a, float32 b, struct float_status_t *status) +{ + float32_compare_quiet(a, b, status); + return 0; +} + +// 0x0c +BX_CPP_INLINE int float32_neq_ordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation != float_relation_equal) && (relation != float_relation_unordered); +} + +// 0x0d +BX_CPP_INLINE int float32_ge_ordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation == float_relation_greater) || (relation == float_relation_equal); +} + +// 0x0e +BX_CPP_INLINE int float32_gt_ordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation == float_relation_greater); +} + +// 0x0f +BX_CPP_INLINE int float32_true_quiet(float32 a, float32 b, struct float_status_t *status) +{ + float32_compare_quiet(a, b, status); + return 1; +} + +// 0x10 +BX_CPP_INLINE int float32_eq_ordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation == float_relation_equal); +} + +// 0x11 +BX_CPP_INLINE int float32_lt_ordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation == float_relation_less); +} + +// 0x12 +BX_CPP_INLINE int float32_le_ordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation == float_relation_less) || (relation == float_relation_equal); +} + +// 0x13 +BX_CPP_INLINE int float32_unordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation == float_relation_unordered); +} + +// 0x14 +BX_CPP_INLINE int float32_neq_unordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation != float_relation_equal); +} + +// 0x15 +BX_CPP_INLINE int float32_nlt_unordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation != float_relation_less); +} + +// 0x16 +BX_CPP_INLINE int float32_nle_unordered_quiet(float32 a, float32 b, struct float_status_t 
*status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation != float_relation_less) && (relation != float_relation_equal); +} + +// 0x17 +BX_CPP_INLINE int float32_ordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation != float_relation_unordered); +} + +// 0x18 +BX_CPP_INLINE int float32_eq_unordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation == float_relation_equal) || (relation == float_relation_unordered); +} + +// 0x19 +BX_CPP_INLINE int float32_nge_unordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation == float_relation_less) || (relation == float_relation_unordered); +} + +// 0x1a +BX_CPP_INLINE int float32_ngt_unordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation != float_relation_greater); +} + +// 0x1b +BX_CPP_INLINE int float32_false_signalling(float32 a, float32 b, struct float_status_t *status) +{ + float32_compare_two(a, b, status); + return 0; +} + +// 0x1c +BX_CPP_INLINE int float32_neq_ordered_signalling(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_two(a, b, status); + return (relation != float_relation_equal) && (relation != float_relation_unordered); +} + +// 0x1d +BX_CPP_INLINE int float32_ge_ordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation == float_relation_greater) || (relation == float_relation_equal); +} + +// 0x1e +BX_CPP_INLINE int float32_gt_ordered_quiet(float32 a, float32 b, struct float_status_t *status) +{ + int relation = float32_compare_quiet(a, b, status); + return (relation == float_relation_greater); +} + +// 0x1f +BX_CPP_INLINE int float32_true_signalling(float32 a, float32 b, struct float_status_t *status) +{ + float32_compare_two(a, b, status); + return 1; +} + +// ======= float64 ======= // + +typedef int (*float64_compare_method)(float64, float64, struct float_status_t *status); + +// 0x00 +BX_CPP_INLINE int float64_eq_ordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation == float_relation_equal); +} + +// 0x01 +BX_CPP_INLINE int float64_lt_ordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation == float_relation_less); +} + +// 0x02 +BX_CPP_INLINE int float64_le_ordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation == float_relation_less) || (relation == float_relation_equal); +} + +// 0x03 +BX_CPP_INLINE int float64_unordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation == float_relation_unordered); +} + +// 0x04 +BX_CPP_INLINE int float64_neq_unordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation != float_relation_equal); +} + +// 0x05 +BX_CPP_INLINE int float64_nlt_unordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, 
status); + return (relation != float_relation_less); +} + +// 0x06 +BX_CPP_INLINE int float64_nle_unordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation != float_relation_less) && (relation != float_relation_equal); +} + +// 0x07 +BX_CPP_INLINE int float64_ordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation != float_relation_unordered); +} + +// 0x08 +BX_CPP_INLINE int float64_eq_unordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation == float_relation_equal) || (relation == float_relation_unordered); +} + +// 0x09 +BX_CPP_INLINE int float64_nge_unordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation == float_relation_less) || (relation == float_relation_unordered); +} + +// 0x0a +BX_CPP_INLINE int float64_ngt_unordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation != float_relation_greater); +} + +// 0x0b +BX_CPP_INLINE int float64_false_quiet(float64 a, float64 b, struct float_status_t *status) +{ + float64_compare_quiet(a, b, status); + return 0; +} + +// 0x0c +BX_CPP_INLINE int float64_neq_ordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation != float_relation_equal) && (relation != float_relation_unordered); +} + +// 0x0d +BX_CPP_INLINE int float64_ge_ordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation == float_relation_greater) || (relation == float_relation_equal); +} + +// 0x0e +BX_CPP_INLINE int float64_gt_ordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation == float_relation_greater); +} + +// 0x0f +BX_CPP_INLINE int float64_true_quiet(float64 a, float64 b, struct float_status_t *status) +{ + float64_compare_quiet(a, b, status); + return 1; +} + +// 0x10 +BX_CPP_INLINE int float64_eq_ordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation == float_relation_equal); +} + +// 0x11 +BX_CPP_INLINE int float64_lt_ordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation == float_relation_less); +} + +// 0x12 +BX_CPP_INLINE int float64_le_ordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation == float_relation_less) || (relation == float_relation_equal); +} + +// 0x13 +BX_CPP_INLINE int float64_unordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation == float_relation_unordered); +} + +// 0x14 +BX_CPP_INLINE int float64_neq_unordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation != float_relation_equal); +} + +// 0x15 +BX_CPP_INLINE int float64_nlt_unordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int 
relation = float64_compare_quiet(a, b, status); + return (relation != float_relation_less); +} + +// 0x16 +BX_CPP_INLINE int float64_nle_unordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation != float_relation_less) && (relation != float_relation_equal); +} + +// 0x17 +BX_CPP_INLINE int float64_ordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation != float_relation_unordered); +} + +// 0x18 +BX_CPP_INLINE int float64_eq_unordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation == float_relation_equal) || (relation == float_relation_unordered); +} + +// 0x19 +BX_CPP_INLINE int float64_nge_unordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation == float_relation_less) || (relation == float_relation_unordered); +} + +// 0x1a +BX_CPP_INLINE int float64_ngt_unordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation != float_relation_greater); +} + +// 0x1b +BX_CPP_INLINE int float64_false_signalling(float64 a, float64 b, struct float_status_t *status) +{ + float64_compare_two(a, b, status); + return 0; +} + +// 0x1c +BX_CPP_INLINE int float64_neq_ordered_signalling(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_two(a, b, status); + return (relation != float_relation_equal) && (relation != float_relation_unordered); +} + +// 0x1d +BX_CPP_INLINE int float64_ge_ordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation == float_relation_greater) || (relation == float_relation_equal); +} + +// 0x1e +BX_CPP_INLINE int float64_gt_ordered_quiet(float64 a, float64 b, struct float_status_t *status) +{ + int relation = float64_compare_quiet(a, b, status); + return (relation == float_relation_greater); +} + +// 0x1f +BX_CPP_INLINE int float64_true_signalling(float64 a, float64 b, struct float_status_t *status) +{ + float64_compare_two(a, b, status); + return 1; +} + +#endif /* _SOFTFLOAT_COMPARE_H_ */
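The predicates in this header are numbered 0x00-0x1f to line up with the immediate byte of SSE/AVX-style compare instructions, which makes table dispatch the natural consumer. A sketch of such a consumer, assuming only the header added above; the table and compare_mask32 are illustrative, not part of this pull request:

#include "softfloat-compare.h"

/* Predicates 0-7 in CMPPS/CMPSS immediate order. */
static const float32_compare_method compare32_table[8] = {
    float32_eq_ordered_quiet,         /* 0x00: EQ    */
    float32_lt_ordered_signalling,    /* 0x01: LT    */
    float32_le_ordered_signalling,    /* 0x02: LE    */
    float32_unordered_quiet,          /* 0x03: UNORD */
    float32_neq_unordered_quiet,      /* 0x04: NEQ   */
    float32_nlt_unordered_signalling, /* 0x05: NLT   */
    float32_nle_unordered_signalling, /* 0x06: NLE   */
    float32_ordered_quiet             /* 0x07: ORD   */
};

BX_CPP_INLINE Bit32u compare_mask32(int imm, float32 a, float32 b,
                                    struct float_status_t *status)
{
    /* SSE compares produce an all-ones mask when the predicate holds */
    return compare32_table[imm & 7](a, b, status) ? 0xFFFFFFFF : 0;
}

diff --git a/src/cpu/softfloat/softfloat-macros.h b/src/cpu/softfloat/softfloat-macros.h new file mode 100644 index 000000000..cb867bf5d --- /dev/null +++ b/src/cpu/softfloat/softfloat-macros.h @@ -0,0 +1,686 @@ +/*============================================================================ +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.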
Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +/*============================================================================ + * Adapted for Bochs (x86 architecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#ifndef _SOFTFLOAT_MACROS_H_ +#define _SOFTFLOAT_MACROS_H_ + +/*---------------------------------------------------------------------------- +| Shifts `a' right by the number of bits given in `count'. If any nonzero +| bits are shifted off, they are ``jammed'' into the least significant bit of +| the result by setting the least significant bit to 1. The value of `count' +| can be arbitrarily large; in particular, if `count' is greater than 16, the +| result will be either 0 or 1, depending on whether `a' is zero or nonzero. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit16u shift16RightJamming(Bit16u a, int count) +{ + Bit16u z; + + if (count == 0) { + z = a; + } + else if (count < 16) { + z = (a>>count) | ((a<<((-count) & 15)) != 0); + } + else { + z = (a != 0); + } + + return z; +} + +/*---------------------------------------------------------------------------- +| Shifts `a' right by the number of bits given in `count'. If any nonzero +| bits are shifted off, they are ``jammed'' into the least significant bit of +| the result by setting the least significant bit to 1. The value of `count' +| can be arbitrarily large; in particular, if `count' is greater than 32, the +| result will be either 0 or 1, depending on whether `a' is zero or nonzero. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit32u shift32RightJamming(Bit32u a, int count) +{ + Bit32u z; + + if (count == 0) { + z = a; + } + else if (count < 32) { + z = (a>>count) | ((a<<((-count) & 31)) != 0); + } + else { + z = (a != 0); + } + + return z; +} + +/*---------------------------------------------------------------------------- +| Shifts `a' right by the number of bits given in `count'. If any nonzero +| bits are shifted off, they are ``jammed'' into the least significant bit of +| the result by setting the least significant bit to 1. The value of `count' +| can be arbitrarily large; in particular, if `count' is greater than 64, the +| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit64u shift64RightJamming(Bit64u a, int count) +{ + Bit64u z; + + if (count == 0) { + z = a; + } + else if (count < 64) { + z = (a>>count) | ((a << ((-count) & 63)) != 0); + } + else { + z = (a != 0); + } + + return z; +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 +| _plus_ the number of bits given in `count'. The shifted result is at most +| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The +| bits shifted off form a second 64-bit result as follows: The _last_ bit +| shifted off is the most-significant bit of the extra result, and the other +| 63 bits of the extra result are all zero if and only if _all_but_the_last_ +| bits shifted off were all zero. This extra result is stored in the location +| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. +| (This routine makes more sense if `a0' and `a1' are considered to form +| a fixed-point value with binary point between `a0' and `a1'. This fixed- +| point value is shifted right by the number of bits given in `count', and +| the integer part of the result is returned at the location pointed to by +| `z0Ptr'. The fractional part of the result may be slightly corrupted as +| described above, and is returned at the location pointed to by `z1Ptr'.) +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void shift64ExtraRightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr) +{ + Bit64u z0, z1; + int negCount = (-count) & 63; + + if (count == 0) { + z1 = a1; + z0 = a0; + } + else if (count < 64) { + z1 = (a0<<negCount) | (a1 != 0); + z0 = a0>>count; + } + else { + if (count == 64) { + z1 = a0 | (a1 != 0); + } + else { + z1 = ((a0 | a1) != 0); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; +} + +/*---------------------------------------------------------------------------- +| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit +| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so +| any carry out is lost. The result is broken into two 64-bit pieces which +| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr) +{ + Bit64u z1 = a1 + b1; + *z1Ptr = z1; + *z0Ptr = a0 + b0 + (z1 < a1); +} + +/*---------------------------------------------------------------------------- +| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the +| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo +| 2^128, so any borrow out (carry out) is lost. The result is broken into two +| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and +| `z1Ptr'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void + sub128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr) +{ + *z1Ptr = a1 - b1; + *z0Ptr = a0 - b0 - (a1 < b1); +}
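The ``jamming'' used by all of these shifts ORs a sticky bit into the lowest result bit whenever nonzero bits fall off, so a later round-to-nearest-even step can still distinguish an exact halfway value from one slightly above it. A plain-C rendering of the 64-bit variant's logic (illustration only, not part of the diff):

#include <stdio.h>

static unsigned long long jam64(unsigned long long a, int count)
{
    if (count == 0) return a;
    if (count < 64) return (a >> count) | ((a << ((-count) & 63)) != 0);
    return (a != 0);
}

int main(void)
{
    /* 0x50 >> 5 drops a set bit: the plain shift yields 2,
       the jammed shift yields 3 because the sticky bit records the loss */
    printf("plain: %llu  jammed: %llu\n", 0x50ULL >> 5, jam64(0x50ULL, 5));
    return 0;
}

/*---------------------------------------------------------------------------- +| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken +| into two 64-bit pieces which are stored at the locations pointed to by +| `z0Ptr' and `z1Ptr'.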
+*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void mul64To128(Bit64u a, Bit64u b, Bit64u *z0Ptr, Bit64u *z1Ptr) +{ + Bit32u aHigh, aLow, bHigh, bLow; + Bit64u z0, zMiddleA, zMiddleB, z1; + + aLow = (Bit32u) a; + aHigh = (Bit32u)(a>>32); + bLow = (Bit32u) b; + bHigh = (Bit32u)(b>>32); + z1 = ((Bit64u) aLow) * bLow; + zMiddleA = ((Bit64u) aLow) * bHigh; + zMiddleB = ((Bit64u) aHigh) * bLow; + z0 = ((Bit64u) aHigh) * bHigh; + zMiddleA += zMiddleB; + z0 += (((Bit64u) (zMiddleA < zMiddleB))<<32) + (zMiddleA>>32); + zMiddleA <<= 32; + z1 += zMiddleA; + z0 += (z1 < zMiddleA); + *z1Ptr = z1; + *z0Ptr = z0; +} + +/*---------------------------------------------------------------------------- +| Returns an approximation to the 64-bit integer quotient obtained by dividing +| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The +| divisor `b' must be at least 2^63. If q is the exact quotient truncated +| toward zero, the approximation returned lies between q and q + 2 inclusive. +| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit +| unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +#ifdef USE_estimateDiv128To64 +static Bit64u estimateDiv128To64(Bit64u a0, Bit64u a1, Bit64u b) +{ + Bit64u b0, b1; + Bit64u rem0, rem1, term0, term1; + Bit64u z; + + if (b <= a0) return BX_CONST64(0xFFFFFFFFFFFFFFFF); + b0 = b>>32; + z = (b0<<32 <= a0) ? BX_CONST64(0xFFFFFFFF00000000) : (a0 / b0)<<32; + mul64To128(b, z, &term0, &term1); + sub128(a0, a1, term0, term1, &rem0, &rem1); + while (((Bit64s) rem0) < 0) { + z -= BX_CONST64(0x100000000); + b1 = b<<32; + add128(rem0, rem1, b0, b1, &rem0, &rem1); + } + rem0 = (rem0<<32) | (rem1>>32); + z |= (b0<<32 <= rem0) ? 0xFFFFFFFF : rem0 / b0; + return z; +} +#endif + +/*---------------------------------------------------------------------------- +| Returns an approximation to the square root of the 32-bit significand given +| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of +| `aExp' (the least significant bit) is 1, the integer returned approximates +| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' +| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either +| case, the approximation returned lies strictly within +/-2 of the exact +| value. +*----------------------------------------------------------------------------*/ + +#ifdef USE_estimateSqrt32 +static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a) +{ + static const Bit16u sqrtOddAdjustments[] = { + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 + }; + static const Bit16u sqrtEvenAdjustments[] = { + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 + }; + Bit32u z; + + int index = (a>>27) & 15; + if (aExp & 1) { + z = 0x4000 + (a>>17) - sqrtOddAdjustments[index]; + z = ((a / z)<<14) + (z<<15); + a >>= 1; + } + else { + z = 0x8000 + (a>>17) - sqrtEvenAdjustments[index]; + z = a / z + z; + z = (0x20000 <= z) ? 
0xFFFF8000 : (z<<15); + if (z <= a) return (Bit32u) (((Bit32s) a)>>1); + } + return ((Bit32u) ((((Bit64u) a)<<31) / z)) + (z>>1); +} +#endif + +static const int countLeadingZeros8[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +#ifdef FLOAT16 + +/*---------------------------------------------------------------------------- +| Returns the number of leading 0 bits before the most-significant 1 bit of +| `a'. If `a' is zero, 16 is returned. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int countLeadingZeros16(Bit16u a) +{ + int shiftCount = 0; + if (a < 0x100) { + shiftCount += 8; + a <<= 8; + } + shiftCount += countLeadingZeros8[a>>8]; + return shiftCount; +} + +#endif + +/*---------------------------------------------------------------------------- +| Returns the number of leading 0 bits before the most-significant 1 bit of +| `a'. If `a' is zero, 32 is returned. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int countLeadingZeros32(Bit32u a) +{ + int shiftCount = 0; + if (a < 0x10000) { + shiftCount += 16; + a <<= 16; + } + if (a < 0x1000000) { + shiftCount += 8; + a <<= 8; + } + shiftCount += countLeadingZeros8[a>>24]; + return shiftCount; +} + +/*---------------------------------------------------------------------------- +| Returns the number of leading 0 bits before the most-significant 1 bit of +| `a'. If `a' is zero, 64 is returned. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int countLeadingZeros64(Bit64u a) +{ + int shiftCount = 0; + if (a < BX_CONST64(0x100000000)) { + shiftCount += 32; + } + else { + a >>= 32; + } + shiftCount += countLeadingZeros32((Bit32u)(a)); + return shiftCount; +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +| number of bits given in `count'. Any bits shifted off are lost. The value +| of `count' can be arbitrarily large; in particular, if `count' is greater +| than 128, the result will be 0. The result is broken into two 64-bit pieces +| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr) +{ + Bit64u z0, z1; + int negCount = (-count) & 63; + + if (count == 0) { + z1 = a1; + z0 = a0; + } + else if (count < 64) { + z1 = (a0<<negCount) | (a1>>count); + z0 = a0>>count; + } + else { + z1 = (count < 128) ?
(a0>>(count & 63)) : 0; + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the +| number of bits given in `count'. If any nonzero bits are shifted off, they +| are ``jammed'' into the least significant bit of the result by setting the +| least significant bit to 1. The value of `count' can be arbitrarily large; +| in particular, if `count' is greater than 128, the result will be either +| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or +| nonzero. The result is broken into two 64-bit pieces which are stored at +| the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void shift128RightJamming(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr) +{ + Bit64u z0, z1; + int negCount = (-count) & 63; + + if (count == 0) { + z1 = a1; + z0 = a0; + } + else if (count < 64) { + z1 = (a0<<negCount) | (a1>>count) | ((a1<<negCount) != 0); + z0 = a0>>count; + } + else { + if (count == 64) { + z1 = a0 | (a1 != 0); + } + else if (count < 128) { + z1 = (a0>>(count & 63)) | (((a0<<negCount) | a1) != 0); + } + else { + z1 = ((a0 | a1) != 0); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; +} + +/*---------------------------------------------------------------------------- +| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the +| number of bits given in `count'. Any bits shifted off are lost. The value +| of `count' must be less than 64. The result is broken into two 64-bit +| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void shortShift128Left(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr) +{ + *z1Ptr = a1<<count; + *z0Ptr = (count == 0) ? a0 : (a0<<count) | (a1>>((-count) & 63)); +} + +/*---------------------------------------------------------------------------- +| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the +| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is +| modulo 2^192, so any carry out is lost. The result is broken into three +| 64-bit pieces which are stored at the locations pointed to by `z0Ptr', +| `z1Ptr', and `z2Ptr'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void add192( + Bit64u a0, + Bit64u a1, + Bit64u a2, + Bit64u b0, + Bit64u b1, + Bit64u b2, + Bit64u *z0Ptr, + Bit64u *z1Ptr, + Bit64u *z2Ptr +) +{ + Bit64u z0, z1, z2; + unsigned carry0, carry1; + + z2 = a2 + b2; + carry1 = (z2 < a2); + z1 = a1 + b1; + carry0 = (z1 < a1); + z0 = a0 + b0; + z1 += carry1; + z0 += (z1 < carry1); + z0 += carry0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; +} + +/*---------------------------------------------------------------------------- +| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' +| from the 192-bit value formed by concatenating `a0', `a1', and `a2'. +| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The +| result is broken into three 64-bit pieces which are stored at the locations +| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void sub192( + Bit64u a0, + Bit64u a1, + Bit64u a2, + Bit64u b0, + Bit64u b1, + Bit64u b2, + Bit64u *z0Ptr, + Bit64u *z1Ptr, + Bit64u *z2Ptr +) +{ + Bit64u z0, z1, z2; + unsigned borrow0, borrow1; + + z2 = a2 - b2; + borrow1 = (a2 < b2); + z1 = a1 - b1; + borrow0 = (a1 < b1); + z0 = a0 - b0; + z0 -= (z1 < borrow1); + z1 -= borrow1; + z0 -= borrow0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; +}
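The carry chain in add128/add192 recovers the carry-out of a 64-bit addition from the comparison (z1 < a1), which is 1 exactly when the addition wrapped modulo 2^64. A standalone demonstration of the trick (illustration only, not part of the diff):

#include <stdio.h>

static void add128_demo(unsigned long long a0, unsigned long long a1,
                        unsigned long long b0, unsigned long long b1,
                        unsigned long long *z0, unsigned long long *z1)
{
    *z1 = a1 + b1;
    *z0 = a0 + b0 + (*z1 < a1);   /* wrapped iff the sum is below an addend */
}

int main(void)
{
    unsigned long long z0, z1;
    /* 0x1_FFFFFFFFFFFFFFFF + 1: the low word wraps to 0, carry reaches z0 */
    add128_demo(1, 0xFFFFFFFFFFFFFFFFULL, 0, 1, &z0, &z1);
    printf("%llx %llx\n", z0, z1);    /* prints: 2 0 */
    return 0;
}

/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' +| is equal to the 128-bit value formed by concatenating `b0' and `b1'. +| Otherwise, returns 0.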
+*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int eq128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1) +{ + return (a0 == b0) && (a1 == b1); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +| than or equal to the 128-bit value formed by concatenating `b0' and `b1'. +| Otherwise, returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int le128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1) +{ + return (a0 < b0) || ((a0 == b0) && (a1 <= b1)); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less +| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, +| returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int lt128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1) +{ + return (a0 < b0) || ((a0 == b0) && (a1 < b1)); +} + +#endif /* FLOATX80 */ + +/*---------------------------------------------------------------------------- +| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by +| `b' to obtain a 192-bit product. The product is broken into three 64-bit +| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and +| `z2Ptr'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void mul128By64To192( + Bit64u a0, + Bit64u a1, + Bit64u b, + Bit64u *z0Ptr, + Bit64u *z1Ptr, + Bit64u *z2Ptr +) +{ + Bit64u z0, z1, z2, more1; + + mul64To128(a1, b, &z1, &z2); + mul64To128(a0, b, &z0, &more1); + add128(z0, more1, 0, z1, &z0, &z1); + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; +} + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the +| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit +| product. The product is broken into four 64-bit pieces which are stored at +| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void mul128To256( + Bit64u a0, + Bit64u a1, + Bit64u b0, + Bit64u b1, + Bit64u *z0Ptr, + Bit64u *z1Ptr, + Bit64u *z2Ptr, + Bit64u *z3Ptr +) +{ + Bit64u z0, z1, z2, z3; + Bit64u more1, more2; + + mul64To128(a1, b1, &z2, &z3); + mul64To128(a1, b0, &z1, &more2); + add128(z1, more2, 0, z2, &z1, &z2); + mul64To128(a0, b0, &z0, &more1); + add128(z0, more1, 0, z1, &z0, &z1); + mul64To128(a0, b1, &more1, &more2); + add128(more1, more2, 0, z2, &more1, &z2); + add128(z0, z1, 0, more1, &z0, &z1); + *z3Ptr = z3; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; +} + + +/*---------------------------------------------------------------------------- +| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right +| by 64 _plus_ the number of bits given in `count'. The shifted result is +| at most 128 nonzero bits; these are broken into two 64-bit pieces which are +| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
The bits shifted +| off form a third 64-bit result as follows: The _last_ bit shifted off is +| the most-significant bit of the extra result, and the other 63 bits of the +| extra result are all zero if and only if _all_but_the_last_ bits shifted off +| were all zero. This extra result is stored in the location pointed to by +| `z2Ptr'. The value of `count' can be arbitrarily large. +| (This routine makes more sense if `a0', `a1', and `a2' are considered +| to form a fixed-point value with binary point between `a1' and `a2'. This +| fixed-point value is shifted right by the number of bits given in `count', +| and the integer part of the result is returned at the locations pointed to +| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly +| corrupted as described above, and is returned at the location pointed to by +| `z2Ptr'.) +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void shift128ExtraRightJamming( + Bit64u a0, + Bit64u a1, + Bit64u a2, + int count, + Bit64u *z0Ptr, + Bit64u *z1Ptr, + Bit64u *z2Ptr +) +{ + Bit64u z0, z1, z2; + int negCount = (-count) & 63; + + if (count == 0) { + z2 = a2; + z1 = a1; + z0 = a0; + } + else { + if (count < 64) { + z2 = a1<<negCount; + z1 = (a0<<negCount) | (a1>>count); + z0 = a0>>count; + } + else { + if (count == 64) { + z2 = a1; + z1 = a0; + } + else { + a2 |= a1; + if (count < 128) { + z2 = a0<<negCount; + z1 = a0>>(count & 63); + } + else { + z2 = (count == 128) ? a0 : (a0 != 0); + z1 = 0; + } + } + z0 = 0; + } + z2 |= (a2 != 0); + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; +} + +#endif /* FLOAT128 */ + +#endif /* _SOFTFLOAT_MACROS_H_ */
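Elsewhere in the port (normalizeFloatx80Subnormal and the round-and-pack helpers) the leading-zero counts above drive normalization: the significand is shifted so its leading 1 reaches bit 63 and the exponent is debited by the same amount. A plain-C sketch of that pattern; clz64 here is an illustrative stand-in for countLeadingZeros64, not code from this pull request:

#include <stdio.h>

static int clz64(unsigned long long a)
{
    int n = 0;
    if (a == 0) return 64;
    while (!(a & 0x8000000000000000ULL)) { a <<= 1; n++; }
    return n;
}

int main(void)
{
    unsigned long long sig = 0x0000000123456789ULL; /* subnormal significand */
    int exp = 0;
    int shift = clz64(sig);   /* bits needed to bring the MSB to bit 63 */
    sig <<= shift;
    exp -= shift;
    printf("sig=%016llx exp=%d\n", sig, exp); /* sig=91a2b3c480000000 exp=-31 */
    return 0;
}

diff --git a/src/cpu/softfloat/softfloat-muladd.cc b/src/cpu/softfloat/softfloat-muladd.cc new file mode 100644 index 000000000..7c9fec70e --- /dev/null +++ b/src/cpu/softfloat/softfloat-muladd.cc @@ -0,0 +1,558 @@ +/*============================================================================ +This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained.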
+=============================================================================*/ + +/*============================================================================ + * This code is based on QEMU patch by Peter Maydell + * Adapted for Bochs (x86 architecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#include "softfloat.h" +#include "softfloat-round-pack.h" + +/*---------------------------------------------------------------------------- +| Primitive arithmetic functions, including multi-word arithmetic, and +| division and square root approximations. (Can be specialized to target +| if desired). +*----------------------------------------------------------------------------*/ +#include "softfloat-macros.h" + +/*---------------------------------------------------------------------------- +| Functions and definitions to determine: (1) whether tininess for underflow +| is detected before or after rounding by default, (2) what (if anything) +| happens when exceptions are raised, (3) how signaling NaNs are distinguished +| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs +| are propagated from function inputs to output. These details are target- +| specific. +*----------------------------------------------------------------------------*/ +#include "softfloat-specialize.h" + +/*---------------------------------------------------------------------------- +| Takes three single-precision floating-point values `a', `b' and `c', one of +| which is a NaN, and returns the appropriate NaN result. If any of `a', +| `b' or `c' is a signaling NaN, the invalid exception is raised. +| The input infzero indicates whether a*b was 0*inf or inf*0 (in which case +| obviously c is a NaN, and whether to propagate c or some other NaN is +| implementation defined). +*----------------------------------------------------------------------------*/ + +static float32 propagateFloat32MulAddNaN(float32 a, float32 b, float32 c, struct float_status_t *status) +{ + int aIsNaN = float32_is_nan(a); + int bIsNaN = float32_is_nan(b); + + int aIsSignalingNaN = float32_is_signaling_nan(a); + int bIsSignalingNaN = float32_is_signaling_nan(b); + int cIsSignalingNaN = float32_is_signaling_nan(c); + + a |= 0x00400000; + b |= 0x00400000; + c |= 0x00400000; + + if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN) + float_raise(status, float_flag_invalid); + + // operate according to float_first_operand_nan mode + if (aIsSignalingNaN | aIsNaN) { + return a; + } + else { + return (bIsSignalingNaN | bIsNaN) ? b : c; + } +} + +/*---------------------------------------------------------------------------- +| Takes three double-precision floating-point values `a', `b' and `c', one of +| which is a NaN, and returns the appropriate NaN result. If any of `a', +| `b' or `c' is a signaling NaN, the invalid exception is raised. +| The input infzero indicates whether a*b was 0*inf or inf*0 (in which case +| obviously c is a NaN, and whether to propagate c or some other NaN is +| implementation defined).
+*----------------------------------------------------------------------------*/ + +static float64 propagateFloat64MulAddNaN(float64 a, float64 b, float64 c, struct float_status_t *status) +{ + int aIsNaN = float64_is_nan(a); + int bIsNaN = float64_is_nan(b); + + int aIsSignalingNaN = float64_is_signaling_nan(a); + int bIsSignalingNaN = float64_is_signaling_nan(b); + int cIsSignalingNaN = float64_is_signaling_nan(c); + + a |= BX_CONST64(0x0008000000000000); + b |= BX_CONST64(0x0008000000000000); + c |= BX_CONST64(0x0008000000000000); + + if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN) + float_raise(status, float_flag_invalid); + + // operate according to float_first_operand_nan mode + if (aIsSignalingNaN | aIsNaN) { + return a; + } + else { + return (bIsSignalingNaN | bIsNaN) ? b : c; + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the single-precision floating-point values +| `a' and `b' then adding 'c', with no intermediate rounding step after the +| multiplication. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic 754-2008. +| The flags argument allows the caller to select negation of the +| addend, the intermediate product, or the final result. (The difference +| between this and having the caller do a separate negation is that negating +| externally will flip the sign bit on NaNs.) +*----------------------------------------------------------------------------*/ + +float32 float32_muladd(float32 a, float32 b, float32 c, int flags, struct float_status_t *status) +{ + int aSign, bSign, cSign, zSign; + Bit16s aExp, bExp, cExp, pExp, zExp; + Bit32u aSig, bSig, cSig; + int pInf, pZero, pSign; + Bit64u pSig64, cSig64, zSig64; + Bit32u pSig; + int shiftcount; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + bSig = extractFloat32Frac(b); + bExp = extractFloat32Exp(b); + bSign = extractFloat32Sign(b); + cSig = extractFloat32Frac(c); + cExp = extractFloat32Exp(c); + cSign = extractFloat32Sign(c); + + /* It is implementation-defined whether the cases of (0,inf,qnan) + * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN + * they return if they do), so we have to hand this information + * off to the target-specific pick-a-NaN routine. 
+ */ + if (((aExp == 0xff) && aSig) || + ((bExp == 0xff) && bSig) || + ((cExp == 0xff) && cSig)) { + return propagateFloat32MulAddNaN(a, b, c, status); + } + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + if (cExp == 0) cSig = 0; + } + + int infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) || + (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0)); + + if (infzero) { + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + + if (flags & float_muladd_negate_c) { + cSign ^= 1; + } + + /* Work out the sign and type of the product */ + pSign = aSign ^ bSign; + if (flags & float_muladd_negate_product) { + pSign ^= 1; + } + pInf = (aExp == 0xff) || (bExp == 0xff); + pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0); + + if (cExp == 0xff) { + if (pInf && (pSign ^ cSign)) { + /* addition of opposite-signed infinities => InvalidOperation */ + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + /* Otherwise generate an infinity of the same sign */ + if ((aSig && aExp == 0) || (bSig && bExp == 0)) { + float_raise(status, float_flag_denormal); + } + return packFloat32(cSign, 0xff, 0); + } + + if (pInf) { + if ((aSig && aExp == 0) || (bSig && bExp == 0) || (cSig && cExp == 0)) { + float_raise(status, float_flag_denormal); + } + return packFloat32(pSign, 0xff, 0); + } + + if (pZero) { + if (cExp == 0) { + if (cSig == 0) { + /* Adding two exact zeroes */ + if (pSign == cSign) { + zSign = pSign; + } else if (get_float_rounding_mode(status) == float_round_down) { + zSign = 1; + } else { + zSign = 0; + } + return packFloat32(zSign, 0, 0); + } + /* Exact zero plus a denormal */ + float_raise(status, float_flag_denormal); + if (get_flush_underflow_to_zero(status)) { + float_raise(status, float_flag_underflow | float_flag_inexact); + return packFloat32(cSign, 0, 0); + } + } + /* Zero plus something non-zero */ + return packFloat32(cSign, cExp, cSig); + } + + if (aExp == 0) { + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(bSig, &bExp, &bSig); + } + + /* Calculate the actual result a * b + c */ + + /* Multiply first; this is easy. */ + /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f + * because we want the true exponent, not the "one-less-than" + * flavour that roundAndPackFloat32() takes. + */ + pExp = aExp + bExp - 0x7e; + aSig = (aSig | 0x00800000) << 7; + bSig = (bSig | 0x00800000) << 8; + pSig64 = (Bit64u)aSig * bSig; + if ((Bit64s)(pSig64 << 1) >= 0) { + pSig64 <<= 1; + pExp--; + } + + zSign = pSign; + + /* Now pSig64 is the significand of the multiply, with the explicit bit in + * position 62. 
+ */ + if (cExp == 0) { + if (!cSig) { + /* Throw out the special case of c being an exact zero now */ + pSig = (Bit32u) shift64RightJamming(pSig64, 32); + return roundAndPackFloat32(zSign, pExp - 1, pSig, status); + } + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(cSig, &cExp, &cSig); + } + + cSig64 = (Bit64u)cSig << 39; + cSig64 |= BX_CONST64(0x4000000000000000); + int expDiff = pExp - cExp; + + if (pSign == cSign) { + /* Addition */ + if (expDiff > 0) { + /* scale c to match p */ + cSig64 = shift64RightJamming(cSig64, expDiff); + zExp = pExp; + } else if (expDiff < 0) { + /* scale p to match c */ + pSig64 = shift64RightJamming(pSig64, -expDiff); + zExp = cExp; + } else { + /* no scaling needed */ + zExp = cExp; + } + /* Add significands and make sure explicit bit ends up in posn 62 */ + zSig64 = pSig64 + cSig64; + if ((Bit64s)zSig64 < 0) { + zSig64 = shift64RightJamming(zSig64, 1); + } else { + zExp--; + } + zSig64 = shift64RightJamming(zSig64, 32); + return roundAndPackFloat32(zSign, zExp, zSig64, status); + } else { + /* Subtraction */ + if (expDiff > 0) { + cSig64 = shift64RightJamming(cSig64, expDiff); + zSig64 = pSig64 - cSig64; + zExp = pExp; + } else if (expDiff < 0) { + pSig64 = shift64RightJamming(pSig64, -expDiff); + zSig64 = cSig64 - pSig64; + zExp = cExp; + zSign ^= 1; + } else { + zExp = pExp; + if (cSig64 < pSig64) { + zSig64 = pSig64 - cSig64; + } else if (pSig64 < cSig64) { + zSig64 = cSig64 - pSig64; + zSign ^= 1; + } else { + /* Exact zero */ + return packFloat32(get_float_rounding_mode(status) == float_round_down, 0, 0); + } + } + --zExp; + /* Do the equivalent of normalizeRoundAndPackFloat32() but + * starting with the significand in a Bit64u. + */ + shiftcount = countLeadingZeros64(zSig64) - 1; + zSig64 <<= shiftcount; + zExp -= shiftcount; + zSig64 = shift64RightJamming(zSig64, 32); + return roundAndPackFloat32(zSign, zExp, zSig64, status); + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the double-precision floating-point values +| `a' and `b' then adding 'c', with no intermediate rounding step after the +| multiplication. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic 754-2008. +| The flags argument allows the caller to select negation of the +| addend, the intermediate product, or the final result. (The difference +| between this and having the caller do a separate negation is that negating +| externally will flip the sign bit on NaNs.) +*----------------------------------------------------------------------------*/ + +float64 float64_muladd(float64 a, float64 b, float64 c, int flags, struct float_status_t *status) +{ + int aSign, bSign, cSign, zSign; + Bit16s aExp, bExp, cExp, pExp, zExp; + Bit64u aSig, bSig, cSig; + int pInf, pZero, pSign; + Bit64u pSig0, pSig1, cSig0, cSig1, zSig0, zSig1; + int shiftcount; + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + bSig = extractFloat64Frac(b); + bExp = extractFloat64Exp(b); + bSign = extractFloat64Sign(b); + cSig = extractFloat64Frac(c); + cExp = extractFloat64Exp(c); + cSign = extractFloat64Sign(c); + + /* It is implementation-defined whether the cases of (0,inf,qnan) + * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN + * they return if they do), so we have to hand this information + * off to the target-specific pick-a-NaN routine. 
+ */ + if (((aExp == 0x7ff) && aSig) || + ((bExp == 0x7ff) && bSig) || + ((cExp == 0x7ff) && cSig)) { + return propagateFloat64MulAddNaN(a, b, c, status); + } + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + if (cExp == 0) cSig = 0; + } + + int infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) || + (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0)); + + if (infzero) { + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + + if (flags & float_muladd_negate_c) { + cSign ^= 1; + } + + /* Work out the sign and type of the product */ + pSign = aSign ^ bSign; + if (flags & float_muladd_negate_product) { + pSign ^= 1; + } + pInf = (aExp == 0x7ff) || (bExp == 0x7ff); + pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0); + + if (cExp == 0x7ff) { + if (pInf && (pSign ^ cSign)) { + /* addition of opposite-signed infinities => InvalidOperation */ + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + /* Otherwise generate an infinity of the same sign */ + if ((aSig && aExp == 0) || (bSig && bExp == 0)) { + float_raise(status, float_flag_denormal); + } + return packFloat64(cSign, 0x7ff, 0); + } + + if (pInf) { + if ((aSig && aExp == 0) || (bSig && bExp == 0) || (cSig && cExp == 0)) { + float_raise(status, float_flag_denormal); + } + return packFloat64(pSign, 0x7ff, 0); + } + + if (pZero) { + if (cExp == 0) { + if (cSig == 0) { + /* Adding two exact zeroes */ + if (pSign == cSign) { + zSign = pSign; + } else if (get_float_rounding_mode(status) == float_round_down) { + zSign = 1; + } else { + zSign = 0; + } + return packFloat64(zSign, 0, 0); + } + /* Exact zero plus a denormal */ + float_raise(status, float_flag_denormal); + if (get_flush_underflow_to_zero(status)) { + float_raise(status, float_flag_underflow | float_flag_inexact); + return packFloat64(cSign, 0, 0); + } + } + /* Zero plus something non-zero */ + return packFloat64(cSign, cExp, cSig); + } + + if (aExp == 0) { + float_raise(status, float_flag_denormal); + normalizeFloat64Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + float_raise(status, float_flag_denormal); + normalizeFloat64Subnormal(bSig, &bExp, &bSig); + } + + /* Calculate the actual result a * b + c */ + + /* Multiply first; this is easy. */ + /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff + * because we want the true exponent, not the "one-less-than" + * flavour that roundAndPackFloat64() takes. + */ + pExp = aExp + bExp - 0x3fe; + aSig = (aSig | BX_CONST64(0x0010000000000000))<<10; + bSig = (bSig | BX_CONST64(0x0010000000000000))<<11; + mul64To128(aSig, bSig, &pSig0, &pSig1); + if ((Bit64s)(pSig0 << 1) >= 0) { + shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1); + pExp--; + } + + zSign = pSign; + + /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit + * bit in position 126. 
+ */ + if (cExp == 0) { + if (!cSig) { + /* Throw out the special case of c being an exact zero now */ + shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1); + return roundAndPackFloat64(zSign, pExp - 1, pSig1, status); + } + float_raise(status, float_flag_denormal); + normalizeFloat64Subnormal(cSig, &cExp, &cSig); + } + + cSig0 = cSig << 10; + cSig1 = 0; + cSig0 |= BX_CONST64(0x4000000000000000); + int expDiff = pExp - cExp; + + if (pSign == cSign) { + /* Addition */ + if (expDiff > 0) { + /* scale c to match p */ + shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1); + zExp = pExp; + } else if (expDiff < 0) { + /* scale p to match c */ + shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1); + zExp = cExp; + } else { + /* no scaling needed */ + zExp = cExp; + } + /* Add significands and make sure explicit bit ends up in posn 126 */ + add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1); + if ((Bit64s)zSig0 < 0) { + shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1); + } else { + zExp--; + } + shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1); + return roundAndPackFloat64(zSign, zExp, zSig1, status); + } else { + /* Subtraction */ + if (expDiff > 0) { + shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1); + sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1); + zExp = pExp; + } else if (expDiff < 0) { + shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1); + sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1); + zExp = cExp; + zSign ^= 1; + } else { + zExp = pExp; + if (lt128(cSig0, cSig1, pSig0, pSig1)) { + sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1); + } else if (lt128(pSig0, pSig1, cSig0, cSig1)) { + sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1); + zSign ^= 1; + } else { + /* Exact zero */ + return packFloat64(get_float_rounding_mode(status) == float_round_down, 0, 0); + } + } + --zExp; + /* Do the equivalent of normalizeRoundAndPackFloat64() but + * starting with the significand in a pair of Bit64u. + */ + if (zSig0) { + shiftcount = countLeadingZeros64(zSig0) - 1; + shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1); + if (zSig1) { + zSig0 |= 1; + } + zExp -= shiftcount; + } else { + shiftcount = countLeadingZeros64(zSig1) - 1; + zSig0 = zSig1 << shiftcount; + zExp -= (shiftcount + 64); + } + return roundAndPackFloat64(zSign, zExp, zSig0, status); + } +} diff --git a/src/cpu/softfloat/softfloat-round-pack.cc b/src/cpu/softfloat/softfloat-round-pack.cc new file mode 100644 index 000000000..2b3965840 --- /dev/null +++ b/src/cpu/softfloat/softfloat-round-pack.cc @@ -0,0 +1,896 @@ +/*============================================================================ +This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. 
Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +#define FLOAT128 + +/*============================================================================ + * Adapted for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#include "softfloat.h" +#include "softfloat-round-pack.h" + +/*---------------------------------------------------------------------------- +| Primitive arithmetic functions, including multi-word arithmetic, and +| division and square root approximations. (Can be specialized to target +| if desired). +*----------------------------------------------------------------------------*/ +#include "softfloat-macros.h" + +/*---------------------------------------------------------------------------- +| Functions and definitions to determine: (1) whether tininess for underflow +| is detected before or after rounding by default, (2) what (if anything) +| happens when exceptions are raised, (3) how signaling NaNs are distinguished +| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs +| are propagated from function inputs to output. These details are target- +| specific. +*----------------------------------------------------------------------------*/ +#include "softfloat-specialize.h" + +/*---------------------------------------------------------------------------- +| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 +| and 7, and returns the properly rounded 32-bit integer corresponding to the +| input. If `zSign' is 1, the input is negated before being converted to an +| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input +| is simply rounded to an integer, with the inexact exception raised if the +| input cannot be represented exactly as an integer. However, if the fixed- +| point input is too large, the invalid exception is raised and the integer +| indefinite value is returned. +*----------------------------------------------------------------------------*/ + +Bit32s roundAndPackInt32(int zSign, Bit64u exactAbsZ, struct float_status_t *status) +{ + int roundingMode = get_float_rounding_mode(status); + int roundNearestEven = (roundingMode == float_round_nearest_even); + int roundIncrement = 0x40; + if (! 
roundNearestEven) { + if (roundingMode == float_round_to_zero) roundIncrement = 0; + else { + roundIncrement = 0x7F; + if (zSign) { + if (roundingMode == float_round_up) roundIncrement = 0; + } + else { + if (roundingMode == float_round_down) roundIncrement = 0; + } + } + } + int roundBits = (int)(exactAbsZ & 0x7F); + Bit64u absZ = (exactAbsZ + roundIncrement)>>7; + absZ &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven); + Bit32s z = (Bit32s) absZ; + if (zSign) z = -z; + if ((absZ>>32) || (z && ((z < 0) ^ zSign))) { + float_raise(status, float_flag_invalid); + return (Bit32s)(int32_indefinite); + } + if (roundBits) { + float_raise(status, float_flag_inexact); + if ((absZ << 7) > exactAbsZ) + set_float_rounding_up(status); + } + return z; +} + +/*---------------------------------------------------------------------------- +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +| `absZ1', with binary point between bits 63 and 64 (between the input words), +| and returns the properly rounded 64-bit integer corresponding to the input. +| If `zSign' is 1, the input is negated before being converted to an integer. +| Ordinarily, the fixed-point input is simply rounded to an integer, with +| the inexact exception raised if the input cannot be represented exactly as +| an integer. However, if the fixed-point input is too large, the invalid +| exception is raised and the integer indefinite value is returned. +*----------------------------------------------------------------------------*/ + +Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status) +{ + Bit64s z; + int roundingMode = get_float_rounding_mode(status); + int roundNearestEven = (roundingMode == float_round_nearest_even); + int increment = ((Bit64s) absZ1 < 0); + if (! roundNearestEven) { + if (roundingMode == float_round_to_zero) increment = 0; + else { + if (zSign) { + increment = (roundingMode == float_round_down) && absZ1; + } + else { + increment = (roundingMode == float_round_up) && absZ1; + } + } + } + Bit64u exactAbsZ0 = absZ0; + if (increment) { + ++absZ0; + if (absZ0 == 0) goto overflow; + absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven); + } + z = absZ0; + if (zSign) z = -z; + if (z && ((z < 0) ^ zSign)) { + overflow: + float_raise(status, float_flag_invalid); + return (Bit64s)(int64_indefinite); + } + if (absZ1) { + float_raise(status, float_flag_inexact); + if (absZ0 > exactAbsZ0) + set_float_rounding_up(status); + } + return z; +} + +/*---------------------------------------------------------------------------- +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +| `absZ1', with binary point between bits 63 and 64 (between the input words), +| and returns the properly rounded 64-bit unsigned integer corresponding to the +| input. Ordinarily, the fixed-point input is simply rounded to an integer, +| with the inexact exception raised if the input cannot be represented exactly +| as an integer. However, if the fixed-point input is too large, the invalid +| exception is raised and the largest unsigned integer is returned. 
+*----------------------------------------------------------------------------*/
+
+Bit64u roundAndPackUint64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status)
+{
+ int roundingMode = get_float_rounding_mode(status);
+ int roundNearestEven = (roundingMode == float_round_nearest_even);
+ int increment = ((Bit64s) absZ1 < 0);
+ if (!roundNearestEven) {
+ if (roundingMode == float_round_to_zero) {
+ increment = 0;
+ } else if (absZ1) {
+ if (zSign) {
+ increment = (roundingMode == float_round_down) && absZ1;
+ } else {
+ increment = (roundingMode == float_round_up) && absZ1;
+ }
+ }
+ }
+ if (increment) {
+ ++absZ0;
+ if (absZ0 == 0) {
+ float_raise(status, float_flag_invalid);
+ return uint64_indefinite;
+ }
+ absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);
+ }
+
+ if (zSign && absZ0) {
+ float_raise(status, float_flag_invalid);
+ return uint64_indefinite;
+ }
+
+ if (absZ1) {
+ float_raise(status, float_flag_inexact);
+ }
+ return absZ0;
+}
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal half-precision floating-point value represented
+| by the denormalized significand `aSig'. The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr)
+{
+ int shiftCount = countLeadingZeros16(aSig) - 5;
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper half-precision floating-
+| point value corresponding to the abstract input. Ordinarily, the abstract
+| value is simply rounded and packed into the half-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+| The input significand `zSig' has its binary point between bits 14
+| and 13, which is 4 bits to the left of the usual location. This shifted
+| significand must be normalized or smaller. If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding. In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, struct float_status_t *status)
+{
+ int roundingMode = get_float_rounding_mode(status);
+ int roundNearestEven = (roundingMode == float_round_nearest_even);
+ int roundIncrement = 8;
+ if (! roundNearestEven) {
+ if (roundingMode == float_round_to_zero) roundIncrement = 0;
+ else {
+ roundIncrement = 0xF;
+ if (zSign) {
+ if (roundingMode == float_round_up) roundIncrement = 0;
+ }
+ else {
+ if (roundingMode == float_round_down) roundIncrement = 0;
+ }
+ }
+ }
+ int roundBits = zSig & 0xF;
+ if (0x1D <= (Bit16u) zExp) {
+ if ((0x1D < zExp)
+ || ((zExp == 0x1D) && ((Bit16s) (zSig + roundIncrement) < 0)))
+ {
+ float_raise(status, float_flag_overflow | float_flag_inexact);
+ return packFloat16(zSign, 0x1F, 0) - (roundIncrement == 0);
+ }
+ if (zExp < 0) {
+ int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x8000);
+ zSig = shift16RightJamming(zSig, -zExp);
+ zExp = 0;
+ roundBits = zSig & 0xF;
+ if (isTiny && roundBits) float_raise(status, float_flag_underflow);
+ }
+ }
+ if (roundBits) float_raise(status, float_flag_inexact);
+ Bit16u zSigRound = (zSig + roundIncrement) >> 4;
+ zSigRound &= ~(((roundBits ^ 0x10) == 0) & roundNearestEven);
+ if (zSigRound == 0) zExp = 0;
+ return packFloat16(zSign, zExp, zSigRound);
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'. The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr)
+{
+ int shiftCount = countLeadingZeros32(aSig) - 8;
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input. Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+| The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location. This shifted
+| significand must be normalized or smaller. If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding. In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status)
+{
+ int roundingMode = get_float_rounding_mode(status);
+ int roundNearestEven = (roundingMode == float_round_nearest_even);
+ int roundIncrement = 0x40;
+ if (! roundNearestEven) {
+ if (roundingMode == float_round_to_zero) roundIncrement = 0;
+ else {
+ roundIncrement = 0x7F;
+ if (zSign) {
+ if (roundingMode == float_round_up) roundIncrement = 0;
+ }
+ else {
+ if (roundingMode == float_round_down) roundIncrement = 0;
+ }
+ }
+ }
+ int roundBits = zSig & 0x7F;
+ if (0xFD <= (Bit16u) zExp) {
+ if ((0xFD < zExp)
+ || ((zExp == 0xFD) && ((Bit32s) (zSig + roundIncrement) < 0)))
+ {
+ float_raise(status, float_flag_overflow | float_flag_inexact);
+ return packFloat32(zSign, 0xFF, 0) - (roundIncrement == 0);
+ }
+ if (zExp < 0) {
+ if (get_flush_underflow_to_zero(status)) {
+ float_raise(status, float_flag_underflow | float_flag_inexact);
+ return packFloat32(zSign, 0, 0);
+ }
+ int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x80000000);
+ zSig = shift32RightJamming(zSig, -zExp);
+ zExp = 0;
+ roundBits = zSig & 0x7F;
+ if (isTiny) {
+ if (roundBits || (zSig && !float_exception_masked(status, float_flag_underflow)))
+ float_raise(status, float_flag_underflow);
+ }
+ }
+ }
+ Bit32u zSigRound = (zSig + roundIncrement) >> 7;
+ zSigRound &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
+ if (zSigRound == 0) zExp = 0;
+ if (roundBits) {
+ float_raise(status, float_flag_inexact);
+ if ((zSigRound << 7) > zSig) set_float_rounding_up(status);
+ }
+ return packFloat32(zSign, zExp, zSigRound);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input. This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status)
+{
+ int shiftCount = countLeadingZeros32(zSig) - 1;
+ return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, status);
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'. The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr)
+{
+ int shiftCount = countLeadingZeros64(aSig) - 11;
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input. Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+| The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location. This shifted
+| significand must be normalized or smaller. If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding. In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status)
+{
+ int roundingMode = get_float_rounding_mode(status);
+ int roundNearestEven = (roundingMode == float_round_nearest_even);
+ int roundIncrement = 0x200;
+ if (! roundNearestEven) {
+ if (roundingMode == float_round_to_zero) roundIncrement = 0;
+ else {
+ roundIncrement = 0x3FF;
+ if (zSign) {
+ if (roundingMode == float_round_up) roundIncrement = 0;
+ }
+ else {
+ if (roundingMode == float_round_down) roundIncrement = 0;
+ }
+ }
+ }
+ int roundBits = (int)(zSig & 0x3FF);
+ if (0x7FD <= (Bit16u) zExp) {
+ if ((0x7FD < zExp)
+ || ((zExp == 0x7FD) && ((Bit64s) (zSig + roundIncrement) < 0)))
+ {
+ float_raise(status, float_flag_overflow | float_flag_inexact);
+ return packFloat64(zSign, 0x7FF, 0) - (roundIncrement == 0);
+ }
+ if (zExp < 0) {
+ if (get_flush_underflow_to_zero(status)) {
+ float_raise(status, float_flag_underflow | float_flag_inexact);
+ return packFloat64(zSign, 0, 0);
+ }
+ int isTiny = (zExp < -1) || (zSig + roundIncrement < BX_CONST64(0x8000000000000000));
+ zSig = shift64RightJamming(zSig, -zExp);
+ zExp = 0;
+ roundBits = (int)(zSig & 0x3FF);
+ if (isTiny) {
+ if (roundBits || (zSig && !float_exception_masked(status, float_flag_underflow)))
+ float_raise(status, float_flag_underflow);
+ }
+ }
+ }
+ Bit64u zSigRound = (zSig + roundIncrement)>>10;
+ zSigRound &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
+ if (zSigRound == 0) zExp = 0;
+ if (roundBits) {
+ float_raise(status, float_flag_inexact);
+ if ((zSigRound << 10) > zSig) set_float_rounding_up(status);
+ }
+ return packFloat64(zSign, zExp, zSigRound);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input. This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status)
+{
+ int shiftCount = countLeadingZeros64(zSig) - 1;
+ return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount, status);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'. The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr)
+{
+ int shiftCount = countLeadingZeros64(aSig);
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input. Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+| If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively. Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+| The input significand must be normalized or smaller. If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding. The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 SoftFloatRoundAndPackFloatx80(int roundingPrecision,
+ int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status)
+{
+ Bit64u roundIncrement, roundMask, roundBits, zSigExact;
+ int increment;
+
+ int roundingMode = get_float_rounding_mode(status);
+ int roundNearestEven = (roundingMode == float_round_nearest_even);
+ if (roundingPrecision == 64) {
+ roundIncrement = BX_CONST64(0x0000000000000400);
+ roundMask = BX_CONST64(0x00000000000007FF);
+ }
+ else if (roundingPrecision == 32) {
+ roundIncrement = BX_CONST64(0x0000008000000000);
+ roundMask = BX_CONST64(0x000000FFFFFFFFFF);
+ }
+ else goto precision80;
+
+ zSig0 |= (zSig1 != 0);
+ if (! roundNearestEven) {
+ if (roundingMode == float_round_to_zero) roundIncrement = 0;
+ else {
+ roundIncrement = roundMask;
+ if (zSign) {
+ if (roundingMode == float_round_up) roundIncrement = 0;
+ }
+ else {
+ if (roundingMode == float_round_down) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = zSig0 & roundMask;
+ if (0x7FFD <= (Bit32u) (zExp - 1)) {
+ if ((0x7FFE < zExp)
+ || ((zExp == 0x7FFE) && (zSig0 + roundIncrement < zSig0)))
+ {
+ goto overflow;
+ }
+ if (zExp <= 0) {
+ int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement);
+ zSig0 = shift64RightJamming(zSig0, 1 - zExp);
+ zSigExact = zSig0;
+ zExp = 0;
+ roundBits = zSig0 & roundMask;
+ if (isTiny) {
+ if (roundBits || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
+ float_raise(status, float_flag_underflow);
+ }
+ if (roundBits) {
+ float_raise(status, float_flag_inexact);
+ zSig0 += roundIncrement;
+ if ((Bit64s) zSig0 < 0) zExp = 1;
+ roundIncrement = roundMask + 1;
+ if (roundNearestEven && (roundBits<<1 == roundIncrement))
+ roundMask |= roundIncrement;
+ zSig0 &= ~roundMask;
+ if (zSig0 > zSigExact) set_float_rounding_up(status);
+ }
+ return packFloatx80(zSign, zExp, zSig0);
+ }
+ }
+ if (roundBits) float_raise(status, float_flag_inexact);
+ zSigExact = zSig0;
+ zSig0 += roundIncrement;
+ if (zSig0 < roundIncrement) {
+ // Basically scale by shifting right and keep overflow
+ ++zExp;
+ zSig0 = BX_CONST64(0x8000000000000000);
+ zSigExact >>= 1; // must scale also, or else later tests will fail
+ }
+ roundIncrement = roundMask + 1;
+ if (roundNearestEven && (roundBits<<1 == roundIncrement))
+ roundMask |= roundIncrement;
+ zSig0 &= ~roundMask;
+ if (zSig0 > zSigExact) set_float_rounding_up(status);
+ if (zSig0 == 0) zExp = 0;
+ return packFloatx80(zSign, zExp, zSig0);
+ precision80:
+ increment = ((Bit64s) zSig1 < 0);
+ if (! roundNearestEven) {
+ if (roundingMode == float_round_to_zero) increment = 0;
+ else {
+ if (zSign) {
+ increment = (roundingMode == float_round_down) && zSig1;
+ }
+ else {
+ increment = (roundingMode == float_round_up) && zSig1;
+ }
+ }
+ }
+ if (0x7FFD <= (Bit32u) (zExp - 1)) {
+ if ((0x7FFE < zExp)
+ || ((zExp == 0x7FFE)
+ && (zSig0 == BX_CONST64(0xFFFFFFFFFFFFFFFF))
+ && increment))
+ {
+ roundMask = 0;
+ overflow:
+ float_raise(status, float_flag_overflow | float_flag_inexact);
+ if ((roundingMode == float_round_to_zero)
+ || (zSign && (roundingMode == float_round_up))
+ || (! zSign && (roundingMode == float_round_down)))
+ {
+ return packFloatx80(zSign, 0x7FFE, ~roundMask);
+ }
+ set_float_rounding_up(status);
+ return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+ }
+ if (zExp <= 0) {
+ int isTiny = (zExp < 0) || (!
increment) + || (zSig0 < BX_CONST64(0xFFFFFFFFFFFFFFFF)); + shift64ExtraRightJamming(zSig0, zSig1, 1 - zExp, &zSig0, &zSig1); + zExp = 0; + if (isTiny) { + if (zSig1 || (zSig0 && !float_exception_masked(status, float_flag_underflow))) + float_raise(status, float_flag_underflow); + } + if (zSig1) float_raise(status, float_flag_inexact); + if (roundNearestEven) increment = ((Bit64s) zSig1 < 0); + else { + if (zSign) { + increment = (roundingMode == float_round_down) && zSig1; + } else { + increment = (roundingMode == float_round_up) && zSig1; + } + } + if (increment) { + zSigExact = zSig0++; + zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven); + if (zSig0 > zSigExact) set_float_rounding_up(status); + if ((Bit64s) zSig0 < 0) zExp = 1; + } + return packFloatx80(zSign, zExp, zSig0); + } + } + if (zSig1) float_raise(status, float_flag_inexact); + if (increment) { + zSigExact = zSig0++; + if (zSig0 == 0) { + zExp++; + zSig0 = BX_CONST64(0x8000000000000000); + zSigExact >>= 1; // must scale also, or else later tests will fail + } + else { + zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven); + } + if (zSig0 > zSigExact) set_float_rounding_up(status); + } + else { + if (zSig0 == 0) zExp = 0; + } + return packFloatx80(zSign, zExp, zSig0); +} + +floatx80 roundAndPackFloatx80(int roundingPrecision, + int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status) +{ + struct float_status_t *round_status = status; + floatx80 result = SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status); + + // bias unmasked undeflow + if (status->float_exception_flags & ~status->float_exception_masks & float_flag_underflow) { + float_raise(round_status, float_flag_underflow); + return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp + 0x6000, zSig0, zSig1, status = round_status); + } + + // bias unmasked overflow + if (status->float_exception_flags & ~status->float_exception_masks & float_flag_overflow) { + float_raise(round_status, float_flag_overflow); + return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp - 0x6000, zSig0, zSig1, status = round_status); + } + + return result; +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent +| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', +| and returns the proper extended double-precision floating-point value +| corresponding to the abstract input. This routine is just like +| `roundAndPackFloatx80' except that the input significand does not have to be +| normalized. +*----------------------------------------------------------------------------*/ + +floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision, + int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status) +{ + if (zSig0 == 0) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + int shiftCount = countLeadingZeros64(zSig0); + shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1); + zExp -= shiftCount; + return + roundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status); +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal quadruple-precision floating-point value +| represented by the denormalized significand formed by the concatenation of +| `aSig0' and `aSig1'. The normalized exponent is stored at the location +| pointed to by `zExpPtr'. 
The most significant 49 bits of the normalized +| significand are stored at the location pointed to by `zSig0Ptr', and the +| least significant 64 bits of the normalized significand are stored at the +| location pointed to by `zSig1Ptr'. +*----------------------------------------------------------------------------*/ + +void normalizeFloat128Subnormal( + Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr) +{ + int shiftCount; + + if (aSig0 == 0) { + shiftCount = countLeadingZeros64(aSig1) - 15; + if (shiftCount < 0) { + *zSig0Ptr = aSig1 >>(-shiftCount); + *zSig1Ptr = aSig1 << (shiftCount & 63); + } + else { + *zSig0Ptr = aSig1 << shiftCount; + *zSig1Ptr = 0; + } + *zExpPtr = - shiftCount - 63; + } + else { + shiftCount = countLeadingZeros64(aSig0) - 15; + shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr); + *zExpPtr = 1 - shiftCount; + } +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and extended significand formed by the concatenation of `zSig0', `zSig1', +| and `zSig2', and returns the proper quadruple-precision floating-point value +| corresponding to the abstract input. Ordinarily, the abstract value is +| simply rounded and packed into the quadruple-precision format, with the +| inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal quadruple- +| precision floating-point number. +| The input significand must be normalized or smaller. If the input +| significand is not normalized, `zExp' must be 0; in that case, the result +| returned is a subnormal number, and it must not require rounding. In the +| usual case that the input significand is normalized, `zExp' must be 1 less +| than the ``true'' floating-point exponent. The handling of underflow and +| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 roundAndPackFloat128( + int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, struct float_status_t *status) +{ + int increment = ((Bit64s) zSig2 < 0); + if (0x7FFD <= (Bit32u) zExp) { + if ((0x7FFD < zExp) + || ((zExp == 0x7FFD) + && eq128(BX_CONST64(0x0001FFFFFFFFFFFF), + BX_CONST64(0xFFFFFFFFFFFFFFFF), zSig0, zSig1) + && increment)) + { + float_raise(status, float_flag_overflow | float_flag_inexact); + return packFloat128Four(zSign, 0x7FFF, 0, 0); + } + if (zExp < 0) { + int isTiny = (zExp < -1) + || ! 
increment + || lt128(zSig0, zSig1, + BX_CONST64(0x0001FFFFFFFFFFFF), + BX_CONST64(0xFFFFFFFFFFFFFFFF)); + shift128ExtraRightJamming( + zSig0, zSig1, zSig2, -zExp, &zSig0, &zSig1, &zSig2); + zExp = 0; + if (isTiny && zSig2) float_raise(status, float_flag_underflow); + increment = ((Bit64s) zSig2 < 0); + } + } + if (zSig2) float_raise(status, float_flag_inexact); + if (increment) { + add128(zSig0, zSig1, 0, 1, &zSig0, &zSig1); + zSig1 &= ~((zSig2 + zSig2 == 0) & 1); + } + else { + if ((zSig0 | zSig1) == 0) zExp = 0; + } + return packFloat128Four(zSign, zExp, zSig0, zSig1); +} + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand formed by the concatenation of `zSig0' and `zSig1', and +| returns the proper quadruple-precision floating-point value corresponding +| to the abstract input. This routine is just like `roundAndPackFloat128' +| except that the input significand has fewer bits and does not have to be +| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- +| point exponent. +*----------------------------------------------------------------------------*/ + +float128 normalizeRoundAndPackFloat128( + int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status) +{ + Bit64u zSig2; + + if (zSig0 == 0) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + int shiftCount = countLeadingZeros64(zSig0) - 15; + if (0 <= shiftCount) { + zSig2 = 0; + shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1); + } + else { + shift128ExtraRightJamming( + zSig0, zSig1, 0, -shiftCount, &zSig0, &zSig1, &zSig2); + } + zExp -= shiftCount; + return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); +} + +#endif diff --git a/src/cpu/softfloat/softfloat-round-pack.h b/src/cpu/softfloat/softfloat-round-pack.h new file mode 100644 index 000000000..1422aaea6 --- /dev/null +++ b/src/cpu/softfloat/softfloat-round-pack.h @@ -0,0 +1,309 @@ +/*============================================================================ +This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. 
+ +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +/*============================================================================ + * Adapted for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#ifndef _SOFTFLOAT_ROUND_PACK_H_ +#define _SOFTFLOAT_ROUND_PACK_H_ + +#include "softfloat.h" + +/*---------------------------------------------------------------------------- +| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 +| and 7, and returns the properly rounded 32-bit integer corresponding to the +| input. If `zSign' is 1, the input is negated before being converted to an +| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input +| is simply rounded to an integer, with the inexact exception raised if the +| input cannot be represented exactly as an integer. However, if the fixed- +| point input is too large, the invalid exception is raised and the integer +| indefinite value is returned. +*----------------------------------------------------------------------------*/ + +Bit32s roundAndPackInt32(int zSign, Bit64u absZ, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +| `absZ1', with binary point between bits 63 and 64 (between the input words), +| and returns the properly rounded 64-bit integer corresponding to the input. +| If `zSign' is 1, the input is negated before being converted to an integer. +| Ordinarily, the fixed-point input is simply rounded to an integer, with +| the inexact exception raised if the input cannot be represented exactly as +| an integer. However, if the fixed-point input is too large, the invalid +| exception is raised and the integer indefinite value is returned. +*----------------------------------------------------------------------------*/ + +Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +| `absZ1', with binary point between bits 63 and 64 (between the input words), +| and returns the properly rounded 64-bit unsigned integer corresponding to the +| input. Ordinarily, the fixed-point input is simply rounded to an integer, +| with the inexact exception raised if the input cannot be represented exactly +| as an integer. However, if the fixed-point input is too large, the invalid +| exception is raised and the largest unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +Bit64u roundAndPackUint64(int zSign, Bit64u absZ0, Bit64u absZ1, struct float_status_t *status); + +#ifdef FLOAT16 + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal half-precision floating-point value represented +| by the denormalized significand `aSig'. 
The normalized exponent and +| significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +void normalizeFloat16Subnormal(Bit16u aSig, Bit16s *zExpPtr, Bit16u *zSigPtr); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper half-precision floating- +| point value corresponding to the abstract input. Ordinarily, the abstract +| value is simply rounded and packed into the half-precision format, with +| the inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal single- +| precision floating-point number. +| The input significand `zSig' has its binary point between bits 14 +| and 13, which is 4 bits to the left of the usual location. This shifted +| significand must be normalized or smaller. If `zSig' is not normalized, +| `zExp' must be 0; in that case, the result returned is a subnormal number, +| and it must not require rounding. In the usual case that `zSig' is +| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +| The handling of underflow and overflow follows the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float16 roundAndPackFloat16(int zSign, Bit16s zExp, Bit16u zSig, struct float_status_t *status); + +#endif + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal single-precision floating-point value represented +| by the denormalized significand `aSig'. The normalized exponent and +| significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper single-precision floating- +| point value corresponding to the abstract input. Ordinarily, the abstract +| value is simply rounded and packed into the single-precision format, with +| the inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal single- +| precision floating-point number. +| The input significand `zSig' has its binary point between bits 30 +| and 29, which is 7 bits to the left of the usual location. This shifted +| significand must be normalized or smaller. 
If `zSig' is not normalized, +| `zExp' must be 0; in that case, the result returned is a subnormal number, +| and it must not require rounding. In the usual case that `zSig' is +| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +| The handling of underflow and overflow follows the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper single-precision floating- +| point value corresponding to the abstract input. This routine is just like +| `roundAndPackFloat32' except that `zSig' does not have to be normalized. +| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' +| floating-point exponent. +*----------------------------------------------------------------------------*/ + +float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal double-precision floating-point value represented +| by the denormalized significand `aSig'. The normalized exponent and +| significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper double-precision floating- +| point value corresponding to the abstract input. Ordinarily, the abstract +| value is simply rounded and packed into the double-precision format, with +| the inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded +| to a subnormal number, and the underflow and inexact exceptions are raised +| if the abstract input cannot be represented exactly as a subnormal double- +| precision floating-point number. +| The input significand `zSig' has its binary point between bits 62 +| and 61, which is 10 bits to the left of the usual location. This shifted +| significand must be normalized or smaller. If `zSig' is not normalized, +| `zExp' must be 0; in that case, the result returned is a subnormal number, +| and it must not require rounding. In the usual case that `zSig' is +| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +| The handling of underflow and overflow follows the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper double-precision floating- +| point value corresponding to the abstract input. This routine is just like +| `roundAndPackFloat64' except that `zSig' does not have to be normalized. +| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' +| floating-point exponent. +*----------------------------------------------------------------------------*/ + +float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, struct float_status_t *status); + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal extended double-precision floating-point value +| represented by the denormalized significand `aSig'. The normalized exponent +| and significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and extended significand formed by the concatenation of `zSig0' and `zSig1', +| and returns the proper extended double-precision floating-point value +| corresponding to the abstract input. Ordinarily, the abstract value is +| rounded and packed into the extended double-precision format, with the +| inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal extended +| double-precision floating-point number. +| If `roundingPrecision' is 32 or 64, the result is rounded to the same +| number of bits as single or double precision, respectively. Otherwise, the +| result is rounded to the full precision of the extended double-precision +| format. +| The input significand must be normalized or smaller. If the input +| significand is not normalized, `zExp' must be 0; in that case, the result +| returned is a subnormal number, and it must not require rounding. The +| handling of underflow and overflow follows the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 roundAndPackFloatx80(int roundingPrecision, + int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent +| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', +| and returns the proper extended double-precision floating-point value +| corresponding to the abstract input. 
This routine is just like +| `roundAndPackFloatx80' except that the input significand does not have to be +| normalized. +*----------------------------------------------------------------------------*/ + +floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision, + int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status); + +#endif // FLOATX80 + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal quadruple-precision floating-point value +| represented by the denormalized significand formed by the concatenation of +| `aSig0' and `aSig1'. The normalized exponent is stored at the location +| pointed to by `zExpPtr'. The most significant 49 bits of the normalized +| significand are stored at the location pointed to by `zSig0Ptr', and the +| least significant 64 bits of the normalized significand are stored at the +| location pointed to by `zSig1Ptr'. +*----------------------------------------------------------------------------*/ + +void normalizeFloat128Subnormal( + Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and extended significand formed by the concatenation of `zSig0', `zSig1', +| and `zSig2', and returns the proper quadruple-precision floating-point value +| corresponding to the abstract input. Ordinarily, the abstract value is +| simply rounded and packed into the quadruple-precision format, with the +| inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal quadruple- +| precision floating-point number. +| The input significand must be normalized or smaller. If the input +| significand is not normalized, `zExp' must be 0; in that case, the result +| returned is a subnormal number, and it must not require rounding. In the +| usual case that the input significand is normalized, `zExp' must be 1 less +| than the ``true'' floating-point exponent. The handling of underflow and +| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 roundAndPackFloat128( + int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand formed by the concatenation of `zSig0' and `zSig1', and +| returns the proper quadruple-precision floating-point value corresponding +| to the abstract input. This routine is just like `roundAndPackFloat128' +| except that the input significand has fewer bits and does not have to be +| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- +| point exponent. 
+*----------------------------------------------------------------------------*/ + +float128 normalizeRoundAndPackFloat128( + int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, struct float_status_t *status); + +#endif // FLOAT128 + +#endif diff --git a/src/cpu/softfloat/softfloat-specialize.cc b/src/cpu/softfloat/softfloat-specialize.cc new file mode 100644 index 000000000..bf0d11144 --- /dev/null +++ b/src/cpu/softfloat/softfloat-specialize.cc @@ -0,0 +1,187 @@ +/*============================================================================ +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +#define FLOAT128 + +/*============================================================================ + * Adapted for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#include "softfloat.h" +#include "softfloat-specialize.h" + +/*---------------------------------------------------------------------------- +| Takes two single-precision floating-point values `a' and `b', one of which +| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +| signaling NaN, the invalid exception is raised. 
+*----------------------------------------------------------------------------*/ + +float32 propagateFloat32NaN(float32 a, float32 b, struct float_status_t *status) +{ + int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float32_is_nan(a); + aIsSignalingNaN = float32_is_signaling_nan(a); + bIsNaN = float32_is_nan(b); + bIsSignalingNaN = float32_is_signaling_nan(b); + a |= 0x00400000; + b |= 0x00400000; + if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid); + if (get_float_nan_handling_mode(status) == float_larger_significand_nan) { + if (aIsSignalingNaN) { + if (bIsSignalingNaN) goto returnLargerSignificand; + return bIsNaN ? b : a; + } + else if (aIsNaN) { + if (bIsSignalingNaN | ! bIsNaN) return a; + returnLargerSignificand: + if ((Bit32u) (a<<1) < (Bit32u) (b<<1)) return b; + if ((Bit32u) (b<<1) < (Bit32u) (a<<1)) return a; + return (a < b) ? a : b; + } + else { + return b; + } + } else { + return (aIsSignalingNaN | aIsNaN) ? a : b; + } +} + +/*---------------------------------------------------------------------------- +| Takes two double-precision floating-point values `a' and `b', one of which +| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +| signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +float64 propagateFloat64NaN(float64 a, float64 b, struct float_status_t *status) +{ + int aIsNaN = float64_is_nan(a); + int aIsSignalingNaN = float64_is_signaling_nan(a); + int bIsNaN = float64_is_nan(b); + int bIsSignalingNaN = float64_is_signaling_nan(b); + a |= BX_CONST64(0x0008000000000000); + b |= BX_CONST64(0x0008000000000000); + if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid); + if (get_float_nan_handling_mode(status) == float_larger_significand_nan) { + if (aIsSignalingNaN) { + if (bIsSignalingNaN) goto returnLargerSignificand; + return bIsNaN ? b : a; + } + else if (aIsNaN) { + if (bIsSignalingNaN | ! bIsNaN) return a; + returnLargerSignificand: + if ((Bit64u) (a<<1) < (Bit64u) (b<<1)) return b; + if ((Bit64u) (b<<1) < (Bit64u) (a<<1)) return a; + return (a < b) ? a : b; + } + else { + return b; + } + } else { + return (aIsSignalingNaN | aIsNaN) ? a : b; + } +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Takes two extended double-precision floating-point values `a' and `b', one +| of which is a NaN, and returns the appropriate NaN result. If either `a' or +| `b' is a signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, struct float_status_t *status) +{ + int aIsNaN = floatx80_is_nan(a); + int aIsSignalingNaN = floatx80_is_signaling_nan(a); + int bIsNaN = floatx80_is_nan(b); + int bIsSignalingNaN = floatx80_is_signaling_nan(b); + a.fraction |= BX_CONST64(0xC000000000000000); + b.fraction |= BX_CONST64(0xC000000000000000); + if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid); + if (aIsSignalingNaN) { + if (bIsSignalingNaN) goto returnLargerSignificand; + return bIsNaN ? b : a; + } + else if (aIsNaN) { + if (bIsSignalingNaN | ! bIsNaN) return a; + returnLargerSignificand: + if (a.fraction < b.fraction) return b; + if (b.fraction < a.fraction) return a; + return (a.exp < b.exp) ? 
a : b; + } + else { + return b; + } +} + +#endif /* FLOATX80 */ + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Takes two quadruple-precision floating-point values `a' and `b', one of +| which is a NaN, and returns the appropriate NaN result. If either `a' or +| `b' is a signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +float128 propagateFloat128NaN(float128 a, float128 b, struct float_status_t *status) +{ + int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + aIsNaN = float128_is_nan(a); + aIsSignalingNaN = float128_is_signaling_nan(a); + bIsNaN = float128_is_nan(b); + bIsSignalingNaN = float128_is_signaling_nan(b); + a.hi |= BX_CONST64(0x0000800000000000); + b.hi |= BX_CONST64(0x0000800000000000); + if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid); + if (aIsSignalingNaN) { + if (bIsSignalingNaN) goto returnLargerSignificand; + return bIsNaN ? b : a; + } + else if (aIsNaN) { + if (bIsSignalingNaN | !bIsNaN) return a; + returnLargerSignificand: + if (lt128(a.hi<<1, a.lo, b.hi<<1, b.lo)) return b; + if (lt128(b.hi<<1, b.lo, a.hi<<1, a.lo)) return a; + return (a.hi < b.hi) ? a : b; + } + else { + return b; + } +} + +/*---------------------------------------------------------------------------- +| The pattern for a default generated quadruple-precision NaN. +*----------------------------------------------------------------------------*/ +const float128 float128_default_nan = + packFloat128(float128_default_nan_hi, float128_default_nan_lo); + +#endif /* FLOAT128 */ diff --git a/src/cpu/softfloat/softfloat-specialize.h b/src/cpu/softfloat/softfloat-specialize.h new file mode 100644 index 000000000..11326ecb6 --- /dev/null +++ b/src/cpu/softfloat/softfloat-specialize.h @@ -0,0 +1,788 @@ +/*============================================================================ +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. 
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#ifndef _SOFTFLOAT_SPECIALIZE_H_
+#define _SOFTFLOAT_SPECIALIZE_H_
+
+#include "softfloat.h"
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *   Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define int16_indefinite ((Bit16s)0x8000)
+#define int32_indefinite ((Bit32s)0x80000000)
+#define int64_indefinite BX_CONST64(0x8000000000000000)
+
+#define uint16_indefinite (0xffff)
+#define uint32_indefinite (0xffffffff)
+#define uint64_indefinite BX_CONST64(0xffffffffffffffff)
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format.
+*----------------------------------------------------------------------------*/
+
+typedef struct {
+    int sign;
+    Bit64u hi, lo;
+} commonNaNT;
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float16 float16_default_nan;
+
+#define float16_fraction extractFloat16Frac
+#define float16_exp extractFloat16Exp
+#define float16_sign extractFloat16Sign
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the half-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16u extractFloat16Frac(float16 a)
+{
+    return a & 0x3FF;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the half-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16s extractFloat16Exp(float16 a)
+{
+    return (a>>10) & 0x1F;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the half-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloat16Sign(float16 a)
+{
+    return a>>15;
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| half-precision floating-point value, returning the result. After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result. This means that any integer portion of `zSig'
+| will be added into the exponent. Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
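+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources): worked
+| values for the add-based packing described above (half-precision exponent
+| bias is 15). The helper is hypothetical and the block is never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_half_precision_packing(void)
+{
+    /* +1.0: sign 0, biased exponent 0x0F, fraction 0. */
+    float16 one = (0 << 15) + (0x0F << 10) + 0;          /* 0x3C00 */
+    /* Because the fields are added, a significand whose integer bit is
+       still set carries into the exponent field, as described above. */
+    float16 also_one = (0 << 15) + (0x0E << 10) + 0x400; /* also 0x3C00 */
+    (void) one; (void) also_one;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| The packing routine itself follows.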
+*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE float16 packFloat16(int zSign, int zExp, Bit16u zSig) +{ + return (((Bit16u) zSign)<<15) + (((Bit16u) zExp)<<10) + zSig; +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the half-precision floating-point value `a' is a NaN; +| otherwise returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int float16_is_nan(float16 a) +{ + return (0xF800 < (Bit16u) (a<<1)); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the half-precision floating-point value `a' is a signaling +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int float16_is_signaling_nan(float16 a) +{ + return (((a>>9) & 0x3F) == 0x3E) && (a & 0x1FF); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the half-precision floating-point value `a' is denormal; +| otherwise returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int float16_is_denormal(float16 a) +{ + return (extractFloat16Exp(a) == 0) && (extractFloat16Frac(a) != 0); +} + +/*---------------------------------------------------------------------------- +| Convert float16 denormals to zero. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE float16 float16_denormal_to_zero(float16 a) +{ + if (float16_is_denormal(a)) a &= 0x8000; + return a; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the half-precision floating-point NaN +| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE commonNaNT float16ToCommonNaN(float16 a, struct float_status_t *status) +{ + commonNaNT z; + if (float16_is_signaling_nan(a)) float_raise(status, float_flag_invalid); + z.sign = a>>15; + z.lo = 0; + z.hi = ((Bit64u) a)<<54; + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the half- +| precision floating-point format. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE float16 commonNaNToFloat16(commonNaNT a) +{ + return (((Bit16u) a.sign)<<15) | 0x7E00 | (Bit16u)(a.hi>>54); +} + +#endif + +/*---------------------------------------------------------------------------- +| Commonly used single-precision floating point constants +*----------------------------------------------------------------------------*/ +extern const float32 float32_negative_inf; +extern const float32 float32_positive_inf; +extern const float32 float32_negative_zero; +extern const float32 float32_positive_zero; +extern const float32 float32_negative_one; +extern const float32 float32_positive_one; +extern const float32 float32_max_float; +extern const float32 float32_min_float; + +/*---------------------------------------------------------------------------- +| The pattern for a default generated single-precision NaN. 
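+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources): quick
+| reference values for the float16 NaN predicates defined above. The helper
+| is hypothetical and the block is never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_float16_nan_classes(void)
+{
+    int a = float16_is_signaling_nan(0x7C01); /* 1: bit 9 clear, frac != 0 */
+    int b = float16_is_signaling_nan(0x7E00); /* 0: quiet NaN */
+    int c = float16_is_nan(0x7C00);           /* 0: infinity, not a NaN */
+    (void) a; (void) b; (void) c;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| The default generated single-precision NaN pattern is declared below.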
+*----------------------------------------------------------------------------*/ +extern const float32 float32_default_nan; + +#define float32_fraction extractFloat32Frac +#define float32_exp extractFloat32Exp +#define float32_sign extractFloat32Sign + +#define FLOAT32_EXP_BIAS 0x7F + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the single-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit32u extractFloat32Frac(float32 a) +{ + return a & 0x007FFFFF; +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the single-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit16s extractFloat32Exp(float32 a) +{ + return (a>>23) & 0xFF; +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the single-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int extractFloat32Sign(float32 a) +{ + return a>>31; +} + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +| single-precision floating-point value, returning the result. After being +| shifted into the proper positions, the three fields are simply added +| together to form the result. This means that any integer portion of `zSig' +| will be added into the exponent. Since a properly normalized significand +| will have an integer portion equal to 1, the `zExp' input should be 1 less +| than the desired result exponent whenever `zSig' is a complete, normalized +| significand. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE float32 packFloat32(int zSign, Bit16s zExp, Bit32u zSig) +{ + return (((Bit32u) zSign)<<31) + (((Bit32u) zExp)<<23) + zSig; +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is a NaN; +| otherwise returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int float32_is_nan(float32 a) +{ + return (0xFF000000 < (Bit32u) (a<<1)); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is a signaling +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int float32_is_signaling_nan(float32 a) +{ + return (((a>>22) & 0x1FF) == 0x1FE) && (a & 0x003FFFFF); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is denormal; +| otherwise returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int float32_is_denormal(float32 a) +{ + return (extractFloat32Exp(a) == 0) && (extractFloat32Frac(a) != 0); +} + +/*---------------------------------------------------------------------------- +| Convert float32 denormals to zero. 
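+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources): worked
+| values for packFloat32, including the exponent-carry property described
+| above. The helper is hypothetical and the block is never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_packFloat32(void)
+{
+    /* +1.0f: sign 0, biased exponent 0x7F, fraction 0. */
+    float32 one = packFloat32(0, 0x7F, 0);               /* 0x3F800000 */
+    /* A full significand's integer bit carries into the exponent field:
+       (0x7E<<23) + 0x00800000 == 0x3F800000 as well. */
+    float32 also_one = packFloat32(0, 0x7E, 0x00800000);
+    (void) one; (void) also_one;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| The denormal-flushing helper follows.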
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 float32_denormal_to_zero(float32 a)
+{
+    if (float32_is_denormal(a)) a &= 0x80000000;
+    return a;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT float32ToCommonNaN(float32 a, struct float_status_t *status)
+{
+    commonNaNT z;
+    if (float32_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
+    z.sign = a>>31;
+    z.lo = 0;
+    z.hi = ((Bit64u) a)<<41;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the single-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 commonNaNToFloat32(commonNaNT a)
+{
+    return (((Bit32u) a.sign)<<31) | 0x7FC00000 | (Bit32u)(a.hi>>41);
+}
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float32 propagateFloat32NaN(float32 a, float32 b, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Takes single-precision floating-point NaN `a' and returns the appropriate
+| NaN result. If `a' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 propagateFloat32NaNOne(float32 a, struct float_status_t *status)
+{
+    if (float32_is_signaling_nan(a))
+        float_raise(status, float_flag_invalid);
+
+    return a | 0x00400000;
+}
+
+/*----------------------------------------------------------------------------
+| Commonly used double-precision floating point constants
+*----------------------------------------------------------------------------*/
+extern const float64 float64_negative_inf;
+extern const float64 float64_positive_inf;
+extern const float64 float64_negative_zero;
+extern const float64 float64_positive_zero;
+extern const float64 float64_negative_one;
+extern const float64 float64_positive_one;
+extern const float64 float64_max_float;
+extern const float64 float64_min_float;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float64 float64_default_nan;
+
+#define float64_fraction extractFloat64Frac
+#define float64_exp extractFloat64Exp
+#define float64_sign extractFloat64Sign
+
+#define FLOAT64_EXP_BIAS 0x3FF
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the double-precision floating-point value `a'.
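+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources): the
+| sign-preserving flush performed by float32_denormal_to_zero above. The
+| helper is hypothetical and the block is never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_denormal_flush(void)
+{
+    /* The smallest negative denormal keeps only its sign bit: -0.0f. */
+    float32 z = float32_denormal_to_zero(0x80000001); /* 0x80000000 */
+    /* Normal values pass through unchanged. */
+    float32 n = float32_denormal_to_zero(0x3F800000); /* +1.0f */
+    (void) z; (void) n;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Field extraction for the double-precision format follows.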
+*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit64u extractFloat64Frac(float64 a) +{ + return a & BX_CONST64(0x000FFFFFFFFFFFFF); +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the double-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit16s extractFloat64Exp(float64 a) +{ + return (Bit16s)(a>>52) & 0x7FF; +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the double-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int extractFloat64Sign(float64 a) +{ + return (int)(a>>63); +} + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a +| double-precision floating-point value, returning the result. After being +| shifted into the proper positions, the three fields are simply added +| together to form the result. This means that any integer portion of `zSig' +| will be added into the exponent. Since a properly normalized significand +| will have an integer portion equal to 1, the `zExp' input should be 1 less +| than the desired result exponent whenever `zSig' is a complete, normalized +| significand. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE float64 packFloat64(int zSign, Bit16s zExp, Bit64u zSig) +{ + return (((Bit64u) zSign)<<63) + (((Bit64u) zExp)<<52) + zSig; +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is a NaN; +| otherwise returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int float64_is_nan(float64 a) +{ + return (BX_CONST64(0xFFE0000000000000) < (Bit64u) (a<<1)); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is a signaling +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int float64_is_signaling_nan(float64 a) +{ + return (((a>>51) & 0xFFF) == 0xFFE) && (a & BX_CONST64(0x0007FFFFFFFFFFFF)); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is denormal; +| otherwise returns 0. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int float64_is_denormal(float64 a) +{ + return (extractFloat64Exp(a) == 0) && (extractFloat64Frac(a) != 0); +} + +/*---------------------------------------------------------------------------- +| Convert float64 denormals to zero. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE float64 float64_denormal_to_zero(float64 a) +{ + if (float64_is_denormal(a)) a &= ((Bit64u)(1) << 63); + return a; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point NaN +| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +| exception is raised. 
+*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE commonNaNT float64ToCommonNaN(float64 a, struct float_status_t *status) +{ + commonNaNT z; + if (float64_is_signaling_nan(a)) float_raise(status, float_flag_invalid); + z.sign = (int)(a>>63); + z.lo = 0; + z.hi = a<<12; + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the double- +| precision floating-point format. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE float64 commonNaNToFloat64(commonNaNT a) +{ + return (((Bit64u) a.sign)<<63) | BX_CONST64(0x7FF8000000000000) | (a.hi>>12); +} + +/*---------------------------------------------------------------------------- +| Takes two double-precision floating-point values `a' and `b', one of which +| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +| signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +float64 propagateFloat64NaN(float64 a, float64 b, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Takes double-precision floating-point NaN `a' and returns the appropriate +| NaN result. If `a' is a signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE float64 propagateFloat64NaNOne(float64 a, struct float_status_t *status) +{ + if (float64_is_signaling_nan(a)) + float_raise(status, float_flag_invalid); + + return a | BX_CONST64(0x0008000000000000); +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. The +| `high' and `low' values hold the most- and least-significant bits, +| respectively. +*----------------------------------------------------------------------------*/ +#define floatx80_default_nan_exp 0xFFFF +#define floatx80_default_nan_fraction BX_CONST64(0xC000000000000000) + +#define floatx80_fraction extractFloatx80Frac +#define floatx80_exp extractFloatx80Exp +#define floatx80_sign extractFloatx80Sign + +#define FLOATX80_EXP_BIAS 0x3FFF + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit64u extractFloatx80Frac(floatx80 a) +{ + return a.fraction; +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit32s extractFloatx80Exp(floatx80 a) +{ + return a.exp & 0x7FFF; +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the extended double-precision floating-point value +| `a'. 
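+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources): the
+| float64 quiet bit (bit 51) used by propagateFloat64NaNOne above. The
+| helper is hypothetical and the block is never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_quiet_float64_nan(void)
+{
+    /* 0x7FF0000000000001 is a signaling NaN; OR-ing in the quiet bit
+       yields the quiet NaN 0x7FF8000000000001. */
+    float64 snan = BX_CONST64(0x7FF0000000000001);
+    float64 qnan = snan | BX_CONST64(0x0008000000000000);
+    (void) qnan;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Sign extraction for the extended double-precision format follows.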
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloatx80Sign(floatx80 a)
+{
+    return a.exp>>15;
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+| extended double-precision floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 packFloatx80(int zSign, Bit32s zExp, Bit64u zSig)
+{
+    floatx80 z;
+    z.fraction = zSig;
+    z.exp = (zSign << 15) + zExp;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_nan(floatx80 a)
+{
+    return ((a.exp & 0x7FFF) == 0x7FFF) && (Bit64s) (a.fraction<<1);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_signaling_nan(floatx80 a)
+{
+    Bit64u aLow = a.fraction & ~BX_CONST64(0x4000000000000000);
+    return ((a.exp & 0x7FFF) == 0x7FFF) &&
+            ((Bit64u) (aLow<<1)) && (a.fraction == aLow);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is an
+| unsupported (unnormal) encoding; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_unsupported(floatx80 a)
+{
+    return ((a.exp & 0x7FFF) && !(a.fraction & BX_CONST64(0x8000000000000000)));
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
+| invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT floatx80ToCommonNaN(floatx80 a, struct float_status_t *status)
+{
+    commonNaNT z;
+    if (floatx80_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
+    z.sign = a.exp >> 15;
+    z.lo = 0;
+    z.hi = a.fraction << 1;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the extended
+| double-precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 commonNaNToFloatx80(commonNaNT a)
+{
+    floatx80 z;
+    z.fraction = BX_CONST64(0xC000000000000000) | (a.hi>>1);
+    z.exp = (((Bit16u) a.sign)<<15) | 0x7FFF;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result. If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
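+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources): unlike
+| float32/float64, floatx80 stores its integer bit explicitly (bit 63 of the
+| fraction), so encodings with a non-zero exponent and that bit clear are
+| "unsupported" unnormals. The helper is hypothetical and never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_floatx80_unsupported(void)
+{
+    floatx80 u = packFloatx80(0, 0x4000, BX_CONST64(0x4000000000000000));
+    int bad = floatx80_is_unsupported(u); /* 1: integer bit clear */
+    (void) bad;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| The two-operand NaN propagation routine for floatx80 is declared below.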
+*----------------------------------------------------------------------------*/ + +floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Takes extended double-precision floating-point NaN `a' and returns the +| appropriate NaN result. If `a' is a signaling NaN, the invalid exception +| is raised. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE floatx80 propagateFloatx80NaNOne(floatx80 a, struct float_status_t *status) +{ + if (floatx80_is_signaling_nan(a)) + float_raise(status, float_flag_invalid); + + a.fraction |= BX_CONST64(0xC000000000000000); + + return a; +} + +#endif /* FLOATX80 */ + +#ifdef FLOAT128 + +#include "softfloat-macros.h" + +/*---------------------------------------------------------------------------- +| The pattern for a default generated quadruple-precision NaN. The `high' and +| `low' values hold the most- and least-significant bits, respectively. +*----------------------------------------------------------------------------*/ +#define float128_default_nan_hi BX_CONST64(0xFFFF800000000000) +#define float128_default_nan_lo BX_CONST64(0x0000000000000000) + +#define float128_exp extractFloat128Exp + +/*---------------------------------------------------------------------------- +| Returns the least-significant 64 fraction bits of the quadruple-precision +| floating-point value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit64u extractFloat128Frac1(float128 a) +{ + return a.lo; +} + +/*---------------------------------------------------------------------------- +| Returns the most-significant 48 fraction bits of the quadruple-precision +| floating-point value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit64u extractFloat128Frac0(float128 a) +{ + return a.hi & BX_CONST64(0x0000FFFFFFFFFFFF); +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the quadruple-precision floating-point value +| `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE Bit32s extractFloat128Exp(float128 a) +{ + return ((Bit32s)(a.hi>>48)) & 0x7FFF; +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the quadruple-precision floating-point value `a'. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE int extractFloat128Sign(float128 a) +{ + return (int)(a.hi >> 63); +} + +/*---------------------------------------------------------------------------- +| Packs the sign `zSign', the exponent `zExp', and the significand formed +| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision +| floating-point value, returning the result. After being shifted into the +| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply +| added together to form the most significant 32 bits of the result. This +| means that any integer portion of `zSig0' will be added into the exponent. +| Since a properly normalized significand will have an integer portion equal +| to 1, the `zExp' input should be 1 less than the desired result exponent +| whenever `zSig0' and `zSig1' concatenated form a complete, normalized +| significand. 
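+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources): worked
+| field values for the quadruple-precision format using the extraction
+| routines above. The helper is hypothetical and the block is never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_float128_fields(void)
+{
+    /* +1.0 in quad precision: hi = sign 0, biased exponent 0x3FFF, top
+       fraction bits 0; lo = low fraction bits 0. */
+    float128 one;
+    one.hi = BX_CONST64(0x3FFF000000000000);
+    one.lo = 0;
+    Bit32s e = extractFloat128Exp(one);  /* 0x3FFF */
+    int s = extractFloat128Sign(one);    /* 0 */
+    (void) e; (void) s;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| The quadruple-precision packing routine follows.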
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float128 packFloat128Four(int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1)
+{
+    float128 z;
+    z.lo = zSig1;
+    z.hi = (((Bit64u) zSign)<<63) + (((Bit64u) zExp)<<48) + zSig0;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Packs two 64-bit integers into the quadruple-precision floating-point
+| value, returning the result.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float128 packFloat128(Bit64u zHi, Bit64u zLo)
+{
+    float128 z;
+    z.lo = zLo;
+    z.hi = zHi;
+    return z;
+}
+
+#ifdef _MSC_VER
+#define PACK_FLOAT_128(hi,lo) { lo, hi }
+#else
+#define PACK_FLOAT_128(hi,lo) packFloat128(BX_CONST64(hi),BX_CONST64(lo))
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float128_is_nan(float128 a)
+{
+    return (BX_CONST64(0xFFFE000000000000) <= (Bit64u) (a.hi<<1))
+        && (a.lo || (a.hi & BX_CONST64(0x0000FFFFFFFFFFFF)));
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float128_is_signaling_nan(float128 a)
+{
+    return (((a.hi>>47) & 0xFFFF) == 0xFFFE)
+        && (a.lo || (a.hi & BX_CONST64(0x00007FFFFFFFFFFF)));
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT float128ToCommonNaN(float128 a, struct float_status_t *status)
+{
+    commonNaNT z;
+    if (float128_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
+    z.sign = (int)(a.hi>>63);
+    shortShift128Left(a.hi, a.lo, 16, &z.hi, &z.lo);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the quadruple-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float128 commonNaNToFloat128(commonNaNT a)
+{
+    float128 z;
+    shift128Right(a.hi, a.lo, 16, &z.hi, &z.lo);
+    z.hi |= (((Bit64u) a.sign)<<63) | BX_CONST64(0x7FFF800000000000);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result. If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float128 propagateFloat128NaN(float128 a, float128 b, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.
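+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources): the
+| default NaN pattern defined above is a quiet NaN, so the predicates agree.
+| The helper is hypothetical and the block is never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_float128_default_nan(void)
+{
+    float128 n = packFloat128(float128_default_nan_hi, float128_default_nan_lo);
+    int is_nan  = float128_is_nan(n);           /* 1 */
+    int is_snan = float128_is_signaling_nan(n); /* 0: quiet bit set */
+    (void) is_nan; (void) is_snan;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| The default quadruple-precision NaN pattern is declared below.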
+*----------------------------------------------------------------------------*/
+extern const float128 float128_default_nan;
+
+#endif /* FLOAT128 */
+
+#endif
diff --git a/src/cpu/softfloat/softfloat.cc b/src/cpu/softfloat/softfloat.cc
new file mode 100644
index 000000000..0802089b9
--- /dev/null
+++ b/src/cpu/softfloat/softfloat.cc
@@ -0,0 +1,4012 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#define FLOAT128
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *   Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.h"
+#include "softfloat-round-pack.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+#define USE_estimateDiv128To64
+#define USE_estimateSqrt32
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine: (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output. These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the single-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+const unsigned float_all_exceptions_mask = 0x3f;
+
+float32 int32_to_float32(Bit32s a, struct float_status_t *status)
+{
+    if (a == 0) return 0;
+    if (a == (Bit32s) 0x80000000) return packFloat32(1, 0x9E, 0);
+    int zSign = (a < 0);
+    return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the double-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int32_to_float64(Bit32s a)
+{
+    if (a == 0) return 0;
+    int zSign = (a < 0);
+    Bit32u absA = zSign ? -a : a;
+    int shiftCount = countLeadingZeros32(absA) + 21;
+    Bit64u zSig = absA;
+    return packFloat64(zSign, 0x432 - shiftCount, zSig<<shiftCount);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit unsigned integer `a' to the
+| single-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 uint32_to_float32(Bit32u a, struct float_status_t *status)
+{
+    if (a == 0) return 0;
+    if (a & 0x80000000) return normalizeRoundAndPackFloat32(0, 0x9D, a >> 1, status);
+    return normalizeRoundAndPackFloat32(0, 0x9C, a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit unsigned integer `a' to the
+| double-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 uint32_to_float64(Bit32u a)
+{
+    if (a == 0) return 0;
+    int shiftCount = countLeadingZeros32(a) + 21;
+    Bit64u zSig = a;
+    return packFloat64(0, 0x432 - shiftCount, zSig<<shiftCount);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit unsigned integer `a' to the
+| double-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 uint64_to_float64(Bit64u a, struct float_status_t *status)
+{
+    if (a == 0) return 0;
+    if (a & BX_CONST64(0x8000000000000000))
+        return normalizeRoundAndPackFloat64(0, 0x43D, a >> 1, status);
+    return normalizeRoundAndPackFloat64(0, 0x43C, a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic - which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s float32_to_int32(float32 a, struct float_status_t *status)
+{
+    Bit32u aSig = extractFloat32Frac(a);
+    Bit16s aExp = extractFloat32Exp(a);
+    int aSign = extractFloat32Sign(a);
+    if ((aExp == 0xFF) && aSig) aSign = 0;
+    if (aExp) aSig |= 0x00800000;
+    else {
+        if (get_denormals_are_zeros(status)) aSig = 0;
+    }
+    int shiftCount = 0xAF - aExp;
+    Bit64u aSig64 = Bit64u(aSig) << 32;
+    if (0 < shiftCount) aSig64 = shift64RightJamming(aSig64, shiftCount);
+    return roundAndPackInt32(aSign, aSig64, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.
The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. +| If `a' is a NaN or the conversion overflows, the integer indefinite +| value is returned. +*----------------------------------------------------------------------------*/ + +Bit32s float32_to_int32_round_to_zero(float32 a, struct float_status_t *status) +{ + int aSign; + Bit16s aExp; + Bit32u aSig; + Bit32s z; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + int shiftCount = aExp - 0x9E; + if (0 <= shiftCount) { + if (a != 0xCF000000) { + float_raise(status, float_flag_invalid); + } + return (Bit32s)(int32_indefinite); + } + else if (aExp <= 0x7E) { + if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0; + if (aExp | aSig) float_raise(status, float_flag_inexact); + return 0; + } + aSig = (aSig | 0x800000)<<8; + z = aSig>>(-shiftCount); + if ((Bit32u) (aSig<<(shiftCount & 31))) { + float_raise(status, float_flag_inexact); + } + if (aSign) z = -z; + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 32-bit unsigned integer format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-point Arithmetic, +| except that the conversion is always rounded toward zero. If `a' is a NaN +| or conversion overflows, the largest positive integer is returned. +*----------------------------------------------------------------------------*/ + +Bit32u float32_to_uint32_round_to_zero(float32 a, struct float_status_t *status) +{ + int aSign; + Bit16s aExp; + Bit32u aSig; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + int shiftCount = aExp - 0x9E; + + if (aExp <= 0x7E) { + if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0; + if (aExp | aSig) float_raise(status, float_flag_inexact); + return 0; + } + else if (0 < shiftCount || aSign) { + float_raise(status, float_flag_invalid); + return uint32_indefinite; + } + + aSig = (aSig | 0x800000)<<8; + Bit32u z = aSig >> (-shiftCount); + if (aSig << (shiftCount & 31)) { + float_raise(status, float_flag_inexact); + } + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic - which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN or the +| conversion overflows, the integer indefinite value is returned. 
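+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources):
+| truncation toward zero as performed by the routines above. The helper is
+| hypothetical and the block is never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_round_to_zero(struct float_status_t *status)
+{
+    /* 2.5f is 0x40200000; truncation yields 2 and raises inexact. */
+    Bit32s x = float32_to_int32_round_to_zero(0x40200000, status); /*  2 */
+    Bit32s y = float32_to_int32_round_to_zero(0xC0200000, status); /* -2 */
+    (void) x; (void) y;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| The 64-bit signed conversion follows.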
+*----------------------------------------------------------------------------*/ + +Bit64s float32_to_int64(float32 a, struct float_status_t *status) +{ + Bit64u aSig64, aSigExtra; + + Bit32u aSig = extractFloat32Frac(a); + Bit16s aExp = extractFloat32Exp(a); + int aSign = extractFloat32Sign(a); + + int shiftCount = 0xBE - aExp; + if (shiftCount < 0) { + float_raise(status, float_flag_invalid); + return (Bit64s)(int64_indefinite); + } + if (aExp) aSig |= 0x00800000; + else { + if (get_denormals_are_zeros(status)) aSig = 0; + } + aSig64 = aSig; + aSig64 <<= 40; + shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra); + return roundAndPackInt64(aSign, aSig64, aSigExtra, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. +| If `a' is a NaN or the conversion overflows, the integer indefinite +| value is returned. +*----------------------------------------------------------------------------*/ + +Bit64s float32_to_int64_round_to_zero(float32 a, struct float_status_t *status) +{ + int aSign; + Bit16s aExp; + Bit32u aSig; + Bit64u aSig64; + Bit64s z; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + int shiftCount = aExp - 0xBE; + if (0 <= shiftCount) { + if (a != 0xDF000000) { + float_raise(status, float_flag_invalid); + } + return (Bit64s)(int64_indefinite); + } + else if (aExp <= 0x7E) { + if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0; + if (aExp | aSig) float_raise(status, float_flag_inexact); + return 0; + } + aSig64 = aSig | 0x00800000; + aSig64 <<= 40; + z = aSig64>>(-shiftCount); + if ((Bit64u) (aSig64<<(shiftCount & 63))) { + float_raise(status, float_flag_inexact); + } + if (aSign) z = -z; + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic, +| except that the conversion is always rounded toward zero. If `a' is a NaN +| or the conversion overflows, the largest unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +Bit64u float32_to_uint64_round_to_zero(float32 a, struct float_status_t *status) +{ + int aSign; + Bit16s aExp; + Bit32u aSig; + Bit64u aSig64; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + int shiftCount = aExp - 0xBE; + + if (aExp <= 0x7E) { + if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0; + if (aExp | aSig) float_raise(status, float_flag_inexact); + return 0; + } + else if (0 < shiftCount || aSign) { + float_raise(status, float_flag_invalid); + return uint64_indefinite; + } + + aSig64 = aSig | 0x00800000; + aSig64 <<= 40; + Bit64u z = aSig64>>(-shiftCount); + if ((Bit64u) (aSig64<<(shiftCount & 63))) { + float_raise(status, float_flag_inexact); + } + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit unsigned integer format. 
The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN or the conversion +| overflows, the largest unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +Bit64u float32_to_uint64(float32 a, struct float_status_t *status) +{ + int aSign; + Bit16s aExp, shiftCount; + Bit32u aSig; + Bit64u aSig64, aSigExtra; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + } + + if ((aSign) && (aExp > 0x7E)) { + float_raise(status, float_flag_invalid); + return uint64_indefinite; + } + + shiftCount = 0xBE - aExp; + if (aExp) aSig |= 0x00800000; + + if (shiftCount < 0) { + float_raise(status, float_flag_invalid); + return uint64_indefinite; + } + + aSig64 = aSig; + aSig64 <<= 40; + shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra); + return roundAndPackUint64(aSign, aSig64, aSigExtra, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 32-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN or the conversion +| overflows, the largest unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +Bit32u float32_to_uint32(float32 a, struct float_status_t *status) +{ + Bit64u val_64 = float32_to_uint64(a, status); + + if (val_64 > 0xffffffff) { + status->float_exception_flags = float_flag_invalid; // throw away other flags + return uint32_indefinite; + } + + return (Bit32u) val_64; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the double-precision floating-point format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float32_to_float64(float32 a, struct float_status_t *status) +{ + Bit32u aSig = extractFloat32Frac(a); + Bit16s aExp = extractFloat32Exp(a); + int aSign = extractFloat32Sign(a); + + if (aExp == 0xFF) { + if (aSig) return commonNaNToFloat64(float32ToCommonNaN(a, status)); + return packFloat64(aSign, 0x7FF, 0); + } + if (aExp == 0) { + if (aSig == 0 || get_denormals_are_zeros(status)) + return packFloat64(aSign, 0, 0); + + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(aSig, &aExp, &aSig); + --aExp; + } + return packFloat64(aSign, aExp + 0x380, ((Bit64u) aSig)<<29); +} + +/*---------------------------------------------------------------------------- +| Rounds the single-precision floating-point value `a' to an integer, and +| returns the result as a single-precision floating-point value. The +| operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
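+*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+| Editor's illustration (not part of the original SoftFloat sources): the
+| exact widening performed by float32_to_float64 above, with the exponent
+| re-biased by 0x380 (1023 - 127) and the fraction shifted left 29 bits.
+| The helper is hypothetical and the block is never compiled.
+*----------------------------------------------------------------------------*/
+#if 0
+static void example_widen(struct float_status_t *status)
+{
+    float64 d = float32_to_float64(0x3FC00000, status); /* 1.5f */
+    /* d == 0x3FF8000000000000, i.e. 1.5 in double precision. */
+    (void) d;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| The rounding-to-integer routine follows.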
+*----------------------------------------------------------------------------*/ + +float32 float32_round_to_int(float32 a, Bit8u scale, struct float_status_t *status) +{ + Bit32u lastBitMask, roundBitsMask; + int roundingMode = get_float_rounding_mode(status); + Bit16s aExp = extractFloat32Exp(a); + scale &= 0xf; + + if ((aExp == 0xFF) && extractFloat32Frac(a)) { + return propagateFloat32NaNOne(a, status); + } + + aExp += scale; // scale the exponent + + if (0x96 <= aExp) { + return a; + } + + if (get_denormals_are_zeros(status)) { + a = float32_denormal_to_zero(a); + } + + if (aExp <= 0x7E) { + if ((Bit32u) (a<<1) == 0) return a; + float_raise(status, float_flag_inexact); + int aSign = extractFloat32Sign(a); + switch (roundingMode) { + case float_round_nearest_even: + if ((aExp == 0x7E) && extractFloat32Frac(a)) { + return packFloat32(aSign, 0x7F - scale, 0); + } + break; + case float_round_down: + return aSign ? packFloat32(1, 0x7F - scale, 0) : float32_positive_zero; + case float_round_up: + return aSign ? float32_negative_zero : packFloat32(0, 0x7F - scale, 0); + } + return packFloat32(aSign, 0, 0); + } + + lastBitMask = 1; + lastBitMask <<= 0x96 - aExp; + roundBitsMask = lastBitMask - 1; + float32 z = a; + if (roundingMode == float_round_nearest_even) { + z += lastBitMask>>1; + if ((z & roundBitsMask) == 0) z &= ~lastBitMask; + } + else if (roundingMode != float_round_to_zero) { + if (extractFloat32Sign(z) ^ (roundingMode == float_round_up)) { + z += roundBitsMask; + } + } + z &= ~roundBitsMask; + if (z != a) float_raise(status, float_flag_inexact); + return z; +} + +/*---------------------------------------------------------------------------- +| Extracts the fractional portion of single-precision floating-point value `a', +| and returns the result as a single-precision floating-point value. The +| fractional results are precise. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_frc(float32 a, struct float_status_t *status) +{ + int roundingMode = get_float_rounding_mode(status); + + Bit16s aExp = extractFloat32Exp(a); + Bit32u aSig = extractFloat32Frac(a); + int aSign = extractFloat32Sign(a); + + if (aExp == 0xFF) { + if (aSig) return propagateFloat32NaNOne(a, status); + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + + if (aExp >= 0x96) { + return packFloat32(roundingMode == float_round_down, 0, 0); + } + + if (aExp < 0x7F) { + if (aExp == 0) { + if (aSig == 0 || get_denormals_are_zeros(status)) + return packFloat32(roundingMode == float_round_down, 0, 0); + + float_raise(status, float_flag_denormal); + if (! 
float_exception_masked(status, float_flag_underflow)) + float_raise(status, float_flag_underflow); + + if(get_flush_underflow_to_zero(status)) { + float_raise(status, float_flag_underflow | float_flag_inexact); + return packFloat32(aSign, 0, 0); + } + } + return a; + } + + Bit32u lastBitMask = 1 << (0x96 - aExp); + Bit32u roundBitsMask = lastBitMask - 1; + + aSig &= roundBitsMask; + aSig <<= 7; + aExp--; + + if (aSig == 0) + return packFloat32(roundingMode == float_round_down, 0, 0); + + return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status); +} + +/*---------------------------------------------------------------------------- +| Extracts the exponent portion of single-precision floating-point value 'a', +| and returns the result as a single-precision floating-point value +| representing unbiased integer exponent. The operation is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_getexp(float32 a, struct float_status_t *status) +{ + Bit16s aExp = extractFloat32Exp(a); + Bit32u aSig = extractFloat32Frac(a); + + if (aExp == 0xFF) { + if (aSig) return propagateFloat32NaNOne(a, status); + return float32_positive_inf; + } + + if (aExp == 0) { + if (aSig == 0 || get_denormals_are_zeros(status)) + return float32_negative_inf; + + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(aSig, &aExp, &aSig); + } + + return int32_to_float32(aExp - 0x7F, status); +} + +/*---------------------------------------------------------------------------- +| Extracts the mantissa of single-precision floating-point value 'a' and +| returns the result as a single-precision floating-point after applying +| the mantissa interval normalization and sign control. The operation is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_getmant(float32 a, struct float_status_t *status, int sign_ctrl, int interv) +{ + Bit16s aExp = extractFloat32Exp(a); + Bit32u aSig = extractFloat32Frac(a); + int aSign = extractFloat32Sign(a); + + if (aExp == 0xFF) { + if (aSig) return propagateFloat32NaNOne(a, status); + if (aSign) { + if (sign_ctrl & 0x2) { + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + } + return packFloat32(~sign_ctrl & aSign, 0x7F, 0); + } + + if (aExp == 0 && (aSig == 0 || get_denormals_are_zeros(status))) { + return packFloat32(~sign_ctrl & aSign, 0x7F, 0); + } + + if (aSign) { + if (sign_ctrl & 0x2) { + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + } + + if (aExp == 0) { + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(aSig, &aExp, &aSig); +// aExp += 0x7E; + aSig &= 0x7FFFFF; + } + + switch(interv) { + case 0x0: // interval [1,2) + aExp = 0x7F; + break; + case 0x1: // interval [1/2,2) + aExp -= 0x7F; + aExp = 0x7F - (aExp & 0x1); + break; + case 0x2: // interval [1/2,1) + aExp = 0x7E; + break; + case 0x3: // interval [3/4,3/2) + aExp = 0x7F - ((aSig >> 22) & 0x1); + break; + } + + return packFloat32(~sign_ctrl & aSign, aExp, aSig); +} + +/*---------------------------------------------------------------------------- +| Return the result of a floating point scale of the single-precision floating +| point value `a' by multiplying it by 2 power of the single-precision +| floating point value 'b' converted to integral value. 
If the result cannot +| be represented in single precision, then the proper overflow response (for +| positive scaling operand), or the proper underflow response (for negative +| scaling operand) is issued. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_scalef(float32 a, float32 b, struct float_status_t *status) +{ + Bit32u aSig = extractFloat32Frac(a); + Bit16s aExp = extractFloat32Exp(a); + int aSign = extractFloat32Sign(a); + Bit32u bSig = extractFloat32Frac(b); + Bit16s bExp = extractFloat32Exp(b); + int bSign = extractFloat32Sign(b); + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + } + + if (bExp == 0xFF) { + if (bSig) return propagateFloat32NaN(a, b, status); + } + + if (aExp == 0xFF) { + if (aSig) { + int aIsSignalingNaN = (aSig & 0x00400000) == 0; + if (aIsSignalingNaN || bExp != 0xFF || bSig) + return propagateFloat32NaN(a, b, status); + + return bSign ? 0 : float32_positive_inf; + } + + if (bExp == 0xFF && bSign) { + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + return a; + } + + if (aExp == 0) { + if (aSig == 0) { + if (bExp == 0xFF && ! bSign) { + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + return a; + } + float_raise(status, float_flag_denormal); + } + + if ((bExp | bSig) == 0) return a; + + if (bExp == 0xFF) { + if (bSign) return packFloat32(aSign, 0, 0); + return packFloat32(aSign, 0xFF, 0); + } + + if (bExp >= 0x8E) { + // handle obvious overflow/underflow result + return roundAndPackFloat32(aSign, bSign ? -0x7F : 0xFF, aSig, status); + } + + int scale = 0; + + if (bExp <= 0x7E) { + if (bExp == 0) + float_raise(status, float_flag_denormal); + scale = -bSign; + } + else { + int shiftCount = bExp - 0x9E; + bSig = (bSig | 0x800000)<<8; + scale = bSig>>(-shiftCount); + + if (bSign) { + if ((Bit32u) (bSig<<(shiftCount & 31))) scale++; + scale = -scale; + } + + if (scale > 0x200) scale = 0x200; + if (scale < -0x200) scale = -0x200; + } + + if (aExp != 0) { + aSig |= 0x00800000; + } else { + aExp++; + } + + aExp += scale - 1; + aSig <<= 7; + return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the absolute values of the single-precision +| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +| before being returned. `zSign' is ignored if the result is a NaN. +| The addition is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +static float32 addFloat32Sigs(float32 a, float32 b, int zSign, struct float_status_t *status) +{ + Bit16s aExp, bExp, zExp; + Bit32u aSig, bSig, zSig; + Bit16s expDiff; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + bSig = extractFloat32Frac(b); + bExp = extractFloat32Exp(b); + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + } + + expDiff = aExp - bExp; + aSig <<= 6; + bSig <<= 6; + + if (0 < expDiff) { + if (aExp == 0xFF) { + if (aSig) return propagateFloat32NaN(a, b, status); + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return a; + } + if ((aExp == 0) && aSig) + float_raise(status, float_flag_denormal); + + if (bExp == 0) { + if (bSig) float_raise(status, float_flag_denormal); + --expDiff; + } + else bSig |= 0x20000000; + + bSig = shift32RightJamming(bSig, expDiff); + zExp = aExp; + } + else if (expDiff < 0) { + if (bExp == 0xFF) { + if (bSig) return propagateFloat32NaN(a, b, status); + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloat32(zSign, 0xFF, 0); + } + if ((bExp == 0) && bSig) + float_raise(status, float_flag_denormal); + + if (aExp == 0) { + if (aSig) float_raise(status, float_flag_denormal); + ++expDiff; + } + else aSig |= 0x20000000; + + aSig = shift32RightJamming(aSig, -expDiff); + zExp = bExp; + } + else { + if (aExp == 0xFF) { + if (aSig | bSig) return propagateFloat32NaN(a, b, status); + return a; + } + if (aExp == 0) { + zSig = (aSig + bSig) >> 6; + if (aSig | bSig) { + float_raise(status, float_flag_denormal); + if (get_flush_underflow_to_zero(status) && (extractFloat32Frac(zSig) == zSig)) { + float_raise(status, float_flag_underflow | float_flag_inexact); + return packFloat32(zSign, 0, 0); + } + if (! float_exception_masked(status, float_flag_underflow)) { + if (extractFloat32Frac(zSig) == zSig) + float_raise(status, float_flag_underflow); + } + } + return packFloat32(zSign, 0, zSig); + } + zSig = 0x40000000 + aSig + bSig; + return roundAndPackFloat32(zSign, aExp, zSig, status); + } + aSig |= 0x20000000; + zSig = (aSig + bSig)<<1; + --zExp; + if ((Bit32s) zSig < 0) { + zSig = aSig + bSig; + ++zExp; + } + return roundAndPackFloat32(zSign, zExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the absolute values of the single- +| precision floating-point values `a' and `b'. If `zSign' is 1, the +| difference is negated before being returned. `zSign' is ignored if the +| result is a NaN. The subtraction is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
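+|
+| Note that exact cancellation below yields +0 in every rounding mode except
+| round-down, which yields -0 (sketch; `status' is caller-prepared as above):
+|
+|     float32 z = float32_sub(0x3F800000, 0x3F800000, &status); // 1.0f - 1.0f
+|     // z == 0x00000000 (+0.0f), or 0x80000000 (-0.0f) under float_round_down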
+*----------------------------------------------------------------------------*/ + +static float32 subFloat32Sigs(float32 a, float32 b, int zSign, struct float_status_t *status) +{ + Bit16s aExp, bExp, zExp; + Bit32u aSig, bSig, zSig; + Bit16s expDiff; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + bSig = extractFloat32Frac(b); + bExp = extractFloat32Exp(b); + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + } + + expDiff = aExp - bExp; + aSig <<= 7; + bSig <<= 7; + if (0 < expDiff) goto aExpBigger; + if (expDiff < 0) goto bExpBigger; + if (aExp == 0xFF) { + if (aSig | bSig) return propagateFloat32NaN(a, b, status); + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + if (aExp == 0) { + if (aSig | bSig) float_raise(status, float_flag_denormal); + aExp = 1; + bExp = 1; + } + if (bSig < aSig) goto aBigger; + if (aSig < bSig) goto bBigger; + return packFloat32(get_float_rounding_mode(status) == float_round_down, 0, 0); + bExpBigger: + if (bExp == 0xFF) { + if (bSig) return propagateFloat32NaN(a, b, status); + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloat32(zSign ^ 1, 0xFF, 0); + } + if ((bExp == 0) && bSig) + float_raise(status, float_flag_denormal); + + if (aExp == 0) { + if (aSig) float_raise(status, float_flag_denormal); + ++expDiff; + } + else aSig |= 0x40000000; + + aSig = shift32RightJamming(aSig, -expDiff); + bSig |= 0x40000000; + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if (aExp == 0xFF) { + if (aSig) return propagateFloat32NaN(a, b, status); + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return a; + } + if ((aExp == 0) && aSig) + float_raise(status, float_flag_denormal); + + if (bExp == 0) { + if (bSig) float_raise(status, float_flag_denormal); + --expDiff; + } + else bSig |= 0x40000000; + + bSig = shift32RightJamming(bSig, expDiff); + aSig |= 0x40000000; + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the single-precision floating-point values `a' +| and `b'. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_add(float32 a, float32 b, struct float_status_t *status) +{ + int aSign = extractFloat32Sign(a); + int bSign = extractFloat32Sign(b); + + if (aSign == bSign) { + return addFloat32Sigs(a, b, aSign, status); + } + else { + return subFloat32Sigs(a, b, aSign, status); + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the single-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float32 float32_sub(float32 a, float32 b, struct float_status_t *status) +{ + int aSign = extractFloat32Sign(a); + int bSign = extractFloat32Sign(b); + + if (aSign == bSign) { + return subFloat32Sigs(a, b, aSign, status); + } + else { + return addFloat32Sigs(a, b, aSign, status); + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the single-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_mul(float32 a, float32 b, struct float_status_t *status) +{ + int aSign, bSign, zSign; + Bit16s aExp, bExp, zExp; + Bit32u aSig, bSig; + Bit64u zSig64; + Bit32u zSig; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + bSig = extractFloat32Frac(b); + bExp = extractFloat32Exp(b); + bSign = extractFloat32Sign(b); + zSign = aSign ^ bSign; + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + } + + if (aExp == 0xFF) { + if (aSig || ((bExp == 0xFF) && bSig)) + return propagateFloat32NaN(a, b, status); + + if ((bExp | bSig) == 0) { + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return packFloat32(zSign, 0xFF, 0); + } + if (bExp == 0xFF) { + if (bSig) return propagateFloat32NaN(a, b, status); + if ((aExp | aSig) == 0) { + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloat32(zSign, 0xFF, 0); + } + if (aExp == 0) { + if (aSig == 0) { + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return packFloat32(zSign, 0, 0); + } + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + if (bSig == 0) return packFloat32(zSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(bSig, &bExp, &bSig); + } + zExp = aExp + bExp - 0x7F; + aSig = (aSig | 0x00800000)<<7; + bSig = (bSig | 0x00800000)<<8; + zSig64 = shift64RightJamming(((Bit64u) aSig) * bSig, 32); + zSig = (Bit32u) zSig64; + if (0 <= (Bit32s) (zSig<<1)) { + zSig <<= 1; + --zExp; + } + return roundAndPackFloat32(zSign, zExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of dividing the single-precision floating-point value `a' +| by the corresponding value `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
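+|
+| For example (sketch; `status' as above): dividing a finite nonzero value by
+| zero raises float_flag_divbyzero and returns a correctly signed infinity:
+|
+|     float32 q = float32_div(0x3F800000, 0x00000000, &status); // 1.0f / +0.0f
+|     // q == 0x7F800000 (+inf) and float_flag_divbyzero is raised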
+*----------------------------------------------------------------------------*/ + +float32 float32_div(float32 a, float32 b, struct float_status_t *status) +{ + int aSign, bSign, zSign; + Bit16s aExp, bExp, zExp; + Bit32u aSig, bSig, zSig; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + bSig = extractFloat32Frac(b); + bExp = extractFloat32Exp(b); + bSign = extractFloat32Sign(b); + zSign = aSign ^ bSign; + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + } + + if (aExp == 0xFF) { + if (aSig) return propagateFloat32NaN(a, b, status); + if (bExp == 0xFF) { + if (bSig) return propagateFloat32NaN(a, b, status); + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return packFloat32(zSign, 0xFF, 0); + } + if (bExp == 0xFF) { + if (bSig) return propagateFloat32NaN(a, b, status); + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloat32(zSign, 0, 0); + } + if (bExp == 0) { + if (bSig == 0) { + if ((aExp | aSig) == 0) { + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + float_raise(status, float_flag_divbyzero); + return packFloat32(zSign, 0xFF, 0); + } + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(bSig, &bExp, &bSig); + } + if (aExp == 0) { + if (aSig == 0) return packFloat32(zSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(aSig, &aExp, &aSig); + } + zExp = aExp - bExp + 0x7D; + aSig = (aSig | 0x00800000)<<7; + bSig = (bSig | 0x00800000)<<8; + if (bSig <= (aSig + aSig)) { + aSig >>= 1; + ++zExp; + } + zSig = (((Bit64u) aSig)<<32) / bSig; + if ((zSig & 0x3F) == 0) { + zSig |= ((Bit64u) bSig * zSig != ((Bit64u) aSig)<<32); + } + return roundAndPackFloat32(zSign, zExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Returns the square root of the single-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_sqrt(float32 a, struct float_status_t *status) +{ + int aSign; + Bit16s aExp, zExp; + Bit32u aSig, zSig; + Bit64u rem, term; + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + + if (aExp == 0xFF) { + if (aSig) return propagateFloat32NaNOne(a, status); + if (! 
aSign) return a; + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + } + + if (aSign) { + if ((aExp | aSig) == 0) return packFloat32(aSign, 0, 0); + float_raise(status, float_flag_invalid); + return float32_default_nan; + } + if (aExp == 0) { + if (aSig == 0) return 0; + float_raise(status, float_flag_denormal); + normalizeFloat32Subnormal(aSig, &aExp, &aSig); + } + zExp = ((aExp - 0x7F)>>1) + 0x7E; + aSig = (aSig | 0x00800000)<<8; + zSig = estimateSqrt32(aExp, aSig) + 2; + if ((zSig & 0x7F) <= 5) { + if (zSig < 2) { + zSig = 0x7FFFFFFF; + goto roundAndPack; + } + aSig >>= aExp & 1; + term = ((Bit64u) zSig) * zSig; + rem = (((Bit64u) aSig)<<32) - term; + while ((Bit64s) rem < 0) { + --zSig; + rem += (((Bit64u) zSig)<<1) | 1; + } + zSig |= (rem != 0); + } + zSig = shift32RightJamming(zSig, 1); + roundAndPack: + return roundAndPackFloat32(0, zExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Determine single-precision floating-point number class. +*----------------------------------------------------------------------------*/ + +float_class_t float32_class(float32 a) +{ + Bit16s aExp = extractFloat32Exp(a); + Bit32u aSig = extractFloat32Frac(a); + int aSign = extractFloat32Sign(a); + + if(aExp == 0xFF) { + if (aSig == 0) + return (aSign) ? float_negative_inf : float_positive_inf; + + return (aSig & 0x00400000) ? float_QNaN : float_SNaN; + } + + if(aExp == 0) { + if (aSig == 0) return float_zero; + return float_denormal; + } + + return float_normalized; +} + +/*---------------------------------------------------------------------------- +| Compare between two single precision floating point numbers. Returns +| 'float_relation_equal' if the operands are equal, 'float_relation_less' if +| the value 'a' is less than the corresponding value `b', +| 'float_relation_greater' if the value 'a' is greater than the corresponding +| value `b', or 'float_relation_unordered' otherwise. +*----------------------------------------------------------------------------*/ + +int float32_compare(float32 a, float32 b, int quiet, struct float_status_t *status) +{ + if (get_denormals_are_zeros(status)) { + a = float32_denormal_to_zero(a); + b = float32_denormal_to_zero(b); + } + + float_class_t aClass = float32_class(a); + float_class_t bClass = float32_class(b); + + if (aClass == float_SNaN || bClass == float_SNaN) { + float_raise(status, float_flag_invalid); + return float_relation_unordered; + } + + if (aClass == float_QNaN || bClass == float_QNaN) { + if (! quiet) float_raise(status, float_flag_invalid); + return float_relation_unordered; + } + + if (aClass == float_denormal || bClass == float_denormal) { + float_raise(status, float_flag_denormal); + } + + if ((a == b) || ((Bit32u) ((a | b)<<1) == 0)) return float_relation_equal; + + int aSign = extractFloat32Sign(a); + int bSign = extractFloat32Sign(b); + if (aSign != bSign) + return (aSign) ? float_relation_less : float_relation_greater; + + if (aSign ^ (a < b)) return float_relation_less; + return float_relation_greater; +} + +/*---------------------------------------------------------------------------- +| Compare between two single precision floating point numbers and return the +| smaller of them. 
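+|
+| Note: float32_compare() above treats +0 and -0 as equal, so for a mixed
+| zero pair the comparison does not report 'less' and float32_min() simply
+| returns `b'; as IEEE 754 permits for min/max, the result depends on operand
+| order, e.g. float32_min(+0, -0) is -0 while float32_min(-0, +0) is +0.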
+*----------------------------------------------------------------------------*/ + +float32 float32_min(float32 a, float32 b, struct float_status_t *status) +{ + if (get_denormals_are_zeros(status)) { + a = float32_denormal_to_zero(a); + b = float32_denormal_to_zero(b); + } + + return (float32_compare_two(a, b, status) == float_relation_less) ? a : b; +} + +/*---------------------------------------------------------------------------- +| Compare between two single precision floating point numbers and return the +| larger of them. +*----------------------------------------------------------------------------*/ + +float32 float32_max(float32 a, float32 b, struct float_status_t *status) +{ + if (get_denormals_are_zeros(status)) { + a = float32_denormal_to_zero(a); + b = float32_denormal_to_zero(b); + } + + return (float32_compare_two(a, b, status) == float_relation_greater) ? a : b; +} + +/*---------------------------------------------------------------------------- +| Compare between two single precision floating point numbers and return the +| smaller/larger of them. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float32_minmax(float32 a, float32 b, int is_max, int is_abs, struct float_status_t *status) +{ + if (get_denormals_are_zeros(status)) { + a = float32_denormal_to_zero(a); + b = float32_denormal_to_zero(b); + } + + if (float32_is_nan(a) || float32_is_nan(b)) { + if (float32_is_signaling_nan(a)) { + return propagateFloat32NaNOne(a, status); + } + if (float32_is_signaling_nan(b) ) { + return propagateFloat32NaNOne(b, status); + } + if (! float32_is_nan(b)) { + if (float32_is_denormal(b)) + float_raise(status, float_flag_denormal); + return b; + } + if (! float32_is_nan(a)) { + if (float32_is_denormal(a)) + float_raise(status, float_flag_denormal); + return a; + } + return propagateFloat32NaN(a, b, status); + } + + float32 tmp_a = a, tmp_b = b; + if (is_abs) { + tmp_a &= ~0x80000000; // clear the sign bit + tmp_b &= ~0x80000000; + } + + int aSign = extractFloat32Sign(tmp_a); + int bSign = extractFloat32Sign(tmp_b); + + if (float32_is_denormal(a) || float32_is_denormal(b)) + float_raise(status, float_flag_denormal); + + if (aSign != bSign) { + if (! is_max) { + return aSign ? a : b; + } else { + return aSign ? b : a; + } + } else { + if (! is_max) { + return (aSign ^ (tmp_a < tmp_b)) ? a : b; + } else { + return (aSign ^ (tmp_a < tmp_b)) ? b : a; + } + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 32-bit two's complement integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic - which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN or the +| conversion overflows, the integer indefinite value is returned. 
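+|
+| E.g. (sketch; int32_indefinite is this library's x87-style integer
+| indefinite, the pattern 0x80000000): converting a quiet NaN raises
+| float_flag_invalid and returns that indefinite value:
+|
+|     Bit32s i = float64_to_int32(BX_CONST64(0x7FF8000000000000), &status);
+|     // i == (Bit32s) 0x80000000 and float_flag_invalid is raised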
+*----------------------------------------------------------------------------*/
+
+Bit32s float64_to_int32(float64 a, struct float_status_t *status)
+{
+    Bit64u aSig = extractFloat64Frac(a);
+    Bit16s aExp = extractFloat64Exp(a);
+    int aSign = extractFloat64Sign(a);
+    if ((aExp == 0x7FF) && aSig) aSign = 0;
+    if (aExp) aSig |= BX_CONST64(0x0010000000000000);
+    else {
+        if (get_denormals_are_zeros(status)) aSig = 0;
+    }
+    int shiftCount = 0x42C - aExp;
+    if (0 < shiftCount) aSig = shift64RightJamming(aSig, shiftCount);
+    return roundAndPackInt32(aSign, aSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN or the conversion overflows, the integer indefinite
+| value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s float64_to_int32_round_to_zero(float64 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit16s aExp;
+    Bit64u aSig, savedASig;
+    Bit32s z;
+    int shiftCount;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    if (0x41E < aExp) {
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    else if (aExp < 0x3FF) {
+        if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0;
+        if (aExp || aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+    aSig |= BX_CONST64(0x0010000000000000);
+    shiftCount = 0x433 - aExp;
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = (Bit32s) aSig;
+    if (aSign) z = -z;
+    if ((z < 0) ^ aSign) {
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    if ((aSig<<shiftCount) != savedASig) {
+        float_raise(status, float_flag_inexact);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN or the conversion overflows, the integer indefinite
+| value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s float64_to_int64_round_to_zero(float64 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit16s aExp;
+    Bit64u aSig;
+    Bit64s z;
+    int shiftCount;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    if (aExp) aSig |= BX_CONST64(0x0010000000000000);
+    else {
+        if (get_denormals_are_zeros(status)) aSig = 0;
+    }
+    shiftCount = aExp - 0x433;
+    if (0 <= shiftCount) {
+        if (0x43E <= aExp) {
+            if (a != BX_CONST64(0xC3E0000000000000)) {
+                float_raise(status, float_flag_invalid);
+            }
+            return (Bit64s)(int64_indefinite);
+        }
+        z = aSig<<shiftCount;
+    }
+    else {
+        if (aExp < 0x3FE) {
+            if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0;
+            if (aExp | aSig) float_raise(status, float_flag_inexact);
+            return 0;
+        }
+        z = aSig>>(-shiftCount);
+        if ((Bit64u) (aSig<<(shiftCount & 63))) {
+            float_raise(status, float_flag_inexact);
+        }
+    }
+    if (aSign) z = -z;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit unsigned integer format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
+| except that the conversion is always rounded toward zero. If `a' is a NaN
+| or the conversion overflows, the largest unsigned integer is returned.
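+|
+| For instance (sketch): any negative operand of magnitude 1.0 or more is out
+| of range for an unsigned result, so it raises float_flag_invalid:
+|
+|     Bit64u u = float64_to_uint64_round_to_zero(BX_CONST64(0xBFF0000000000000), &status); // -1.0
+|     // u == uint64_indefinite and float_flag_invalid is raised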
+*----------------------------------------------------------------------------*/
+
+Bit64u float64_to_uint64_round_to_zero(float64 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit16s aExp;
+    Bit64u aSig, z;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+
+    if (aExp < 0x3FE) {
+        if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0;
+        if (aExp | aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+
+    if (0x43E <= aExp || aSign) {
+        float_raise(status, float_flag_invalid);
+        return uint64_indefinite;
+    }
+
+    if (aExp) aSig |= BX_CONST64(0x0010000000000000);
+    int shiftCount = aExp - 0x433;
+
+    if (0 <= shiftCount) {
+        z = aSig<<shiftCount;
+    }
+    else {
+        z = aSig>>(-shiftCount);
+        if ((Bit64u) (aSig<<(shiftCount & 63))) {
+            float_raise(status, float_flag_inexact);
+        }
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit unsigned integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN or the conversion
+| overflows, the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32u float64_to_uint32(float64 a, struct float_status_t *status)
+{
+    Bit64u val_64 = float64_to_uint64(a, status);
+
+    if (val_64 > 0xffffffff) {
+        status->float_exception_flags = float_flag_invalid; // throw away other flags
+        return uint32_indefinite;
+    }
+
+    return (Bit32u) val_64;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit unsigned integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN or the conversion
+| overflows, the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64u float64_to_uint64(float64 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit16s aExp, shiftCount;
+    Bit64u aSig, aSigExtra;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+    }
+
+    if (aSign && (aExp > 0x3FE)) {
+        float_raise(status, float_flag_invalid);
+        return uint64_indefinite;
+    }
+
+    if (aExp) {
+        aSig |= BX_CONST64(0x0010000000000000);
+    }
+    shiftCount = 0x433 - aExp;
+    if (shiftCount <= 0) {
+        if (0x43E < aExp) {
+            float_raise(status, float_flag_invalid);
+            return uint64_indefinite;
+        }
+        aSigExtra = 0;
+        aSig <<= -shiftCount;
+    } else {
+        shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
+    }
+
+    return roundAndPackUint64(aSign, aSig, aSigExtra, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the single-precision floating-point format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
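+|
+| Illustration (sketch): narrowing is correctly rounded, so 1/3 in double
+| precision becomes the nearest single-precision value and raises inexact:
+|
+|     float32 f = float64_to_float32(BX_CONST64(0x3FD5555555555555), &status);
+|     // f == 0x3EAAAAAB under round-to-nearest-even; float_flag_inexact raised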
+*----------------------------------------------------------------------------*/ + +float32 float64_to_float32(float64 a, struct float_status_t *status) +{ + int aSign; + Bit16s aExp; + Bit64u aSig; + Bit32u zSig; + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + if (aExp == 0x7FF) { + if (aSig) return commonNaNToFloat32(float64ToCommonNaN(a, status)); + return packFloat32(aSign, 0xFF, 0); + } + if (aExp == 0) { + if (aSig == 0 || get_denormals_are_zeros(status)) + return packFloat32(aSign, 0, 0); + float_raise(status, float_flag_denormal); + } + aSig = shift64RightJamming(aSig, 22); + zSig = (Bit32u) aSig; + if (aExp || zSig) { + zSig |= 0x40000000; + aExp -= 0x381; + } + return roundAndPackFloat32(aSign, aExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Rounds the double-precision floating-point value `a' to an integer, and +| returns the result as a double-precision floating-point value. The +| operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float64_round_to_int(float64 a, Bit8u scale, struct float_status_t *status) +{ + Bit64u lastBitMask, roundBitsMask; + int roundingMode = get_float_rounding_mode(status); + Bit16s aExp = extractFloat64Exp(a); + scale &= 0xf; + + if ((aExp == 0x7FF) && extractFloat64Frac(a)) { + return propagateFloat64NaNOne(a, status); + } + + aExp += scale; // scale the exponent + + if (0x433 <= aExp) { + return a; + } + + if (get_denormals_are_zeros(status)) { + a = float64_denormal_to_zero(a); + } + + if (aExp < 0x3FF) { + if ((Bit64u) (a<<1) == 0) return a; + float_raise(status, float_flag_inexact); + int aSign = extractFloat64Sign(a); + switch (roundingMode) { + case float_round_nearest_even: + if ((aExp == 0x3FE) && extractFloat64Frac(a)) { + return packFloat64(aSign, 0x3FF - scale, 0); + } + break; + case float_round_down: + return aSign ? packFloat64(1, 0x3FF - scale, 0) : float64_positive_zero; + case float_round_up: + return aSign ? float64_negative_zero : packFloat64(0, 0x3FF - scale, 0); + } + return packFloat64(aSign, 0, 0); + } + + lastBitMask = 1; + lastBitMask <<= 0x433 - aExp; + roundBitsMask = lastBitMask - 1; + float64 z = a; + if (roundingMode == float_round_nearest_even) { + z += lastBitMask>>1; + if ((z & roundBitsMask) == 0) z &= ~lastBitMask; + } + else if (roundingMode != float_round_to_zero) { + if (extractFloat64Sign(z) ^ (roundingMode == float_round_up)) { + z += roundBitsMask; + } + } + z &= ~roundBitsMask; + if (z != a) float_raise(status, float_flag_inexact); + return z; +} + +/*---------------------------------------------------------------------------- +| Extracts the fractional portion of double-precision floating-point value `a', +| and returns the result as a double-precision floating-point value. The +| fractional results are precise. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
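+|
+| For example (sketch): the fractional part keeps the operand's sign, so
+| frc(+2.5) is +0.5 and frc(-2.5) is -0.5:
+|
+|     float64 f = float64_frc(BX_CONST64(0x4004000000000000), &status); // 2.5
+|     // f == BX_CONST64(0x3FE0000000000000) (0.5)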
+*----------------------------------------------------------------------------*/ + +float64 float64_frc(float64 a, struct float_status_t *status) +{ + int roundingMode = get_float_rounding_mode(status); + + Bit64u aSig = extractFloat64Frac(a); + Bit16s aExp = extractFloat64Exp(a); + int aSign = extractFloat64Sign(a); + + if (aExp == 0x7FF) { + if (aSig) return propagateFloat64NaNOne(a, status); + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + + if (aExp >= 0x433) { + return packFloat64(roundingMode == float_round_down, 0, 0); + } + + if (aExp < 0x3FF) { + if (aExp == 0) { + if (aSig == 0 || get_denormals_are_zeros(status)) + return packFloat64(roundingMode == float_round_down, 0, 0); + + float_raise(status, float_flag_denormal); + if (! float_exception_masked(status, float_flag_underflow)) + float_raise(status, float_flag_underflow); + + if(get_flush_underflow_to_zero(status)) { + float_raise(status, float_flag_underflow | float_flag_inexact); + return packFloat64(aSign, 0, 0); + } + } + return a; + } + + Bit64u lastBitMask = BX_CONST64(1) << (0x433 - aExp); + Bit64u roundBitsMask = lastBitMask - 1; + + aSig &= roundBitsMask; + aSig <<= 10; + aExp--; + + if (aSig == 0) + return packFloat64(roundingMode == float_round_down, 0, 0); + + return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status); +} + +/*---------------------------------------------------------------------------- +| Extracts the exponent portion of double-precision floating-point value 'a', +| and returns the result as a double-precision floating-point value +| representing unbiased integer exponent. The operation is performed according +| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float64_getexp(float64 a, struct float_status_t *status) +{ + Bit16s aExp = extractFloat64Exp(a); + Bit64u aSig = extractFloat64Frac(a); + + if (aExp == 0x7FF) { + if (aSig) return propagateFloat64NaNOne(a, status); + return float64_positive_inf; + } + + if (aExp == 0) { + if (aSig == 0 || get_denormals_are_zeros(status)) + return float64_negative_inf; + + float_raise(status, float_flag_denormal); + normalizeFloat64Subnormal(aSig, &aExp, &aSig); + } + + return int32_to_float64(aExp - 0x3FF); +} + +/*---------------------------------------------------------------------------- +| Extracts the mantissa of double-precision floating-point value 'a' and +| returns the result as a double-precision floating-point after applying +| the mantissa interval normalization and sign control. The operation is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. 
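+|
+| Worked example (sketch; sign_ctrl = 0, interv = 0 selects interval [1,2)):
+| the exponent is simply forced to the bias while the fraction is kept, so
+| getmant(12.0) is 1.5:
+|
+|     float64 m = float64_getmant(BX_CONST64(0x4028000000000000), &status, 0, 0);
+|     // m == BX_CONST64(0x3FF8000000000000) (1.5)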
+*----------------------------------------------------------------------------*/
+
+float64 float64_getmant(float64 a, struct float_status_t *status, int sign_ctrl, int interv)
+{
+    Bit16s aExp = extractFloat64Exp(a);
+    Bit64u aSig = extractFloat64Frac(a);
+    int aSign = extractFloat64Sign(a);
+
+    if (aExp == 0x7FF) {
+        if (aSig) return propagateFloat64NaNOne(a, status);
+        if (aSign) {
+            if (sign_ctrl & 0x2) {
+                float_raise(status, float_flag_invalid);
+                return float64_default_nan;
+            }
+        }
+        return packFloat64(~sign_ctrl & aSign, 0x3FF, 0);
+    }
+
+    if (aExp == 0 && (aSig == 0 || get_denormals_are_zeros(status))) {
+        return packFloat64(~sign_ctrl & aSign, 0x3FF, 0);
+    }
+
+    if (aSign) {
+        if (sign_ctrl & 0x2) {
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+    }
+
+    if (aExp == 0) {
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+//      aExp += 0x3FE;
+        aSig &= BX_CONST64(0xFFFFFFFFFFFFF);
+    }
+
+    switch(interv) {
+    case 0x0: // interval [1,2)
+        aExp = 0x3FF;
+        break;
+    case 0x1: // interval [1/2,2)
+        aExp -= 0x3FF;
+        aExp = 0x3FF - (aExp & 0x1);
+        break;
+    case 0x2: // interval [1/2,1)
+        aExp = 0x3FE;
+        break;
+    case 0x3: // interval [3/4,3/2)
+        aExp = 0x3FF - ((aSig >> 51) & 0x1);
+        break;
+    }
+
+    return packFloat64(~sign_ctrl & aSign, aExp, aSig);
+}
+
+/*----------------------------------------------------------------------------
+| Return the result of a floating point scale of the double-precision floating
+| point value `a' by multiplying it by 2 power of the double-precision
+| floating point value 'b' converted to integral value. If the result cannot
+| be represented in double precision, then the proper overflow response (for
+| positive scaling operand), or the proper underflow response (for negative
+| scaling operand) is issued. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_scalef(float64 a, float64 b, struct float_status_t *status)
+{
+    Bit64u aSig = extractFloat64Frac(a);
+    Bit16s aExp = extractFloat64Exp(a);
+    int aSign = extractFloat64Sign(a);
+    Bit64u bSig = extractFloat64Frac(b);
+    Bit16s bExp = extractFloat64Exp(b);
+    int bSign = extractFloat64Sign(b);
+
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (bExp == 0x7FF) {
+        if (bSig) return propagateFloat64NaN(a, b, status);
+    }
+
+    if (aExp == 0x7FF) {
+        if (aSig) {
+            int aIsSignalingNaN = (aSig & BX_CONST64(0x0008000000000000)) == 0;
+            if (aIsSignalingNaN || bExp != 0x7FF || bSig)
+                return propagateFloat64NaN(a, b, status);
+
+            return bSign ? 0 : float64_positive_inf;
+        }
+
+        if (bExp == 0x7FF && bSign) {
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+        return a;
+    }
+
+    if (aExp == 0) {
+        if (aSig == 0) {
+            if (bExp == 0x7FF && ! bSign) {
+                float_raise(status, float_flag_invalid);
+                return float64_default_nan;
+            }
+            return a;
+        }
+        float_raise(status, float_flag_denormal);
+    }
+
+    if ((bExp | bSig) == 0) return a;
+
+    if (bExp == 0x7FF) {
+        if (bSign) return packFloat64(aSign, 0, 0);
+        return packFloat64(aSign, 0x7FF, 0);
+    }
+
+    if (0x40F <= bExp) {
+        // handle obvious overflow/underflow result
+        return roundAndPackFloat64(aSign, bSign ? -0x3FF : 0x7FF, aSig, status);
+    }
+
+    int scale = 0;
+
+    if (bExp < 0x3FF) {
+        if (bExp == 0)
+            float_raise(status, float_flag_denormal);
+        scale = -bSign;
+    }
+    else {
+        bSig |= BX_CONST64(0x0010000000000000);
+        int shiftCount = 0x433 - bExp;
+        Bit64u savedBSig = bSig;
+        bSig >>= shiftCount;
+        scale = (Bit32s) bSig;
+        if (bSign) {
+            if ((bSig<<shiftCount) != savedBSig) scale++;
+            scale = -scale;
+        }
+
+        if (scale > 0x1000) scale = 0x1000;
+        if (scale < -0x1000) scale = -0x1000;
+    }
+
+    if (aExp != 0) {
+        aSig |= BX_CONST64(0x0010000000000000);
+    } else {
+        aExp++;
+    }
+
+    aExp += scale - 1;
+    aSig <<= 10;
+    return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the double-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 addFloat64Sigs(float64 a, float64 b, int zSign, struct float_status_t *status)
+{
+    Bit16s aExp, bExp, zExp;
+    Bit64u aSig, bSig, zSig;
+    Bit16s expDiff;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    expDiff = aExp - bExp;
+    aSig <<= 9;
+    bSig <<= 9;
+    if (0 < expDiff) {
+        if (aExp == 0x7FF) {
+            if (aSig) return propagateFloat64NaN(a, b, status);
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return a;
+        }
+        if ((aExp == 0) && aSig)
+            float_raise(status, float_flag_denormal);
+
+        if (bExp == 0) {
+            if (bSig) float_raise(status, float_flag_denormal);
+            --expDiff;
+        }
+        else bSig |= BX_CONST64(0x2000000000000000);
+
+        bSig = shift64RightJamming(bSig, expDiff);
+        zExp = aExp;
+    }
+    else if (expDiff < 0) {
+        if (bExp == 0x7FF) {
+            if (bSig) return propagateFloat64NaN(a, b, status);
+            if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloat64(zSign, 0x7FF, 0);
+        }
+        if ((bExp == 0) && bSig)
+            float_raise(status, float_flag_denormal);
+
+        if (aExp == 0) {
+            if (aSig) float_raise(status, float_flag_denormal);
+            ++expDiff;
+        }
+        else aSig |= BX_CONST64(0x2000000000000000);
+
+        aSig = shift64RightJamming(aSig, -expDiff);
+        zExp = bExp;
+    }
+    else {
+        if (aExp == 0x7FF) {
+            if (aSig | bSig) return propagateFloat64NaN(a, b, status);
+            return a;
+        }
+        if (aExp == 0) {
+            zSig = (aSig + bSig) >> 9;
+            if (aSig | bSig) {
+                float_raise(status, float_flag_denormal);
+                if (get_flush_underflow_to_zero(status) && (extractFloat64Frac(zSig) == zSig)) {
+                    float_raise(status, float_flag_underflow | float_flag_inexact);
+                    return packFloat64(zSign, 0, 0);
+                }
+                if (!
float_exception_masked(status, float_flag_underflow)) { + if (extractFloat64Frac(zSig) == zSig) + float_raise(status, float_flag_underflow); + } + } + return packFloat64(zSign, 0, zSig); + } + zSig = BX_CONST64(0x4000000000000000) + aSig + bSig; + return roundAndPackFloat64(zSign, aExp, zSig, status); + } + aSig |= BX_CONST64(0x2000000000000000); + zSig = (aSig + bSig)<<1; + --zExp; + if ((Bit64s) zSig < 0) { + zSig = aSig + bSig; + ++zExp; + } + return roundAndPackFloat64(zSign, zExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the absolute values of the double- +| precision floating-point values `a' and `b'. If `zSign' is 1, the +| difference is negated before being returned. `zSign' is ignored if the +| result is a NaN. The subtraction is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static float64 subFloat64Sigs(float64 a, float64 b, int zSign, struct float_status_t *status) +{ + Bit16s aExp, bExp, zExp; + Bit64u aSig, bSig, zSig; + Bit16s expDiff; + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + bSig = extractFloat64Frac(b); + bExp = extractFloat64Exp(b); + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + } + + expDiff = aExp - bExp; + aSig <<= 10; + bSig <<= 10; + if (0 < expDiff) goto aExpBigger; + if (expDiff < 0) goto bExpBigger; + if (aExp == 0x7FF) { + if (aSig | bSig) return propagateFloat64NaN(a, b, status); + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + if (aExp == 0) { + if (aSig | bSig) float_raise(status, float_flag_denormal); + aExp = 1; + bExp = 1; + } + if (bSig < aSig) goto aBigger; + if (aSig < bSig) goto bBigger; + return packFloat64(get_float_rounding_mode(status) == float_round_down, 0, 0); + bExpBigger: + if (bExp == 0x7FF) { + if (bSig) return propagateFloat64NaN(a, b, status); + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloat64(zSign ^ 1, 0x7FF, 0); + } + if ((bExp == 0) && bSig) + float_raise(status, float_flag_denormal); + + if (aExp == 0) { + if (aSig) float_raise(status, float_flag_denormal); + ++expDiff; + } + else aSig |= BX_CONST64(0x4000000000000000); + + aSig = shift64RightJamming(aSig, -expDiff); + bSig |= BX_CONST64(0x4000000000000000); + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if (aExp == 0x7FF) { + if (aSig) return propagateFloat64NaN(a, b, status); + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return a; + } + if ((aExp == 0) && aSig) + float_raise(status, float_flag_denormal); + + if (bExp == 0) { + if (bSig) float_raise(status, float_flag_denormal); + --expDiff; + } + else bSig |= BX_CONST64(0x4000000000000000); + + bSig = shift64RightJamming(bSig, expDiff); + aSig |= BX_CONST64(0x4000000000000000); + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the double-precision floating-point values `a' +| and `b'. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. 
+*----------------------------------------------------------------------------*/ + +float64 float64_add(float64 a, float64 b, struct float_status_t *status) +{ + int aSign = extractFloat64Sign(a); + int bSign = extractFloat64Sign(b); + + if (aSign == bSign) { + return addFloat64Sigs(a, b, aSign, status); + } + else { + return subFloat64Sigs(a, b, aSign, status); + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the double-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float64_sub(float64 a, float64 b, struct float_status_t *status) +{ + int aSign = extractFloat64Sign(a); + int bSign = extractFloat64Sign(b); + + if (aSign == bSign) { + return subFloat64Sigs(a, b, aSign, status); + } + else { + return addFloat64Sigs(a, b, aSign, status); + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the double-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float64_mul(float64 a, float64 b, struct float_status_t *status) +{ + int aSign, bSign, zSign; + Bit16s aExp, bExp, zExp; + Bit64u aSig, bSig, zSig0, zSig1; + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + bSig = extractFloat64Frac(b); + bExp = extractFloat64Exp(b); + bSign = extractFloat64Sign(b); + zSign = aSign ^ bSign; + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + } + + if (aExp == 0x7FF) { + if (aSig || ((bExp == 0x7FF) && bSig)) { + return propagateFloat64NaN(a, b, status); + } + if ((bExp | bSig) == 0) { + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return packFloat64(zSign, 0x7FF, 0); + } + if (bExp == 0x7FF) { + if (bSig) return propagateFloat64NaN(a, b, status); + if ((aExp | aSig) == 0) { + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloat64(zSign, 0x7FF, 0); + } + if (aExp == 0) { + if (aSig == 0) { + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return packFloat64(zSign, 0, 0); + } + float_raise(status, float_flag_denormal); + normalizeFloat64Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + if (bSig == 0) return packFloat64(zSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat64Subnormal(bSig, &bExp, &bSig); + } + zExp = aExp + bExp - 0x3FF; + aSig = (aSig | BX_CONST64(0x0010000000000000))<<10; + bSig = (bSig | BX_CONST64(0x0010000000000000))<<11; + mul64To128(aSig, bSig, &zSig0, &zSig1); + zSig0 |= (zSig1 != 0); + if (0 <= (Bit64s) (zSig0<<1)) { + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat64(zSign, zExp, zSig0, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of dividing the double-precision floating-point value `a' +| by the corresponding value `b'. The operation is performed according to +| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
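+|
+| E.g. (sketch): 1.0 / 3.0 has no exact double-precision representation, so
+| the quotient is rounded and float_flag_inexact is raised:
+|
+|     float64 q = float64_div(BX_CONST64(0x3FF0000000000000),
+|                             BX_CONST64(0x4008000000000000), &status);
+|     // q == BX_CONST64(0x3FD5555555555555) under round-to-nearest-even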
+*----------------------------------------------------------------------------*/ + +float64 float64_div(float64 a, float64 b, struct float_status_t *status) +{ + int aSign, bSign, zSign; + Bit16s aExp, bExp, zExp; + Bit64u aSig, bSig, zSig; + Bit64u rem0, rem1; + Bit64u term0, term1; + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + bSig = extractFloat64Frac(b); + bExp = extractFloat64Exp(b); + bSign = extractFloat64Sign(b); + zSign = aSign ^ bSign; + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + if (bExp == 0) bSig = 0; + } + + if (aExp == 0x7FF) { + if (aSig) return propagateFloat64NaN(a, b, status); + if (bExp == 0x7FF) { + if (bSig) return propagateFloat64NaN(a, b, status); + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return packFloat64(zSign, 0x7FF, 0); + } + if (bExp == 0x7FF) { + if (bSig) return propagateFloat64NaN(a, b, status); + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloat64(zSign, 0, 0); + } + if (bExp == 0) { + if (bSig == 0) { + if ((aExp | aSig) == 0) { + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + float_raise(status, float_flag_divbyzero); + return packFloat64(zSign, 0x7FF, 0); + } + float_raise(status, float_flag_denormal); + normalizeFloat64Subnormal(bSig, &bExp, &bSig); + } + if (aExp == 0) { + if (aSig == 0) return packFloat64(zSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat64Subnormal(aSig, &aExp, &aSig); + } + zExp = aExp - bExp + 0x3FD; + aSig = (aSig | BX_CONST64(0x0010000000000000))<<10; + bSig = (bSig | BX_CONST64(0x0010000000000000))<<11; + if (bSig <= (aSig + aSig)) { + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv128To64(aSig, 0, bSig); + if ((zSig & 0x1FF) <= 2) { + mul64To128(bSig, zSig, &term0, &term1); + sub128(aSig, 0, term0, term1, &rem0, &rem1); + while ((Bit64s) rem0 < 0) { + --zSig; + add128(rem0, rem1, 0, bSig, &rem0, &rem1); + } + zSig |= (rem1 != 0); + } + return roundAndPackFloat64(zSign, zExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Returns the square root of the double-precision floating-point value `a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float64_sqrt(float64 a, struct float_status_t *status) +{ + int aSign; + Bit16s aExp, zExp; + Bit64u aSig, zSig, doubleZSig; + Bit64u rem0, rem1, term0, term1; + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + + if (aExp == 0x7FF) { + if (aSig) return propagateFloat64NaNOne(a, status); + if (! 
aSign) return a; + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + + if (get_denormals_are_zeros(status)) { + if (aExp == 0) aSig = 0; + } + + if (aSign) { + if ((aExp | aSig) == 0) return packFloat64(aSign, 0, 0); + float_raise(status, float_flag_invalid); + return float64_default_nan; + } + if (aExp == 0) { + if (aSig == 0) return 0; + float_raise(status, float_flag_denormal); + normalizeFloat64Subnormal(aSig, &aExp, &aSig); + } + zExp = ((aExp - 0x3FF)>>1) + 0x3FE; + aSig |= BX_CONST64(0x0010000000000000); + zSig = estimateSqrt32(aExp, (Bit32u)(aSig>>21)); + aSig <<= 9 - (aExp & 1); + zSig = estimateDiv128To64(aSig, 0, zSig<<32) + (zSig<<30); + if ((zSig & 0x1FF) <= 5) { + doubleZSig = zSig<<1; + mul64To128(zSig, zSig, &term0, &term1); + sub128(aSig, 0, term0, term1, &rem0, &rem1); + while ((Bit64s) rem0 < 0) { + --zSig; + doubleZSig -= 2; + add128(rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1); + } + zSig |= ((rem0 | rem1) != 0); + } + return roundAndPackFloat64(0, zExp, zSig, status); +} + +/*---------------------------------------------------------------------------- +| Determine double-precision floating-point number class +*----------------------------------------------------------------------------*/ + +float_class_t float64_class(float64 a) +{ + Bit16s aExp = extractFloat64Exp(a); + Bit64u aSig = extractFloat64Frac(a); + int aSign = extractFloat64Sign(a); + + if(aExp == 0x7FF) { + if (aSig == 0) + return (aSign) ? float_negative_inf : float_positive_inf; + + return (aSig & BX_CONST64(0x0008000000000000)) ? float_QNaN : float_SNaN; + } + + if(aExp == 0) { + if (aSig == 0) + return float_zero; + return float_denormal; + } + + return float_normalized; +} + +/*---------------------------------------------------------------------------- +| Compare between two double precision floating point numbers. Returns +| 'float_relation_equal' if the operands are equal, 'float_relation_less' if +| the value 'a' is less than the corresponding value `b', +| 'float_relation_greater' if the value 'a' is greater than the corresponding +| value `b', or 'float_relation_unordered' otherwise. +*----------------------------------------------------------------------------*/ + +int float64_compare(float64 a, float64 b, int quiet, struct float_status_t *status) +{ + if (get_denormals_are_zeros(status)) { + a = float64_denormal_to_zero(a); + b = float64_denormal_to_zero(b); + } + + float_class_t aClass = float64_class(a); + float_class_t bClass = float64_class(b); + + if (aClass == float_SNaN || bClass == float_SNaN) { + float_raise(status, float_flag_invalid); + return float_relation_unordered; + } + + if (aClass == float_QNaN || bClass == float_QNaN) { + if (! quiet) float_raise(status, float_flag_invalid); + return float_relation_unordered; + } + + if (aClass == float_denormal || bClass == float_denormal) { + float_raise(status, float_flag_denormal); + } + + if ((a == b) || ((Bit64u) ((a | b)<<1) == 0)) return float_relation_equal; + + int aSign = extractFloat64Sign(a); + int bSign = extractFloat64Sign(b); + if (aSign != bSign) + return (aSign) ? float_relation_less : float_relation_greater; + + if (aSign ^ (a < b)) return float_relation_less; + return float_relation_greater; +} + +/*---------------------------------------------------------------------------- +| Compare between two double precision floating point numbers and return the +| smaller of them. 
+*----------------------------------------------------------------------------*/ + +float64 float64_min(float64 a, float64 b, struct float_status_t *status) +{ + if (get_denormals_are_zeros(status)) { + a = float64_denormal_to_zero(a); + b = float64_denormal_to_zero(b); + } + + return (float64_compare_two(a, b, status) == float_relation_less) ? a : b; +} + +/*---------------------------------------------------------------------------- +| Compare between two double precision floating point numbers and return the +| larger of them. +*----------------------------------------------------------------------------*/ + +float64 float64_max(float64 a, float64 b, struct float_status_t *status) +{ + if (get_denormals_are_zeros(status)) { + a = float64_denormal_to_zero(a); + b = float64_denormal_to_zero(b); + } + + return (float64_compare_two(a, b, status) == float_relation_greater) ? a : b; +} + +/*---------------------------------------------------------------------------- +| Compare between two double precision floating point numbers and return the +| smaller/larger of them. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float64 float64_minmax(float64 a, float64 b, int is_max, int is_abs, struct float_status_t *status) +{ + if (get_denormals_are_zeros(status)) { + a = float64_denormal_to_zero(a); + b = float64_denormal_to_zero(b); + } + + if (float64_is_nan(a) || float64_is_nan(b)) { + if (float64_is_signaling_nan(a)) { + return propagateFloat64NaNOne(a, status); + } + if (float64_is_signaling_nan(b)) { + return propagateFloat64NaNOne(b, status); + } + if (! float64_is_nan(b)) { + if (float64_is_denormal(b)) + float_raise(status, float_flag_denormal); + return b; + } + if (! float64_is_nan(a)) { + if (float64_is_denormal(a)) + float_raise(status, float_flag_denormal); + return a; + } + return propagateFloat64NaN(a, b, status); + } + + float64 tmp_a = a, tmp_b = b; + if (is_abs) { + tmp_a &= ~BX_CONST64(0x8000000000000000); // clear the sign bit + tmp_b &= ~BX_CONST64(0x8000000000000000); + } + + int aSign = extractFloat64Sign(tmp_a); + int bSign = extractFloat64Sign(tmp_b); + + if (float64_is_denormal(a) || float64_is_denormal(b)) + float_raise(status, float_flag_denormal); + + if (aSign != bSign) { + if (! is_max) { + return aSign ? a : b; + } else { + return aSign ? b : a; + } + } else { + if (! is_max) { + return (aSign ^ (tmp_a < tmp_b)) ? a : b; + } else { + return (aSign ^ (tmp_a < tmp_b)) ? b : a; + } + } +} + +#ifdef FLOATX80 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 32-bit two's complement integer `a' +| to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 int32_to_floatx80(Bit32s a) +{ + if (a == 0) return packFloatx80(0, 0, 0); + int zSign = (a < 0); + Bit32u absA = zSign ? 
-a : a;
+    int shiftCount = countLeadingZeros32(absA) + 32;
+    Bit64u zSig = absA;
+    return packFloatx80(zSign, 0x403E - shiftCount, zSig<<shiftCount);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero. If `a' is a NaN or the conversion overflows, the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s floatx80_to_int32_round_to_zero(floatx80 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit32s aExp;
+    Bit64u aSig, savedASig;
+    Bit32s z;
+    int shiftCount;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    if (aExp > 0x401E) {
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    if (aExp < 0x3FFF) {
+        if (aExp || aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+    shiftCount = 0x403E - aExp;
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = (Bit32s) aSig;
+    if (aSign) z = -z;
+    if ((z < 0) ^ aSign) {
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    if ((aSig<<shiftCount) != savedASig) {
+        float_raise(status, float_flag_inexact);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero. If `a' is a NaN or the conversion overflows, the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s floatx80_to_int64_round_to_zero(floatx80 a, struct float_status_t *status)
+{
+    int aSign;
+    Bit32s aExp;
+    Bit64u aSig;
+    Bit64s z;
+    int shiftCount;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return (Bit64s)(int64_indefinite);
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    shiftCount = aExp - 0x403E;
+    if (0 <= shiftCount) {
+        aSig &= BX_CONST64(0x7FFFFFFFFFFFFFFF);
+        if ((a.exp != 0xC03E) || aSig) {
+            float_raise(status, float_flag_invalid);
+        }
+        return (Bit64s)(int64_indefinite);
+    }
+    if (aExp < 0x3FFF) {
+        if (aExp || aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+    z = aSig>>(-shiftCount);
+    if ((Bit64u) (aSig<<(shiftCount & 63))) {
+        float_raise(status, float_flag_inexact);
+    }
+    if (aSign) z = -z;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32(floatx80 a, struct float_status_t *status)
+{
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1))
+            return commonNaNToFloat32(floatx80ToCommonNaN(a, status));
+
+        return packFloat32(aSign, 0xFF, 0);
+    }
+    aSig = shift64RightJamming(aSig, 33);
+    if (aExp || aSig) aExp -= 0x3F81;
+    return roundAndPackFloat32(aSign, aExp, (Bit32u) aSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64(floatx80 a, struct float_status_t *status)
+{
+    Bit32s aExp;
+    Bit64u aSig, zSig;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return float64_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1)) {
+            return commonNaNToFloat64(floatx80ToCommonNaN(a, status));
+        }
+        return packFloat64(aSign, 0x7FF, 0);
+    }
+    zSig = shift64RightJamming(aSig, 1);
+    if (aExp || aSig) aExp -= 0x3C01;
+    return roundAndPackFloat64(aSign, aExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended double-precision floating-point
+| value. The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
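+|
+| Illustration (hedged; floatx80 written as {exp, fraction} with the explicit
+| integer bit): rounding 1.5 = {0x3FFF, 0xC000000000000000} under
+| round-to-nearest-even yields 2.0 = {0x4000, 0x8000000000000000}, raising
+| float_flag_inexact and recording round-up (the x87 C1 convention used by
+| set_float_rounding_up() below).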
+*----------------------------------------------------------------------------*/ + +floatx80 floatx80_round_to_int(floatx80 a, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + int aSign; + Bit64u lastBitMask, roundBitsMask; + int roundingMode = get_float_rounding_mode(status); + floatx80 z; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a)) + { + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + Bit32s aExp = extractFloatx80Exp(a); + Bit64u aSig = extractFloatx80Frac(a); + if (0x403E <= aExp) { + if ((aExp == 0x7FFF) && (Bit64u) (aSig<<1)) { + return propagateFloatx80NaNOne(a, status); + } + return a; + } + if (aExp < 0x3FFF) { + if (aExp == 0) { + if ((aSig<<1) == 0) return a; + float_raise(status, float_flag_denormal); + } + float_raise(status, float_flag_inexact); + aSign = extractFloatx80Sign(a); + switch (roundingMode) { + case float_round_nearest_even: + if ((aExp == 0x3FFE) && (Bit64u) (aSig<<1)) { + set_float_rounding_up(status); + return packFloatx80(aSign, 0x3FFF, BX_CONST64(0x8000000000000000)); + } + break; + case float_round_down: + if (aSign) { + set_float_rounding_up(status); + return packFloatx80(1, 0x3FFF, BX_CONST64(0x8000000000000000)); + } + else { + return packFloatx80(0, 0, 0); + } + case float_round_up: + if (aSign) { + return packFloatx80(1, 0, 0); + } + else { + set_float_rounding_up(status); + return packFloatx80(0, 0x3FFF, BX_CONST64(0x8000000000000000)); + } + } + return packFloatx80(aSign, 0, 0); + } + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + if (roundingMode == float_round_nearest_even) { + z.fraction += lastBitMask>>1; + if ((z.fraction & roundBitsMask) == 0) z.fraction &= ~lastBitMask; + } + else if (roundingMode != float_round_to_zero) { + if (extractFloatx80Sign(z) ^ (roundingMode == float_round_up)) + z.fraction += roundBitsMask; + } + z.fraction &= ~roundBitsMask; + if (z.fraction == 0) { + z.exp++; + z.fraction = BX_CONST64(0x8000000000000000); + } + if (z.fraction != a.fraction) { + float_raise(status, float_flag_inexact); + if (z.fraction > a.fraction || z.exp > a.exp) + set_float_rounding_up(status); + } + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the absolute values of the extended double- +| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is +| negated before being returned. `zSign' is ignored if the result is a NaN. +| The addition is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, int zSign, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. 
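+| (It is rebuilt locally in each routine, presumably because packFloatx80()
+| is a function call and therefore cannot appear in a file-scope C
+| initializer.)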
+*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + Bit32s aExp, bExp, zExp; + Bit64u aSig, bSig, zSig0, zSig1; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) + { + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) + return propagateFloatx80NaN(a, b, status); + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return a; + } + if (bExp == 0x7FFF) { + if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status); + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (aExp == 0) { + if (aSig == 0) { + if ((bExp == 0) && bSig) { + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + return roundAndPackFloatx80(get_float_rounding_precision(status), + zSign, bExp, bSig, 0, status); + } + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + if (bSig == 0) + return roundAndPackFloatx80(get_float_rounding_precision(status), + zSign, aExp, aSig, 0, status); + + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + Bit32s expDiff = aExp - bExp; + zExp = aExp; + if (0 < expDiff) { + shift64ExtraRightJamming(bSig, 0, expDiff, &bSig, &zSig1); + } + else if (expDiff < 0) { + shift64ExtraRightJamming(aSig, 0, -expDiff, &aSig, &zSig1); + zExp = bExp; + } + else { + zSig0 = aSig + bSig; + zSig1 = 0; + goto shiftRight1; + } + zSig0 = aSig + bSig; + if ((Bit64s) zSig0 < 0) goto roundAndPack; + shiftRight1: + shift64ExtraRightJamming(zSig0, zSig1, 1, &zSig0, &zSig1); + zSig0 |= BX_CONST64(0x8000000000000000); + zExp++; + roundAndPack: + return + roundAndPackFloatx80(get_float_rounding_precision(status), + zSign, zExp, zSig0, zSig1, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the absolute values of the extended +| double-precision floating-point values `a' and `b'. If `zSign' is 1, the +| difference is negated before being returned. `zSign' is ignored if the +| result is a NaN. The subtraction is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, int zSign, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. 
+*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + Bit32s aExp, bExp, zExp; + Bit64u aSig, bSig, zSig0, zSig1; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) + { + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig<<1)) return propagateFloatx80NaN(a, b, status); + if (bExp == 0x7FFF) { + if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status); + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return a; + } + if (bExp == 0x7FFF) { + if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status); + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloatx80(zSign ^ 1, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (aExp == 0) { + if (aSig == 0) { + if (bExp == 0) { + if (bSig) { + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + return roundAndPackFloatx80(get_float_rounding_precision(status), + zSign ^ 1, bExp, bSig, 0, status); + } + return packFloatx80(get_float_rounding_mode(status) == float_round_down, 0, 0); + } + return roundAndPackFloatx80(get_float_rounding_precision(status), + zSign ^ 1, bExp, bSig, 0, status); + } + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + if (bSig == 0) + return roundAndPackFloatx80(get_float_rounding_precision(status), + zSign, aExp, aSig, 0, status); + + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + Bit32s expDiff = aExp - bExp; + if (0 < expDiff) { + shift128RightJamming(bSig, 0, expDiff, &bSig, &zSig1); + goto aBigger; + } + if (expDiff < 0) { + shift128RightJamming(aSig, 0, -expDiff, &aSig, &zSig1); + goto bBigger; + } + zSig1 = 0; + if (bSig < aSig) goto aBigger; + if (aSig < bSig) goto bBigger; + return packFloatx80(get_float_rounding_mode(status) == float_round_down, 0, 0); + bBigger: + sub128(bSig, 0, aSig, zSig1, &zSig0, &zSig1); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aBigger: + sub128(aSig, 0, bSig, zSig1, &zSig0, &zSig1); + zExp = aExp; + normalizeRoundAndPack: + return + normalizeRoundAndPackFloatx80(get_float_rounding_precision(status), + zSign, zExp, zSig0, zSig1, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the extended double-precision floating-point +| values `a' and `b'. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
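+|
+| Usage sketch (illustrative; `st' must be a fully initialized status word):
+|
+|     floatx80 three = int32_to_floatx80(3);
+|     floatx80 four  = int32_to_floatx80(4);
+|     floatx80 seven = floatx80_add(three, four, &st);  /* exact, no flags */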
+*----------------------------------------------------------------------------*/ + +floatx80 floatx80_add(floatx80 a, floatx80 b, struct float_status_t *status) +{ + int aSign = extractFloatx80Sign(a); + int bSign = extractFloatx80Sign(b); + + if (aSign == bSign) + return addFloatx80Sigs(a, b, aSign, status); + else + return subFloatx80Sigs(a, b, aSign, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the extended double-precision floating- +| point values `a' and `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_sub(floatx80 a, floatx80 b, struct float_status_t *status) +{ + int aSign = extractFloatx80Sign(a); + int bSign = extractFloatx80Sign(b); + + if (aSign == bSign) + return subFloatx80Sigs(a, b, aSign, status); + else + return addFloatx80Sigs(a, b, aSign, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the extended double-precision floating- +| point values `a' and `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_mul(floatx80 a, floatx80 b, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + int aSign, bSign, zSign; + Bit32s aExp, bExp, zExp; + Bit64u aSig, bSig, zSig0, zSig1; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) + { + invalid: + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + bSign = extractFloatx80Sign(b); + zSign = aSign ^ bSign; + + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) { + return propagateFloatx80NaN(a, b, status); + } + if (bExp == 0) { + if (bSig == 0) goto invalid; + float_raise(status, float_flag_denormal); + } + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (bExp == 0x7FFF) { + if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status); + if (aExp == 0) { + if (aSig == 0) goto invalid; + float_raise(status, float_flag_denormal); + } + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (aExp == 0) { + if (aSig == 0) { + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return packFloatx80(zSign, 0, 0); + } + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + if (bSig == 0) return packFloatx80(zSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + zExp = aExp + bExp - 0x3FFE; + mul64To128(aSig, bSig, &zSig0, &zSig1); + if (0 < (Bit64s) zSig0) { + shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); + --zExp; + } + return + 
roundAndPackFloatx80(get_float_rounding_precision(status), + zSign, zExp, zSig0, zSig1, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of dividing the extended double-precision floating-point +| value `a' by the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_div(floatx80 a, floatx80 b, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + int aSign, bSign, zSign; + Bit32s aExp, bExp, zExp; + Bit64u aSig, bSig, zSig0, zSig1; + Bit64u rem0, rem1, rem2, term0, term1, term2; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) + { + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + bSign = extractFloatx80Sign(b); + + zSign = aSign ^ bSign; + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig<<1)) return propagateFloatx80NaN(a, b, status); + if (bExp == 0x7FFF) { + if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status); + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (bExp == 0x7FFF) { + if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status); + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + return packFloatx80(zSign, 0, 0); + } + if (bExp == 0) { + if (bSig == 0) { + if ((aExp | aSig) == 0) { + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + float_raise(status, float_flag_divbyzero); + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + if (aExp == 0) { + if (aSig == 0) return packFloatx80(zSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if (bSig <= aSig) { + shift128Right(aSig, 0, 1, &aSig, &rem1); + ++zExp; + } + zSig0 = estimateDiv128To64(aSig, rem1, bSig); + mul64To128(bSig, zSig0, &term0, &term1); + sub128(aSig, rem1, term0, term1, &rem0, &rem1); + while ((Bit64s) rem0 < 0) { + --zSig0; + add128(rem0, rem1, 0, bSig, &rem0, &rem1); + } + zSig1 = estimateDiv128To64(rem1, 0, bSig); + if ((Bit64u) (zSig1<<1) <= 8) { + mul64To128(bSig, zSig1, &term1, &term2); + sub128(rem1, 0, term1, term2, &rem1, &rem2); + while ((Bit64s) rem1 < 0) { + --zSig1; + add128(rem1, rem2, 0, bSig, &rem1, &rem2); + } + zSig1 |= ((rem1 | rem2) != 0); + } + return + roundAndPackFloatx80(get_float_rounding_precision(status), + zSign, zExp, zSig0, zSig1, status); +} + +/*---------------------------------------------------------------------------- +| Returns the square root of the extended double-precision 
floating-point +| value `a'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_sqrt(floatx80 a, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + int aSign; + Bit32s aExp, zExp; + Bit64u aSig0, aSig1, zSig0, zSig1, doubleZSig0; + Bit64u rem0, rem1, rem2, rem3, term0, term1, term2, term3; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a)) + { + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + aSig0 = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig0<<1)) return propagateFloatx80NaNOne(a, status); + if (! aSign) return a; + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + if (aSign) { + if ((aExp | aSig0) == 0) return a; + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + if (aExp == 0) { + if (aSig0 == 0) return packFloatx80(0, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); + } + zExp = ((aExp - 0x3FFF)>>1) + 0x3FFF; + zSig0 = estimateSqrt32(aExp, aSig0>>32); + shift128Right(aSig0, 0, 2 + (aExp & 1), &aSig0, &aSig1); + zSig0 = estimateDiv128To64(aSig0, aSig1, zSig0<<32) + (zSig0<<30); + doubleZSig0 = zSig0<<1; + mul64To128(zSig0, zSig0, &term0, &term1); + sub128(aSig0, aSig1, term0, term1, &rem0, &rem1); + while ((Bit64s) rem0 < 0) { + --zSig0; + doubleZSig0 -= 2; + add128(rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1); + } + zSig1 = estimateDiv128To64(rem1, 0, doubleZSig0); + if ((zSig1 & BX_CONST64(0x3FFFFFFFFFFFFFFF)) <= 5) { + if (zSig1 == 0) zSig1 = 1; + mul64To128(doubleZSig0, zSig1, &term1, &term2); + sub128(rem1, 0, term1, term2, &rem1, &rem2); + mul64To128(zSig1, zSig1, &term2, &term3); + sub192(rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3); + while ((Bit64s) rem1 < 0) { + --zSig1; + shortShift128Left(0, zSig1, 1, &term2, &term3); + term3 |= 1; + term2 |= doubleZSig0; + add192(rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3); + } + zSig1 |= ((rem1 | rem2 | rem3) != 0); + } + shortShift128Left(0, zSig1, 1, &zSig0, &zSig1); + zSig0 |= doubleZSig0; + return + roundAndPackFloatx80(get_float_rounding_precision(status), + 0, zExp, zSig0, zSig1, status); +} + +#endif + +#ifdef FLOAT128 + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point value `a' to the quadruple-precision floating-point format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
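+|
+| Widening is exact: float128 carries a 113-bit significand, so for finite
+| normal values a floatx80 -> float128 -> floatx80 round trip reproduces the
+| input bit for bit.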
+*----------------------------------------------------------------------------*/ + +float128 floatx80_to_float128(floatx80 a, struct float_status_t *status) +{ + Bit64u zSig0, zSig1; + + Bit64u aSig = extractFloatx80Frac(a); + Bit32s aExp = extractFloatx80Exp(a); + int aSign = extractFloatx80Sign(a); + + if ((aExp == 0x7FFF) && (Bit64u) (aSig<<1)) + return commonNaNToFloat128(floatx80ToCommonNaN(a, status)); + + shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); + return packFloat128Four(aSign, aExp, zSig0, zSig1); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point +| value `a' to the extended double-precision floating-point format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 float128_to_floatx80(float128 a, struct float_status_t *status) +{ + Bit32s aExp; + Bit64u aSig0, aSig1; + + aSig1 = extractFloat128Frac1(a); + aSig0 = extractFloat128Frac0(a); + aExp = extractFloat128Exp(a); + int aSign = extractFloat128Sign(a); + + if (aExp == 0x7FFF) { + if (aSig0 | aSig1) + return commonNaNToFloatx80(float128ToCommonNaN(a, status)); + + return packFloatx80(aSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + + if (aExp == 0) { + if ((aSig0 | aSig1) == 0) return packFloatx80(aSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat128Subnormal(aSig0, aSig1, &aExp, &aSig0, &aSig1); + } + else aSig0 |= BX_CONST64(0x0001000000000000); + + shortShift128Left(aSig0, aSig1, 15, &aSig0, &aSig1); + return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the extended double-precision floating- +| point value `a' and quadruple-precision floating point value `b'. The +| operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +floatx80 floatx80_128_mul(floatx80 a, float128 b, struct float_status_t *status) +{ +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. 
+*----------------------------------------------------------------------------*/ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + Bit32s aExp, bExp, zExp; + Bit64u aSig, bSig0, bSig1, zSig0, zSig1, zSig2; + int aSign, bSign, zSign; + + // handle unsupported extended double-precision floating encodings + if (floatx80_is_unsupported(a)) + { + invalid: + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + bSig0 = extractFloat128Frac0(b); + bSig1 = extractFloat128Frac1(b); + bExp = extractFloat128Exp(b); + bSign = extractFloat128Sign(b); + + zSign = aSign ^ bSign; + + if (aExp == 0x7FFF) { + if ((Bit64u) (aSig<<1) + || ((bExp == 0x7FFF) && (bSig0 | bSig1))) + { + floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b, status)); + return propagateFloatx80NaN(a, r, status); + } + if (bExp == 0) { + if ((bSig0 | bSig1) == 0) goto invalid; + float_raise(status, float_flag_denormal); + } + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (bExp == 0x7FFF) { + if (bSig0 | bSig1) { + floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b, status)); + return propagateFloatx80NaN(a, r, status); + } + if (aExp == 0) { + if (aSig == 0) goto invalid; + float_raise(status, float_flag_denormal); + } + return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (aExp == 0) { + if (aSig == 0) { + if ((bExp == 0) && (bSig0 | bSig1)) float_raise(status, float_flag_denormal); + return packFloatx80(zSign, 0, 0); + } + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + if ((bSig0 | bSig1) == 0) return packFloatx80(zSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1); + } + else bSig0 |= BX_CONST64(0x0001000000000000); + + zExp = aExp + bExp - 0x3FFE; + shortShift128Left(bSig0, bSig1, 15, &bSig0, &bSig1); + mul128By64To192(bSig0, bSig1, aSig, &zSig0, &zSig1, &zSig2); + if (0 < (Bit64s) zSig0) { + shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); + --zExp; + } + return + roundAndPackFloatx80(get_float_rounding_precision(status), + zSign, zExp, zSig0, zSig1, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the absolute values of the quadruple-precision +| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +| before being returned. `zSign' is ignored if the result is a NaN. +| The addition is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. 
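+|
+| Outline of the code below: the significand of the smaller-exponent operand
+| is right-shifted with sticky-bit jamming until the exponents match, the
+| significands are added, and a carry out shifts the sum right one place
+| while incrementing the exponent.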
+*----------------------------------------------------------------------------*/ + +static float128 addFloat128Sigs(float128 a, float128 b, int zSign, struct float_status_t *status) +{ + Bit32s aExp, bExp, zExp; + Bit64u aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + Bit32s expDiff; + + aSig1 = extractFloat128Frac1(a); + aSig0 = extractFloat128Frac0(a); + aExp = extractFloat128Exp(a); + bSig1 = extractFloat128Frac1(b); + bSig0 = extractFloat128Frac0(b); + bExp = extractFloat128Exp(b); + expDiff = aExp - bExp; + + if (0 < expDiff) { + if (aExp == 0x7FFF) { + if (aSig0 | aSig1) return propagateFloat128NaN(a, b, status); + return a; + } + if (bExp == 0) --expDiff; + else bSig0 |= BX_CONST64(0x0001000000000000); + shift128ExtraRightJamming(bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2); + zExp = aExp; + } + else if (expDiff < 0) { + if (bExp == 0x7FFF) { + if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status); + return packFloat128Four(zSign, 0x7FFF, 0, 0); + } + if (aExp == 0) ++expDiff; + else aSig0 |= BX_CONST64(0x0001000000000000); + shift128ExtraRightJamming(aSig0, aSig1, 0, -expDiff, &aSig0, &aSig1, &zSig2); + zExp = bExp; + } + else { + if (aExp == 0x7FFF) { + if (aSig0 | aSig1 | bSig0 | bSig1) + return propagateFloat128NaN(a, b, status); + + return a; + } + add128(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1); + if (aExp == 0) return packFloat128Four(zSign, 0, zSig0, zSig1); + zSig2 = 0; + zSig0 |= BX_CONST64(0x0002000000000000); + zExp = aExp; + goto shiftRight1; + } + aSig0 |= BX_CONST64(0x0001000000000000); + add128(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1); + --zExp; + if (zSig0 < BX_CONST64(0x0002000000000000)) goto roundAndPack; + ++zExp; + shiftRight1: + shift128ExtraRightJamming(zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2); + roundAndPack: + return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the absolute values of the quadruple- +| precision floating-point values `a' and `b'. If `zSign' is 1, the +| difference is negated before being returned. `zSign' is ignored if the +| result is a NaN. The subtraction is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
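+|
+| Note: both significands are pre-shifted left by 14 bits so that guard bits
+| survive the cancellation; the final normalizeRoundAndPackFloat128() call
+| compensates by packing with an exponent reduced by 14.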
+*----------------------------------------------------------------------------*/ + +static float128 subFloat128Sigs(float128 a, float128 b, int zSign, struct float_status_t *status) +{ + Bit32s aExp, bExp, zExp; + Bit64u aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; + Bit32s expDiff; + + aSig1 = extractFloat128Frac1(a); + aSig0 = extractFloat128Frac0(a); + aExp = extractFloat128Exp(a); + bSig1 = extractFloat128Frac1(b); + bSig0 = extractFloat128Frac0(b); + bExp = extractFloat128Exp(b); + + expDiff = aExp - bExp; + shortShift128Left(aSig0, aSig1, 14, &aSig0, &aSig1); + shortShift128Left(bSig0, bSig1, 14, &bSig0, &bSig1); + if (0 < expDiff) goto aExpBigger; + if (expDiff < 0) goto bExpBigger; + if (aExp == 0x7FFF) { + if (aSig0 | aSig1 | bSig0 | bSig1) + return propagateFloat128NaN(a, b, status); + + float_raise(status, float_flag_invalid); + return float128_default_nan; + } + if (aExp == 0) { + aExp = 1; + bExp = 1; + } + if (bSig0 < aSig0) goto aBigger; + if (aSig0 < bSig0) goto bBigger; + if (bSig1 < aSig1) goto aBigger; + if (aSig1 < bSig1) goto bBigger; + return packFloat128(0, 0); + + bExpBigger: + if (bExp == 0x7FFF) { + if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status); + return packFloat128Four(zSign ^ 1, 0x7FFF, 0, 0); + } + if (aExp == 0) ++expDiff; + else { + aSig0 |= BX_CONST64(0x4000000000000000); + } + shift128RightJamming(aSig0, aSig1, - expDiff, &aSig0, &aSig1); + bSig0 |= BX_CONST64(0x4000000000000000); + bBigger: + sub128(bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if (aExp == 0x7FFF) { + if (aSig0 | aSig1) return propagateFloat128NaN(a, b, status); + return a; + } + if (bExp == 0) --expDiff; + else { + bSig0 |= BX_CONST64(0x4000000000000000); + } + shift128RightJamming(bSig0, bSig1, expDiff, &bSig0, &bSig1); + aSig0 |= BX_CONST64(0x4000000000000000); + aBigger: + sub128(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1); + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of adding the quadruple-precision floating-point values +| `a' and `b'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 float128_add(float128 a, float128 b, struct float_status_t *status) +{ + int aSign = extractFloat128Sign(a); + int bSign = extractFloat128Sign(b); + + if (aSign == bSign) { + return addFloat128Sigs(a, b, aSign, status); + } + else { + return subFloat128Sigs(a, b, aSign, status); + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of subtracting the quadruple-precision floating-point +| values `a' and `b'. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. 
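+|
+| Like float128_add() above, it dispatches on the operand signs: equal signs
+| go to subFloat128Sigs(), differing signs to addFloat128Sigs().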
+*----------------------------------------------------------------------------*/ + +float128 float128_sub(float128 a, float128 b, struct float_status_t *status) +{ + int aSign = extractFloat128Sign(a); + int bSign = extractFloat128Sign(b); + + if (aSign == bSign) { + return subFloat128Sigs(a, b, aSign, status); + } + else { + return addFloat128Sigs(a, b, aSign, status); + } +} + +/*---------------------------------------------------------------------------- +| Returns the result of multiplying the quadruple-precision floating-point +| values `a' and `b'. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 float128_mul(float128 a, float128 b, struct float_status_t *status) +{ + int aSign, bSign, zSign; + Bit32s aExp, bExp, zExp; + Bit64u aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; + + aSig1 = extractFloat128Frac1(a); + aSig0 = extractFloat128Frac0(a); + aExp = extractFloat128Exp(a); + aSign = extractFloat128Sign(a); + bSig1 = extractFloat128Frac1(b); + bSig0 = extractFloat128Frac0(b); + bExp = extractFloat128Exp(b); + bSign = extractFloat128Sign(b); + + zSign = aSign ^ bSign; + if (aExp == 0x7FFF) { + if ((aSig0 | aSig1) || ((bExp == 0x7FFF) && (bSig0 | bSig1))) { + return propagateFloat128NaN(a, b, status); + } + if ((bExp | bSig0 | bSig1) == 0) { + float_raise(status, float_flag_invalid); + return float128_default_nan; + } + return packFloat128Four(zSign, 0x7FFF, 0, 0); + } + if (bExp == 0x7FFF) { + if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status); + if ((aExp | aSig0 | aSig1) == 0) { + float_raise(status, float_flag_invalid); + return float128_default_nan; + } + return packFloat128Four(zSign, 0x7FFF, 0, 0); + } + if (aExp == 0) { + if ((aSig0 | aSig1) == 0) return packFloat128Four(zSign, 0, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat128Subnormal(aSig0, aSig1, &aExp, &aSig0, &aSig1); + } + if (bExp == 0) { + if ((bSig0 | bSig1) == 0) return packFloat128Four(zSign, 0, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1); + } + zExp = aExp + bExp - 0x4000; + aSig0 |= BX_CONST64(0x0001000000000000); + shortShift128Left(bSig0, bSig1, 16, &bSig0, &bSig1); + mul128To256(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3); + add128(zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1); + zSig2 |= (zSig3 != 0); + if (BX_CONST64(0x0002000000000000) <= zSig0) { + shift128ExtraRightJamming(zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2); + ++zExp; + } + return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of dividing the quadruple-precision floating-point value +| `a' by the corresponding value `b'. The operation is performed according to +| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
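+|
+| As in the narrower formats, x/0 for finite nonzero x raises
+| float_flag_divbyzero and returns a correctly signed infinity, while 0/0
+| raises float_flag_invalid and returns the default NaN.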
+*----------------------------------------------------------------------------*/ + +float128 float128_div(float128 a, float128 b, struct float_status_t *status) +{ + int aSign, bSign, zSign; + Bit32s aExp, bExp, zExp; + Bit64u aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + Bit64u rem0, rem1, rem2, rem3, term0, term1, term2, term3; + + aSig1 = extractFloat128Frac1(a); + aSig0 = extractFloat128Frac0(a); + aExp = extractFloat128Exp(a); + aSign = extractFloat128Sign(a); + bSig1 = extractFloat128Frac1(b); + bSig0 = extractFloat128Frac0(b); + bExp = extractFloat128Exp(b); + bSign = extractFloat128Sign(b); + + zSign = aSign ^ bSign; + if (aExp == 0x7FFF) { + if (aSig0 | aSig1) return propagateFloat128NaN(a, b, status); + if (bExp == 0x7FFF) { + if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status); + float_raise(status, float_flag_invalid); + return float128_default_nan; + } + return packFloat128Four(zSign, 0x7FFF, 0, 0); + } + if (bExp == 0x7FFF) { + if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status); + return packFloat128Four(zSign, 0, 0, 0); + } + if (bExp == 0) { + if ((bSig0 | bSig1) == 0) { + if ((aExp | aSig0 | aSig1) == 0) { + float_raise(status, float_flag_invalid); + return float128_default_nan; + } + float_raise(status, float_flag_divbyzero); + return packFloat128Four(zSign, 0x7FFF, 0, 0); + } + float_raise(status, float_flag_denormal); + normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1); + } + if (aExp == 0) { + if ((aSig0 | aSig1) == 0) return packFloat128Four(zSign, 0, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat128Subnormal(aSig0, aSig1, &aExp, &aSig0, &aSig1); + } + zExp = aExp - bExp + 0x3FFD; + shortShift128Left( + aSig0 | BX_CONST64(0x0001000000000000), aSig1, 15, &aSig0, &aSig1); + shortShift128Left( + bSig0 | BX_CONST64(0x0001000000000000), bSig1, 15, &bSig0, &bSig1); + if (le128(bSig0, bSig1, aSig0, aSig1)) { + shift128Right(aSig0, aSig1, 1, &aSig0, &aSig1); + ++zExp; + } + zSig0 = estimateDiv128To64(aSig0, aSig1, bSig0); + mul128By64To192(bSig0, bSig1, zSig0, &term0, &term1, &term2); + sub192(aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2); + while ((Bit64s) rem0 < 0) { + --zSig0; + add192(rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2); + } + zSig1 = estimateDiv128To64(rem1, rem2, bSig0); + if ((zSig1 & 0x3FFF) <= 4) { + mul128By64To192(bSig0, bSig1, zSig1, &term1, &term2, &term3); + sub192(rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3); + while ((Bit64s) rem1 < 0) { + --zSig1; + add192(rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3); + } + zSig1 |= ((rem1 | rem2 | rem3) != 0); + } + shift128ExtraRightJamming(zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2); + return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the 64-bit two's complement integer `a' to +| the quadruple-precision floating-point format. The conversion is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +float128 int64_to_float128(Bit64s a) +{ + Bit64u zSig0, zSig1; + + if (a == 0) return packFloat128Four(0, 0, 0, 0); + int zSign = (a < 0); + Bit64u absA = zSign ? 
- a : a; + Bit8u shiftCount = countLeadingZeros64(absA) + 49; + Bit32s zExp = 0x406E - shiftCount; + if (64 <= shiftCount) { + zSig1 = 0; + zSig0 = absA; + shiftCount -= 64; + } + else { + zSig1 = absA; + zSig0 = 0; + } + shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1); + return packFloat128Four(zSign, zExp, zSig0, zSig1); +} + +#endif diff --git a/src/cpu/softfloat/softfloat.h b/src/cpu/softfloat/softfloat.h new file mode 100644 index 000000000..1d1b0f08f --- /dev/null +++ b/src/cpu/softfloat/softfloat.h @@ -0,0 +1,488 @@ +/*============================================================================ +This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +/*============================================================================ + * Adapted for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#include "config.h" /* generated by configure script from config.h.in */ + +#ifndef _SOFTFLOAT_H_ +#define _SOFTFLOAT_H_ + +#define FLOAT16 +#define FLOATX80 + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point types. +*----------------------------------------------------------------------------*/ +#ifdef FLOAT16 +typedef Bit16u float16; +#endif +typedef Bit32u float32; +typedef Bit64u float64; + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point class. 
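+| Each format provides a classifier (float16_class(), float32_class(), ...);
+| for example, the single-precision encoding 0x7FC00000 classifies as
+| float_QNaN and 0x00000001 as float_denormal.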
+*----------------------------------------------------------------------------*/ +typedef enum { + float_zero, + float_SNaN, + float_QNaN, + float_negative_inf, + float_positive_inf, + float_denormal, + float_normalized +} float_class_t; + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point NaN operands handling mode. +*----------------------------------------------------------------------------*/ +enum float_nan_handling_mode_t { + float_larger_significand_nan = 0, // this mode used by x87 FPU + float_first_operand_nan = 1 // this mode used by SSE +}; + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point rounding mode. +*----------------------------------------------------------------------------*/ +enum float_round_t { + float_round_nearest_even = 0, + float_round_down = 1, + float_round_up = 2, + float_round_to_zero = 3 +}; + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point exception flags. +*----------------------------------------------------------------------------*/ +enum float_exception_flag_t { + float_flag_invalid = 0x01, + float_flag_denormal = 0x02, + float_flag_divbyzero = 0x04, + float_flag_overflow = 0x08, + float_flag_underflow = 0x10, + float_flag_inexact = 0x20 +}; + +extern const unsigned float_all_exceptions_mask; + +#ifdef FLOATX80 +#define RAISE_SW_C1 0x0200 +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point ordering relations +*----------------------------------------------------------------------------*/ +enum { + float_relation_less = -1, + float_relation_equal = 0, + float_relation_greater = 1, + float_relation_unordered = 2 +}; + +/*---------------------------------------------------------------------------- +| Options to indicate which negations to perform in float*_muladd() +| Using these differs from negating an input or output before calling +| the muladd function in that this means that a NaN doesn't have its +| sign bit inverted before it is propagated. +*----------------------------------------------------------------------------*/ +enum { + float_muladd_negate_c = 1, + float_muladd_negate_product = 2, + float_muladd_negate_result = float_muladd_negate_c | float_muladd_negate_product +}; + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE floating-point status structure. +*----------------------------------------------------------------------------*/ +struct float_status_t +{ +#ifdef FLOATX80 + int float_rounding_precision; /* floatx80 only */ +#endif + int float_rounding_mode; + int float_exception_flags; + int float_exception_masks; + int float_suppress_exception; + int float_nan_handling_mode; /* flag register */ + int flush_underflow_to_zero; /* flag register */ + int denormals_are_zeros; /* flag register */ +}; + +/*---------------------------------------------------------------------------- +| Routine to raise any or all of the software IEC/IEEE floating-point +| exception flags. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE void float_raise(struct float_status_t *status, int flags) +{ + status->float_exception_flags |= flags; +} + +/*---------------------------------------------------------------------------- +| Returns raised IEC/IEEE floating-point exception flags. 
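+| Flags accumulate across operations: a caller typically clears
+| float_exception_flags, performs one or more operations, and then tests,
+| e.g., (get_exception_flags(&st) & float_flag_inexact). Flags listed in
+| float_suppress_exception are masked out of the returned value.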
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_exception_flags(const struct float_status_t *status)
+{
+    return status->float_exception_flags & ~status->float_suppress_exception;
+}
+
+/*----------------------------------------------------------------------------
+| Routine to check if any or all of the software IEC/IEEE floating-point
+| exceptions are masked.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float_exception_masked(const struct float_status_t *status, int flag)
+{
+    return status->float_exception_masks & flag;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the current floating-point rounding mode specified by the status
+| word.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_float_rounding_mode(const struct float_status_t *status)
+{
+    return status->float_rounding_mode;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the current floating-point rounding precision (floatx80 only).
+*----------------------------------------------------------------------------*/
+
+#ifdef FLOATX80
+BX_CPP_INLINE int get_float_rounding_precision(const struct float_status_t *status)
+{
+    return status->float_rounding_precision;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the current floating-point NaN operands handling mode specified
+| by the status word.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_float_nan_handling_mode(const struct float_status_t *status)
+{
+    return status->float_nan_handling_mode;
+}
+
+/*----------------------------------------------------------------------------
+| Raises the "precision lost up" indication, reported through the x87 C1
+| status bit (RAISE_SW_C1; floatx80 only).
+*----------------------------------------------------------------------------*/
+
+#ifdef FLOATX80
+BX_CPP_INLINE void set_float_rounding_up(struct float_status_t *status)
+{
+    status->float_exception_flags |= RAISE_SW_C1;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns 1 if denormals-are-zeros (DAZ) mode is enabled in the status word;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_denormals_are_zeros(const struct float_status_t *status)
+{
+    return status->denormals_are_zeros;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if flush-underflow-to-zero (FTZ) mode is enabled in the status
+| word; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_flush_underflow_to_zero(const struct float_status_t *status)
+{
+    return status->flush_underflow_to_zero;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
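+| Note the asymmetry below: int32_to_float64() and uint32_to_float64() are
+| exact for every 32-bit integer and so take no status argument, while the
+| conversions that can round (e.g. int64_to_float64()) do.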
+*----------------------------------------------------------------------------*/ +float32 int32_to_float32(Bit32s, struct float_status_t *status); +float64 int32_to_float64(Bit32s); +float32 int64_to_float32(Bit64s, struct float_status_t *status); +float64 int64_to_float64(Bit64s, struct float_status_t *status); + +float32 uint32_to_float32(Bit32u, struct float_status_t *status); +float64 uint32_to_float64(Bit32u); +float32 uint64_to_float32(Bit64u, struct float_status_t *status); +float64 uint64_to_float64(Bit64u, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision conversion routines. +*----------------------------------------------------------------------------*/ +Bit32s float32_to_int32(float32, struct float_status_t *status); +Bit32s float32_to_int32_round_to_zero(float32, struct float_status_t *status); +Bit64s float32_to_int64(float32, struct float_status_t *status); +Bit64s float32_to_int64_round_to_zero(float32, struct float_status_t *status); +Bit32u float32_to_uint32(float32, struct float_status_t *status); +Bit32u float32_to_uint32_round_to_zero(float32, struct float_status_t *status); +Bit64u float32_to_uint64(float32, struct float_status_t *status); +Bit64u float32_to_uint64_round_to_zero(float32, struct float_status_t *status); +float64 float32_to_float64(float32, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision operations. +*----------------------------------------------------------------------------*/ +float32 float32_round_to_int(float32, Bit8u scale, struct float_status_t *status); +float32 float32_add(float32, float32, struct float_status_t *status); +float32 float32_sub(float32, float32, struct float_status_t *status); +float32 float32_mul(float32, float32, struct float_status_t *status); +float32 float32_div(float32, float32, struct float_status_t *status); +float32 float32_sqrt(float32, struct float_status_t *status); +float32 float32_frc(float32, struct float_status_t *status); +float32 float32_muladd(float32, float32, float32, int flags, struct float_status_t *status); +float32 float32_scalef(float32, float32, struct float_status_t *status); +int float32_compare(float32, float32, int quiet, struct float_status_t *status); + +BX_CPP_INLINE float32 float32_round_to_int_one(float32 a, struct float_status_t *status) +{ + return float32_round_to_int(a, 0, status); +} + +BX_CPP_INLINE float32 float32_fmadd(float32 a, float32 b, float32 c, struct float_status_t *status) +{ + return float32_muladd(a, b, c, 0, status); +} + +BX_CPP_INLINE float32 float32_fmsub(float32 a, float32 b, float32 c, struct float_status_t *status) +{ + return float32_muladd(a, b, c, float_muladd_negate_c, status); +} + +BX_CPP_INLINE float32 float32_fnmadd(float32 a, float32 b, float32 c, struct float_status_t *status) +{ + return float32_muladd(a, b, c, float_muladd_negate_product, status); +} + +BX_CPP_INLINE float32 float32_fnmsub(float32 a, float32 b, float32 c, struct float_status_t *status) +{ + return float32_muladd(a, b, c, float_muladd_negate_result, status); +} + +BX_CPP_INLINE int float32_compare_two(float32 a, float32 b, struct float_status_t *status) +{ + return float32_compare(a, b, 0, status); +} + +BX_CPP_INLINE int float32_compare_quiet(float32 a, float32 b, struct float_status_t *status) +{ + return float32_compare(a, b, 1, status); +} + +float_class_t float32_class(float32); + 
+float32 float32_min(float32 a, float32 b, struct float_status_t *status); +float32 float32_max(float32 a, float32 b, struct float_status_t *status); + +float32 float32_minmax(float32 a, float32 b, int is_max, int is_abs, struct float_status_t *status); +float32 float32_getexp(float32 a, struct float_status_t *status); +float32 float32_getmant(float32 a, struct float_status_t *status, int sign_ctrl, int interv); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision conversion routines. +*----------------------------------------------------------------------------*/ +Bit32s float64_to_int32(float64, struct float_status_t *status); +Bit32s float64_to_int32_round_to_zero(float64, struct float_status_t *status); +Bit64s float64_to_int64(float64, struct float_status_t *status); +Bit64s float64_to_int64_round_to_zero(float64, struct float_status_t *status); +Bit32u float64_to_uint32(float64, struct float_status_t *status); +Bit32u float64_to_uint32_round_to_zero(float64, struct float_status_t *status); +Bit64u float64_to_uint64(float64, struct float_status_t *status); +Bit64u float64_to_uint64_round_to_zero(float64, struct float_status_t *status); +float32 float64_to_float32(float64, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision operations. +*----------------------------------------------------------------------------*/ +float64 float64_round_to_int(float64, Bit8u scale, struct float_status_t *status); +float64 float64_add(float64, float64, struct float_status_t *status); +float64 float64_sub(float64, float64, struct float_status_t *status); +float64 float64_mul(float64, float64, struct float_status_t *status); +float64 float64_div(float64, float64, struct float_status_t *status); +float64 float64_sqrt(float64, struct float_status_t *status); +float64 float64_frc(float64, struct float_status_t *status); +float64 float64_muladd(float64, float64, float64, int flags, struct float_status_t *status); +float64 float64_scalef(float64, float64, struct float_status_t *status); +int float64_compare(float64, float64, int quiet, struct float_status_t *status); + +BX_CPP_INLINE float64 float64_round_to_int_one(float64 a, struct float_status_t *status) +{ + return float64_round_to_int(a, 0, status); +} + +BX_CPP_INLINE float64 float64_fmadd(float64 a, float64 b, float64 c, struct float_status_t *status) +{ + return float64_muladd(a, b, c, 0, status); +} + +BX_CPP_INLINE float64 float64_fmsub(float64 a, float64 b, float64 c, struct float_status_t *status) +{ + return float64_muladd(a, b, c, float_muladd_negate_c, status); +} + +BX_CPP_INLINE float64 float64_fnmadd(float64 a, float64 b, float64 c, struct float_status_t *status) +{ + return float64_muladd(a, b, c, float_muladd_negate_product, status); +} + +BX_CPP_INLINE float64 float64_fnmsub(float64 a, float64 b, float64 c, struct float_status_t *status) +{ + return float64_muladd(a, b, c, float_muladd_negate_result, status); +} + +BX_CPP_INLINE int float64_compare_two(float64 a, float64 b, struct float_status_t *status) +{ + return float64_compare(a, b, 0, status); +} + +BX_CPP_INLINE int float64_compare_quiet(float64 a, float64 b, struct float_status_t *status) +{ + return float64_compare(a, b, 1, status); +} + +float_class_t float64_class(float64); + +float64 float64_min(float64 a, float64 b, struct float_status_t *status); +float64 float64_max(float64 a, float64 b, struct float_status_t 
*status);
+
+float64 float64_minmax(float64 a, float64 b, int is_max, int is_abs, struct float_status_t *status);
+float64 float64_getexp(float64 a, struct float_status_t *status);
+float64 float64_getmant(float64 a, struct float_status_t *status, int sign_ctrl, int interv);
+
+#ifdef FLOAT16
+float32 float16_to_float32(float16, struct float_status_t *status);
+float16 float32_to_float16(float32, struct float_status_t *status);
+
+float_class_t float16_class(float16);
+#endif
+
+#ifdef FLOATX80
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+
+#ifdef BX_BIG_ENDIAN
+typedef struct floatx80 { // leave alignment to compiler
+    Bit16u exp;
+    Bit64u fraction;
+} floatx80;
+#else
+typedef struct floatx80 {
+    Bit64u fraction;
+    Bit16u exp;
+} floatx80;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+floatx80 int32_to_floatx80(Bit32s);
+floatx80 int64_to_floatx80(Bit64s);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+floatx80 float32_to_floatx80(float32, struct float_status_t *status);
+floatx80 float64_to_floatx80(float64, struct float_status_t *status);
+
+Bit32s floatx80_to_int32(floatx80, struct float_status_t *status);
+Bit32s floatx80_to_int32_round_to_zero(floatx80, struct float_status_t *status);
+Bit64s floatx80_to_int64(floatx80, struct float_status_t *status);
+Bit64s floatx80_to_int64_round_to_zero(floatx80, struct float_status_t *status);
+
+float32 floatx80_to_float32(floatx80, struct float_status_t *status);
+float64 floatx80_to_float64(floatx80, struct float_status_t *status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int(floatx80, struct float_status_t *status);
+floatx80 floatx80_add(floatx80, floatx80, struct float_status_t *status);
+floatx80 floatx80_sub(floatx80, floatx80, struct float_status_t *status);
+floatx80 floatx80_mul(floatx80, floatx80, struct float_status_t *status);
+floatx80 floatx80_div(floatx80, floatx80, struct float_status_t *status);
+floatx80 floatx80_sqrt(floatx80, struct float_status_t *status);
+
+float_class_t floatx80_class(floatx80);
+#ifdef __cplusplus
+}
+#endif
+#endif /* FLOATX80 */
+
+#ifdef FLOAT128
+
+#ifdef BX_BIG_ENDIAN
+typedef struct float128 {
+    Bit64u hi, lo;
+} float128;
+#else
+typedef struct float128 {
+    Bit64u lo, hi;
+} float128;
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/ +float128 floatx80_to_float128(floatx80 a, struct float_status_t *status); +floatx80 float128_to_floatx80(float128 a, struct float_status_t *status); + +float128 int64_to_float128(Bit64s a); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision operations. +*----------------------------------------------------------------------------*/ +floatx80 floatx80_128_mul(floatx80 a, float128 b, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE quadruple-precision operations. +*----------------------------------------------------------------------------*/ +float128 float128_add(float128 a, float128 b, struct float_status_t *status); +float128 float128_sub(float128 a, float128 b, struct float_status_t *status); +float128 float128_mul(float128 a, float128 b, struct float_status_t *status); +float128 float128_div(float128 a, float128 b, struct float_status_t *status); +#ifdef __cplusplus +} +#endif +#endif /* FLOAT128 */ + +#endif diff --git a/src/cpu/softfloat/softfloat16.cc b/src/cpu/softfloat/softfloat16.cc new file mode 100644 index 000000000..8c17d3a86 --- /dev/null +++ b/src/cpu/softfloat/softfloat16.cc @@ -0,0 +1,129 @@ +/*============================================================================ +This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. 
+=============================================================================*/ + +/*============================================================================ + * Adapted for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#include "softfloat.h" + +#ifdef FLOAT16 + +#include "softfloat-round-pack.h" +#include "softfloat-specialize.h" +#include "softfloat-macros.h" + +/*---------------------------------------------------------------------------- +| Determine half-precision floating-point number class +*----------------------------------------------------------------------------*/ + +float_class_t float16_class(float16 a) +{ + Bit16s aExp = extractFloat16Exp(a); + Bit16u aSig = extractFloat16Frac(a); + int aSign = extractFloat16Sign(a); + + if(aExp == 0x1F) { + if (aSig == 0) + return (aSign) ? float_negative_inf : float_positive_inf; + + return (aSig & 0x200) ? float_QNaN : float_SNaN; + } + + if(aExp == 0) { + if (aSig == 0) return float_zero; + return float_denormal; + } + + return float_normalized; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the half-precision floating-point value +| `a' to the single-precision floating-point format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------*/ + +float32 float16_to_float32(float16 a, struct float_status_t *status) +{ + Bit16u aSig = extractFloat16Frac(a); + Bit16s aExp = extractFloat16Exp(a); + int aSign = extractFloat16Sign(a); + + if (aExp == 0x1F) { + if (aSig) return commonNaNToFloat32(float16ToCommonNaN(a, status)); + return packFloat32(aSign, 0xFF, 0); + } + if (aExp == 0) { + // ignore denormals_are_zeros flag + if (aSig == 0) return packFloat32(aSign, 0, 0); + float_raise(status, float_flag_denormal); + normalizeFloat16Subnormal(aSig, &aExp, &aSig); + --aExp; + } + + return packFloat32(aSign, aExp + 0x70, ((Bit32u) aSig)<<13); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the half-precision floating-point format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. 
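
The +0x70 exponent adjustment in float16_to_float32 above is 127 - 15, i.e. the single-precision bias minus the half-precision bias. A hand-rolled check of that path for a normal (finite, non-denormal) input, independent of the SoftFloat API and purely illustrative:

#include <stdint.h>
#include <stdio.h>

/* Converts a *normal* half to float bits: rebias the exponent (+112)
   and widen the 10-bit fraction to 23 bits, mirroring the
   packFloat32(aSign, aExp + 0x70, aSig << 13) path above. */
static uint32_t half_to_float_bits_normal(uint16_t h)
{
    uint32_t sign = (uint32_t) (h >> 15) << 31;
    uint32_t exp  = (((uint32_t) (h >> 10) & 0x1F) + 0x70) << 23;
    uint32_t frac = (uint32_t) (h & 0x3FF) << 13;
    return sign | exp | frac;
}

int main(void)
{
    printf("%08x\n", half_to_float_bits_normal(0x3C00)); /* 1.0h -> 3f800000 */
    return 0;
}

float32_to_float16 below runs the trip in reverse; its -0x71 adjustment accounts for the same bias difference plus the significand alignment that roundAndPackFloat16 expects.
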
+*----------------------------------------------------------------------------*/ + +float16 float32_to_float16(float32 a, struct float_status_t *status) +{ + Bit32u aSig = extractFloat32Frac(a); + Bit16s aExp = extractFloat32Exp(a); + int aSign = extractFloat32Sign(a); + + if (aExp == 0xFF) { + if (aSig) return commonNaNToFloat16(float32ToCommonNaN(a, status)); + return packFloat16(aSign, 0x1F, 0); + } + if (aExp == 0) { + if (get_denormals_are_zeros(status)) aSig = 0; + if (aSig == 0) return packFloat16(aSign, 0, 0); + float_raise(status, float_flag_denormal); + } + + aSig = shift32RightJamming(aSig, 9); + Bit16u zSig = (Bit16u) aSig; + if (aExp || zSig) { + zSig |= 0x4000; + aExp -= 0x71; + } + + return roundAndPackFloat16(aSign, aExp, zSig, status); +} + +#endif diff --git a/src/cpu/softfloat/softfloatx80.cc b/src/cpu/softfloat/softfloatx80.cc new file mode 100644 index 000000000..3ac3e61b3 --- /dev/null +++ b/src/cpu/softfloat/softfloatx80.cc @@ -0,0 +1,367 @@ +/*============================================================================ +This source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) +floating point emulation. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. 
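
One detail worth calling out in float32_to_float16 above: shift32RightJamming is not a plain right shift. Any non-zero bits shifted out are OR-ed into the least-significant bit (the "sticky" bit), so the rounding step can still tell that something was discarded. The idea in isolation (the real macro also handles shift counts of 32 and more; this sketch assumes 0 < count < 32):

#include <stdint.h>
#include <stdio.h>

/* Shift right by count, "jamming" discarded non-zero bits into the LSB. */
static uint32_t shift_right_jam32(uint32_t a, int count)
{
    return (a >> count) | ((a & ((UINT32_C(1) << count) - 1)) != 0);
}

int main(void)
{
    printf("%x\n", shift_right_jam32(0x101, 4)); /* 0x11: sticky bit set */
    return 0;
}
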
+=============================================================================*/ + +/*============================================================================ + * Written for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#include "softfloatx80.h" +#include "softfloat-round-pack.h" +#include "softfloat-macros.h" + +const floatx80 Const_QNaN = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); +const floatx80 Const_Z = packFloatx80(0, 0x0000, 0); +const floatx80 Const_1 = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000)); +const floatx80 Const_L2T = packFloatx80(0, 0x4000, BX_CONST64(0xd49a784bcd1b8afe)); +const floatx80 Const_L2E = packFloatx80(0, 0x3fff, BX_CONST64(0xb8aa3b295c17f0bc)); +const floatx80 Const_PI = packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235)); +const floatx80 Const_LG2 = packFloatx80(0, 0x3ffd, BX_CONST64(0x9a209a84fbcff799)); +const floatx80 Const_LN2 = packFloatx80(0, 0x3ffe, BX_CONST64(0xb17217f7d1cf79ac)); +const floatx80 Const_INF = packFloatx80(0, 0x7fff, BX_CONST64(0x8000000000000000)); + +/*---------------------------------------------------------------------------- +| Commonly used single-precision floating point constants +*----------------------------------------------------------------------------*/ +const float32 float32_negative_inf = 0xff800000; +const float32 float32_positive_inf = 0x7f800000; +const float32 float32_negative_zero = 0x80000000; +const float32 float32_positive_zero = 0x00000000; +const float32 float32_negative_one = 0xbf800000; +const float32 float32_positive_one = 0x3f800000; +const float32 float32_max_float = 0x7f7fffff; +const float32 float32_min_float = 0xff7fffff; + +/*---------------------------------------------------------------------------- +| The pattern for a default generated single-precision NaN. +*----------------------------------------------------------------------------*/ +const float32 float32_default_nan = 0xffc00000; + +/*---------------------------------------------------------------------------- +| Commonly used single-precision floating point constants +*----------------------------------------------------------------------------*/ +const float64 float64_negative_inf = BX_CONST64(0xfff0000000000000); +const float64 float64_positive_inf = BX_CONST64(0x7ff0000000000000); +const float64 float64_negative_zero = BX_CONST64(0x8000000000000000); +const float64 float64_positive_zero = BX_CONST64(0x0000000000000000); +const float64 float64_negative_one = BX_CONST64(0xbff0000000000000); +const float64 float64_positive_one = BX_CONST64(0x3ff0000000000000); +const float64 float64_max_float = BX_CONST64(0x7fefffffffffffff); +const float64 float64_min_float = BX_CONST64(0xffefffffffffffff); + +/*---------------------------------------------------------------------------- +| The pattern for a default generated double-precision NaN. +*----------------------------------------------------------------------------*/ +const float64 float64_default_nan = BX_CONST64(0xFFF8000000000000); + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 16-bit two's complement integer format. 
The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic - which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16(floatx80 a, struct float_status_t *status)
+{
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+    }
+
+    Bit32s v32 = floatx80_to_int32(a, status);
+
+    if ((v32 > 32767) || (v32 < -32768)) {
+        status->float_exception_flags = float_flag_invalid; // throw away other flags
+        return int16_indefinite;
+    }
+
+    return (Bit16s) v32;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 16-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero. If `a' is a NaN or the conversion overflows, the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16_round_to_zero(floatx80 a, struct float_status_t *status)
+{
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+    }
+
+    Bit32s v32 = floatx80_to_int32_round_to_zero(a, status);
+
+    if ((v32 > 32767) || (v32 < -32768)) {
+        status->float_exception_flags = float_flag_invalid; // throw away other flags
+        return int16_indefinite;
+    }
+
+    return (Bit16s) v32;
+}
+
+/*----------------------------------------------------------------------------
+| Separate the source extended double-precision floating point value `a'
+| into its exponent and significand, store the significand back into
+| `a' and return the exponent. The operation performed is a superset of
+| the IEC/IEEE recommended logb(x) function.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_extract(floatx80 *a, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit64u aSig = extractFloatx80Frac(*a);
+    Bit32s aExp = extractFloatx80Exp(*a);
+    int aSign = extractFloatx80Sign(*a);
+
+    if (floatx80_is_unsupported(*a))
+    {
+        float_raise(status, float_flag_invalid);
+        *a = floatx80_default_nan;
+        return *a;
+    }
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1))
+        {
+            *a = propagateFloatx80NaNOne(*a, status);
+            return *a;
+        }
+        return packFloatx80(0, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0)
+    {
+        if (aSig == 0) {
+            float_raise(status, float_flag_divbyzero);
+            *a = packFloatx80(aSign, 0, 0);
+            return packFloatx80(1, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+
+    a->exp = (aSign << 15) + 0x3FFF;
+    a->fraction = aSig;
+    return int32_to_floatx80(aExp - 0x3FFF);
+}
+
+/*----------------------------------------------------------------------------
+| Scales extended double-precision floating-point value in operand `a' by
+| value `b'. The function truncates the value in the second operand 'b' to
+| an integral value and adds that value to the exponent of the operand 'a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_scale(floatx80 a, floatx80 b, struct float_status_t *status)
+{
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+    const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+    Bit32s aExp, bExp;
+    Bit64u aSig, bSig;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    int bSign = extractFloatx80Sign(b);
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
+        {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        if ((bExp == 0x7FFF) && bSign) {
+            float_raise(status, float_flag_invalid);
+            return floatx80_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a;
+    }
+    if (bExp == 0x7FFF) {
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if ((aExp | aSig) == 0) {
+            if (!
bSign) { + float_raise(status, float_flag_invalid); + return floatx80_default_nan; + } + return a; + } + if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal); + if (bSign) return packFloatx80(aSign, 0, 0); + return packFloatx80(aSign, 0x7FFF, BX_CONST64(0x8000000000000000)); + } + if (aExp == 0) { + if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); + if (aSig == 0) return a; + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + if (bExp < 0x3FFF) + return normalizeRoundAndPackFloatx80(80, aSign, aExp, aSig, 0, status); + } + if (bExp == 0) { + if (bSig == 0) return a; + float_raise(status, float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + + if (bExp > 0x400E) { + /* generate appropriate overflow/underflow */ + return roundAndPackFloatx80(80, aSign, + bSign ? -0x3FFF : 0x7FFF, aSig, 0, status); + } + + if (bExp < 0x3FFF) return a; + + int shiftCount = 0x403E - bExp; + bSig >>= shiftCount; + Bit32s scale = (Bit32s) bSig; + if (bSign) scale = -scale; /* -32768..32767 */ + return + roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0, status); +} + +/*---------------------------------------------------------------------------- +| Determine extended-precision floating-point number class. +*----------------------------------------------------------------------------*/ + +float_class_t floatx80_class(floatx80 a) +{ + Bit32s aExp = extractFloatx80Exp(a); + Bit64u aSig = extractFloatx80Frac(a); + + if(aExp == 0) { + if (aSig == 0) + return float_zero; + + /* denormal or pseudo-denormal */ + return float_denormal; + } + + /* valid numbers have the MS bit set */ + if (!(aSig & BX_CONST64(0x8000000000000000))) + return float_SNaN; /* report unsupported as SNaNs */ + + if(aExp == 0x7fff) { + int aSign = extractFloatx80Sign(a); + + if (((Bit64u) (aSig<< 1)) == 0) + return (aSign) ? float_negative_inf : float_positive_inf; + + return (aSig & BX_CONST64(0x4000000000000000)) ? float_QNaN : float_SNaN; + } + + return float_normalized; +} + +/*---------------------------------------------------------------------------- +| Compare between two extended precision floating point numbers. Returns +| 'float_relation_equal' if the operands are equal, 'float_relation_less' if +| the value 'a' is less than the corresponding value `b', +| 'float_relation_greater' if the value 'a' is greater than the corresponding +| value `b', or 'float_relation_unordered' otherwise. +*----------------------------------------------------------------------------*/ + +int floatx80_compare(floatx80 a, floatx80 b, int quiet, struct float_status_t *status) +{ + float_class_t aClass = floatx80_class(a); + float_class_t bClass = floatx80_class(b); + + if (aClass == float_SNaN || bClass == float_SNaN) + { + /* unsupported reported as SNaN */ + float_raise(status, float_flag_invalid); + return float_relation_unordered; + } + + if (aClass == float_QNaN || bClass == float_QNaN) { + if (! quiet) float_raise(status, float_flag_invalid); + return float_relation_unordered; + } + + if (aClass == float_denormal || bClass == float_denormal) { + float_raise(status, float_flag_denormal); + } + + int aSign = extractFloatx80Sign(a); + int bSign = extractFloatx80Sign(b); + + if (aClass == float_zero) { + if (bClass == float_zero) return float_relation_equal; + return bSign ? float_relation_greater : float_relation_less; + } + + if (bClass == float_zero || aSign != bSign) { + return aSign ? 
float_relation_less : float_relation_greater; + } + + Bit64u aSig = extractFloatx80Frac(a); + Bit32s aExp = extractFloatx80Exp(a); + Bit64u bSig = extractFloatx80Frac(b); + Bit32s bExp = extractFloatx80Exp(b); + + if (aClass == float_denormal) + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + + if (bClass == float_denormal) + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + + if (aExp == bExp && aSig == bSig) + return float_relation_equal; + + int less_than = + aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig))) + : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig))); + + if (less_than) return float_relation_less; + return float_relation_greater; +} + + +int floatx80_compare_two(floatx80 a, floatx80 b, struct float_status_t *status) +{ + return floatx80_compare(a, b, 0, status); +} + +int floatx80_compare_quiet(floatx80 a, floatx80 b, struct float_status_t *status) +{ + return floatx80_compare(a, b, 1, status); +} diff --git a/src/cpu/softfloat/softfloatx80.h b/src/cpu/softfloat/softfloatx80.h new file mode 100644 index 000000000..8378169e2 --- /dev/null +++ b/src/cpu/softfloat/softfloatx80.h @@ -0,0 +1,113 @@ +/*============================================================================ +This source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) +floating point emulation. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +/*============================================================================ + * Written for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#ifndef _SOFTFLOATX80_EXTENSIONS_H_ +#define _SOFTFLOATX80_EXTENSIONS_H_ + +#include "softfloat.h" +#include "softfloat-specialize.h" + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE integer-to-floating-point conversion routines. +*----------------------------------------------------------------------------*/ + +#ifdef __cplusplus +extern "C" { +#endif + +Bit16s floatx80_to_int16(floatx80, struct float_status_t *status); +Bit16s floatx80_to_int16_round_to_zero(floatx80, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision operations. 
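
As context for the compare prototypes that follow: floatx80_compare (implemented above in softfloatx80.cc) returns one of four relation codes, which the x87 front end then folds into the C3/C2/C0 condition bits. A sketch of that folding, assuming the usual SoftFloat relation encoding (the emulator's FPU_status_word_flags_fpu_compare is the authoritative mapping):

#include <stdio.h>

enum {
    float_relation_less      = -1,
    float_relation_equal     = 0,
    float_relation_greater   = 1,
    float_relation_unordered = 2
};

/* C0 is bit 8, C2 bit 10, C3 bit 14 of the x87 status word; FCOM-style
   compares set C3C2C0 = 000 / 001 / 100 / 111 for greater / less /
   equal / unordered respectively. */
static unsigned fcom_cc(int rc)
{
    switch (rc) {
        case float_relation_greater: return 0;
        case float_relation_less:    return 1u << 8;
        case float_relation_equal:   return 1u << 14;
        default:                     return (1u << 14) | (1u << 10) | (1u << 8);
    }
}

int main(void)
{
    printf("%04x\n", fcom_cc(float_relation_unordered)); /* 4500 */
    return 0;
}
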
+*----------------------------------------------------------------------------*/ + +floatx80 floatx80_extract(floatx80 *a, struct float_status_t *status); +floatx80 floatx80_scale(floatx80 a, floatx80 b, struct float_status_t *status); +int floatx80_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status); +int floatx80_ieee754_remainder(floatx80 a, floatx80 b, floatx80 *r, Bit64u *q, struct float_status_t *status); +floatx80 f2xm1(floatx80 a, struct float_status_t *status); +floatx80 fyl2x(floatx80 a, floatx80 b, struct float_status_t *status); +floatx80 fyl2xp1(floatx80 a, floatx80 b, struct float_status_t *status); +floatx80 fpatan(floatx80 a, floatx80 b, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision trigonometric functions. +*----------------------------------------------------------------------------*/ + +int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, struct float_status_t *status); +int fsin(floatx80 *a, struct float_status_t *status); +int fcos(floatx80 *a, struct float_status_t *status); +int ftan(floatx80 *a, struct float_status_t *status); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE extended double-precision compare. +*----------------------------------------------------------------------------*/ + +int floatx80_compare(floatx80, floatx80, int quiet, struct float_status_t *status); +int floatx80_compare_two(floatx80 a, floatx80 b, struct float_status_t *status); +int floatx80_compare_quiet(floatx80 a, floatx80 b, struct float_status_t *status); + +#ifdef __cplusplus +} +#endif + +/*----------------------------------------------------------------------------- +| Calculates the absolute value of the extended double-precision floating-point +| value `a'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE floatx80 floatx80_abs(floatx80 reg) +{ + reg.exp &= 0x7FFF; + return reg; +} + +/*----------------------------------------------------------------------------- +| Changes the sign of the extended double-precision floating-point value 'a'. +| The operation is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +BX_CPP_INLINE floatx80 floatx80_chs(floatx80 reg) +{ + reg.exp ^= 0x8000; + return reg; +} + +/*----------------------------------------------------------------------------- +| Commonly used extended double-precision floating-point constants. 
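
floatx80_abs and floatx80_chs above work purely on the sign bit: bit 15 of the 16-bit exp field holds the sign, so absolute value is a mask and sign change an XOR, with no rounding or exception side effects. Demonstrated on raw field values (standalone sketch, plain stdint types):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t exp = 0xBFFF; /* sign=1, biased exponent 0x3FFF: -1.0 */
    printf("%04x %04x %04x\n",
           exp,                        /* bfff */
           (uint16_t) (exp & 0x7FFF),  /* 3fff: floatx80_abs -> +1.0 */
           (uint16_t) (exp ^ 0x8000)); /* 3fff: floatx80_chs -> +1.0 */
    return 0;
}
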
+*----------------------------------------------------------------------------*/
+
+extern const floatx80 Const_Z;
+extern const floatx80 Const_1;
+extern const floatx80 Const_L2T;
+extern const floatx80 Const_L2E;
+extern const floatx80 Const_PI;
+extern const floatx80 Const_LG2;
+extern const floatx80 Const_LN2;
+extern const floatx80 Const_INF;
+#endif
diff --git a/src/cpu/softfloat/x87_ops_arith.h b/src/cpu/softfloat/x87_ops_arith.h
new file mode 100644
index 000000000..10b100b2a
--- /dev/null
+++ b/src/cpu/softfloat/x87_ops_arith.h
@@ -0,0 +1,750 @@
+#define sf_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix) \
+    static int sf_FADD##name##_a##a_size(uint32_t fetchdat) \
+    { \
+        floatx80 a, result; \
+        struct float_status_t status; \
+        optype temp; \
+        FP_ENTER(); \
+        FPU_check_pending_exceptions(); \
+        fetch_ea_##a_size(fetchdat); \
+        SEG_CHECK_READ(cpu_state.ea_seg); \
+        load_var = rw; \
+        if (cpu_state.abrt) \
+            return 1; \
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { \
+            FPU_stack_underflow(fetchdat, 0, 0); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (!is_nan) \
+            result = floatx80_add(a, use_var, &status); \
+ \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
+            FPU_save_regi(result, 0); \
+ \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi)); \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \
+        return 0; \
+    } \
+    static int sf_FDIV##name##_a##a_size(uint32_t fetchdat) \
+    { \
+        floatx80 a, result; \
+        struct float_status_t status; \
+        optype temp; \
+        FP_ENTER(); \
+        FPU_check_pending_exceptions(); \
+        fetch_ea_##a_size(fetchdat); \
+        SEG_CHECK_READ(cpu_state.ea_seg); \
+        load_var = rw; \
+        if (cpu_state.abrt) \
+            return 1; \
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { \
+            FPU_stack_underflow(fetchdat, 0, 0); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (!is_nan) { \
+            result = floatx80_div(a, use_var, &status); \
+        } \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
+            FPU_save_regi(result, 0); \
+ \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv##cycle_postfix) : ((x87_timings.fdiv##cycle_postfix) * cpu_multi)); \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv##cycle_postfix) : ((x87_concurrency.fdiv##cycle_postfix) * cpu_multi)); \
+        return 0; \
+    } \
+    static int sf_FDIVR##name##_a##a_size(uint32_t fetchdat) \
+    { \
+        floatx80 a, result; \
+        struct float_status_t status; \
+        optype temp; \
+        FP_ENTER(); \
+        FPU_check_pending_exceptions(); \
+        fetch_ea_##a_size(fetchdat); \
+        SEG_CHECK_READ(cpu_state.ea_seg); \
+        load_var = rw; \
+        if (cpu_state.abrt) \
+            return 1; \
+        clear_C1(); \
+        if (IS_TAG_EMPTY(0)) { \
+            FPU_stack_underflow(fetchdat, 0, 0); \
+            goto next_ins; \
+        } \
+        status = i387cw_to_softfloat_status_word(i387_get_control_word()); \
+        a = FPU_read_regi(0); \
+        if (!is_nan) { \
+            result = floatx80_div(use_var, a, &status); \
+        } \
+        if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \
+            FPU_save_regi(result, 0); \
+ \
+next_ins: \
+        CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv##cycle_postfix) : ((x87_timings.fdiv##cycle_postfix) * cpu_multi)); \
+        CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ?
(x87_concurrency.fdiv##cycle_postfix) : ((x87_concurrency.fdiv##cycle_postfix) * cpu_multi)); \ + return 0; \ + } \ + static int sf_FMUL##name##_a##a_size(uint32_t fetchdat) \ + { \ + floatx80 a, result; \ + struct float_status_t status; \ + optype temp; \ + FP_ENTER(); \ + FPU_check_pending_exceptions(); \ + fetch_ea_##a_size(fetchdat); \ + SEG_CHECK_READ(cpu_state.ea_seg); \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1;\ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_stack_underflow(fetchdat, 0, 0); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (!is_nan) { \ + result = floatx80_mul(a, use_var, &status); \ + } \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_save_regi(result, 0); \ + \ +next_ins: \ + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul##cycle_postfix) : ((x87_timings.fmul##cycle_postfix) * cpu_multi)); \ + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul##cycle_postfix) : ((x87_concurrency.fmul##cycle_postfix) * cpu_multi)); \ + return 0; \ + } \ + static int sf_FSUB##name##_a##a_size(uint32_t fetchdat) \ + { \ + floatx80 a, result; \ + struct float_status_t status; \ + optype temp; \ + FP_ENTER(); \ + FPU_check_pending_exceptions(); \ + fetch_ea_##a_size(fetchdat); \ + SEG_CHECK_READ(cpu_state.ea_seg); \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1;\ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_stack_underflow(fetchdat, 0, 0); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (!is_nan) \ + result = floatx80_sub(a, use_var, &status); \ + \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_save_regi(result, 0); \ + \ +next_ins: \ + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi)); \ + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \ + return 0; \ + } \ + static int sf_FSUBR##name##_a##a_size(uint32_t fetchdat) \ + { \ + floatx80 a, result; \ + struct float_status_t status; \ + optype temp; \ + FP_ENTER(); \ + FPU_check_pending_exceptions(); \ + fetch_ea_##a_size(fetchdat); \ + SEG_CHECK_READ(cpu_state.ea_seg); \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1;\ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_stack_underflow(fetchdat, 0, 0); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (!is_nan) \ + result = floatx80_sub(use_var, a, &status); \ + \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_save_regi(result, 0); \ + \ +next_ins: \ + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd##cycle_postfix) : ((x87_timings.fadd##cycle_postfix) * cpu_multi)); \ + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fadd##cycle_postfix) : ((x87_concurrency.fadd##cycle_postfix) * cpu_multi)); \ + return 0; \ + } + +// clang-format off +sf_FPU(s, float32, 16, temp, geteal(), float32_to_floatx80(temp, &status), FPU_handle_NaN32(a, temp, &result, &status), _32) +#ifndef FPU_8087 +sf_FPU(s, float32, 32, temp, geteal(), float32_to_floatx80(temp, &status), FPU_handle_NaN32(a, temp, &result, &status), _32) +#endif +sf_FPU(d, float64, 16, temp, geteaq(), float64_to_floatx80(temp, &status), FPU_handle_NaN64(a, temp, &result, &status), _64) +#ifndef FPU_8087 +sf_FPU(d, float64, 32, temp, geteaq(), float64_to_floatx80(temp, &status), FPU_handle_NaN64(a, temp, &result, &status), _64) +#endif + +sf_FPU(iw, uint16_t, 16, temp, geteaw(), int32_to_floatx80((int16_t)temp), 0, _i16) +#ifndef FPU_8087 +sf_FPU(iw, uint16_t, 32, temp, geteaw(), int32_to_floatx80((int16_t)temp), 0, _i16) +#endif +sf_FPU(il, uint32_t, 16, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i32) +#ifndef FPU_8087 +sf_FPU(il, uint32_t, 32, temp, geteal(), int32_to_floatx80((int32_t)temp), 0, _i32) +#endif +// clang-format on + +static int +sf_FADD_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + result = floatx80_add(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(result, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi)); + return 0; +} +static int +sf_FADD_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); + result = floatx80_add(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(result, fetchdat & 7); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi)); + return 0; +} + +static int +sf_FADDP_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 1); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); + result = floatx80_add(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, fetchdat & 7); + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi)); + return 0; +} + +static int +sf_FDIV_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + result = floatx80_div(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(result, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi)); + return 0; +} + +static int +sf_FDIV_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); + result = floatx80_div(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(result, fetchdat & 7); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi)); + return 0; +} +static int +sf_FDIVP_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 1); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); + result = floatx80_div(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, fetchdat & 7); + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi)); + return 0; +} + +static int +sf_FDIVR_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); + result = floatx80_div(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(result, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi)); + return 0; +} +static int +sf_FDIVR_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + result = floatx80_div(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(result, fetchdat & 7); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi)); + return 0; +} +static int +sf_FDIVRP_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 1); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + result = floatx80_div(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, fetchdat & 7); + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fdiv) : (x87_timings.fdiv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fdiv) : (x87_concurrency.fdiv * cpu_multi)); + return 0; +} + +static int +sf_FMUL_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + result = floatx80_mul(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul) : (x87_timings.fmul * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul) : (x87_concurrency.fmul * cpu_multi)); + return 0; +} +static int +sf_FMUL_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + result = floatx80_mul(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, fetchdat & 7); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul) : (x87_timings.fmul * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fmul) : (x87_concurrency.fmul * cpu_multi)); + return 0; +} +static int +sf_FMULP_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 1); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); + result = floatx80_mul(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, fetchdat & 7); + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fmul) : (x87_timings.fmul * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fmul) : (x87_concurrency.fmul * cpu_multi)); + return 0; +} + +static int +sf_FSUB_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + result = floatx80_sub(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi)); + return 0; +} +static int +sf_FSUB_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); + result = floatx80_sub(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, fetchdat & 7); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi)); + return 0; +} +static int +sf_FSUBP_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 1); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); + result = floatx80_sub(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, fetchdat & 7); + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi)); + return 0; +} + +static int +sf_FSUBR_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(fetchdat & 7); + b = FPU_read_regi(0); + result = floatx80_sub(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi)); + return 0; +} +static int +sf_FSUBR_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + result = floatx80_sub(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, fetchdat & 7); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi)); + return 0; +} +static int +sf_FSUBRP_sti_st0(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 1); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + result = floatx80_sub(a, b, &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, fetchdat & 7); + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fadd) : (x87_timings.fadd * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fadd) : (x87_concurrency.fadd * cpu_multi)); + return 0; +} + +static int +sf_FSQRT(uint32_t fetchdat) +{ + floatx80 result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = floatx80_sqrt(FPU_read_regi(0), &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsqrt) : (x87_timings.fsqrt * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fsqrt) : (x87_concurrency.fsqrt * cpu_multi)); + return 0; +} + +static int +sf_FRNDINT(uint32_t fetchdat) +{ + floatx80 result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = floatx80_round_to_int(FPU_read_regi(0), &status); + + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frndint) : (x87_timings.frndint * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.frndint) : (x87_concurrency.frndint * cpu_multi)); + return 0; +} diff --git a/src/cpu/softfloat/x87_ops_compare.h b/src/cpu/softfloat/x87_ops_compare.h new file mode 100644 index 000000000..59135c05b --- /dev/null +++ b/src/cpu/softfloat/x87_ops_compare.h @@ -0,0 +1,489 @@ +#define cmp_FPU(name, optype, a_size, load_var, rw, use_var, is_nan, cycle_postfix) \ + static int sf_FCOM##name##_a##a_size(uint32_t fetchdat) \ + { \ + floatx80 a; \ + int rc; \ + struct float_status_t status; \ + optype temp; \ + FP_ENTER(); \ + fetch_ea_##a_size(fetchdat); \ + SEG_CHECK_READ(cpu_state.ea_seg); \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1;\ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \ + setcc(C0 | C2 | C3); \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (is_nan) { \ + rc = float_relation_unordered; \ + float_raise(&status, float_flag_invalid); \ + } else { \ + rc = floatx80_compare_two(a, use_var, &status); \ + } \ + setcc(FPU_status_word_flags_fpu_compare(rc)); \ + FPU_exception(fetchdat, status.float_exception_flags, 0); \ + \ +next_ins: \ + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom##cycle_postfix) : ((x87_timings.fcom##cycle_postfix) * cpu_multi)); \ + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom##cycle_postfix) : ((x87_concurrency.fcom##cycle_postfix) * cpu_multi)); \ + return 0; \ + } \ + static int sf_FCOMP##name##_a##a_size(uint32_t fetchdat) \ + { \ + floatx80 a; \ + int rc; \ + struct float_status_t status; \ + optype temp; \ + FP_ENTER(); \ + fetch_ea_##a_size(fetchdat); \ + SEG_CHECK_READ(cpu_state.ea_seg); \ + load_var = rw; \ + if (cpu_state.abrt) \ + return 1;\ + clear_C1(); \ + if (IS_TAG_EMPTY(0)) { \ + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); \ + setcc(C0 | C2 | C3); \ + if (is_IA_masked()) \ + FPU_pop(); \ + \ + goto next_ins; \ + } \ + status = i387cw_to_softfloat_status_word(i387_get_control_word()); \ + a = FPU_read_regi(0); \ + if (is_nan) { \ + rc = float_relation_unordered; \ + float_raise(&status, float_flag_invalid); \ + } else { \ + rc = floatx80_compare_two(a, use_var, &status); \ + } \ + setcc(FPU_status_word_flags_fpu_compare(rc)); \ + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) \ + FPU_pop(); \ + \ +next_ins: \ + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom##cycle_postfix) : ((x87_timings.fcom##cycle_postfix) * cpu_multi)); \ + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fcom##cycle_postfix) : ((x87_concurrency.fcom##cycle_postfix) * cpu_multi)); \ + return 0; \ + } \ + +// clang-format off +cmp_FPU(s, float32, 16, temp, geteal(), float32_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float32_is_nan(temp), _32) +#ifndef FPU_8087 +cmp_FPU(s, float32, 32, temp, geteal(), float32_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float32_is_nan(temp), _32) +#endif +cmp_FPU(d, float64, 16, temp, geteaq(), float64_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float64_is_nan(temp), _64) +#ifndef FPU_8087 +cmp_FPU(d, float64, 32, temp, geteaq(), float64_to_floatx80(temp, &status), floatx80_is_nan(a) || floatx80_is_unsupported(a) || float64_is_nan(temp), _64) +#endif + +cmp_FPU(iw, int16_t, 16, temp, (int16_t)geteaw(), int32_to_floatx80((int32_t)temp), 0, _i16) +#ifndef FPU_8087 +cmp_FPU(iw, int16_t, 32, temp, (int16_t)geteaw(), int32_to_floatx80((int32_t)temp), 0, _i16) +#endif +cmp_FPU(il, int32_t, 16, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i32) +#ifndef FPU_8087 +cmp_FPU(il, int32_t, 32, temp, (int32_t)geteal(), int32_to_floatx80(temp), 0, _i32) +#endif +// clang-format on + +static int +sf_FCOM_sti(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + setcc(C0 | C2 | C3); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_two(a, b, &status); + setcc(FPU_status_word_flags_fpu_compare(rc)); + FPU_exception(fetchdat, status.float_exception_flags, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi)); + return 0; +} + +static int +sf_FCOMP_sti(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + setcc(C0 | C2 | C3); + if (is_IA_masked()) { + FPU_pop(); + } + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_two(a, b, &status); + setcc(FPU_status_word_flags_fpu_compare(rc)); + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi)); + return 0; +} + +static int +sf_FCOMPP(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + setcc(C0 | C2 | C3); + if (is_IA_masked()) { + FPU_pop(); + FPU_pop(); + } + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(1); + rc = floatx80_compare_two(a, b, &status); + setcc(FPU_status_word_flags_fpu_compare(rc)); + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_pop(); + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi)); + return 0; +} + +#ifndef FPU_8087 +static int +sf_FUCOMPP(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + setcc(C0 | C2 | C3); + if (is_IA_masked()) { + FPU_pop(); + FPU_pop(); + } + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(1); + rc = floatx80_compare_quiet(a, b, &status); + setcc(FPU_status_word_flags_fpu_compare(rc)); + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_pop(); + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi)); + return 0; +} + +static int +sf_FCOMI_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + flags_rebuild(); + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_two(a, b, &status); + FPU_write_eflags_fpu_compare(rc); + FPU_exception(fetchdat, status.float_exception_flags, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fcom) : (x87_timings.fcom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi)); + return 0; +} +static int +sf_FCOMIP_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + flags_rebuild(); + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG); + if (is_IA_masked()) { + FPU_pop(); + } + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_two(a, b, &status); + FPU_write_eflags_fpu_compare(rc); + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? 
(x87_timings.fcom) : (x87_timings.fcom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fcom) : (x87_concurrency.fcom * cpu_multi)); + return 0; +} + +static int +sf_FUCOM_sti(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + setcc(C0 | C2 | C3); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_quiet(a, b, &status); + setcc(FPU_status_word_flags_fpu_compare(rc)); + FPU_exception(fetchdat, status.float_exception_flags, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi)); + return 0; +} + +static int +sf_FUCOMP_sti(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + setcc(C0 | C2 | C3); + if (is_IA_masked()) + FPU_pop(); + + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_quiet(a, b, &status); + setcc(FPU_status_word_flags_fpu_compare(rc)); + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi)); + return 0; +} + +static int +sf_FUCOMI_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + flags_rebuild(); + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_quiet(a, b, &status); + FPU_write_eflags_fpu_compare(rc); + FPU_exception(fetchdat, status.float_exception_flags, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fucom) : (x87_timings.fucom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi)); + return 0; +} +static int +sf_FUCOMIP_st0_stj(uint32_t fetchdat) +{ + floatx80 a, b; + struct float_status_t status; + int rc; + + FP_ENTER(); + cpu_state.pc++; + flags_rebuild(); + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG); + if (is_IA_masked()) + FPU_pop(); + + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(fetchdat & 7); + rc = floatx80_compare_quiet(a, b, &status); + FPU_write_eflags_fpu_compare(rc); + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? 
(x87_timings.fucom) : (x87_timings.fucom * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fucom) : (x87_concurrency.fucom * cpu_multi)); + return 0; +} +#endif + +static int +sf_FTST(uint32_t fetchdat) +{ + int rc; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + setcc(C0 | C2 | C3); + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + rc = floatx80_compare_two(FPU_read_regi(0), Const_Z, &status); + setcc(FPU_status_word_flags_fpu_compare(rc)); + FPU_exception(fetchdat, status.float_exception_flags, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.ftst) : (x87_timings.ftst * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.ftst) : (x87_concurrency.ftst * cpu_multi)); + return 0; +} + +static int +sf_FXAM(uint32_t fetchdat) +{ + floatx80 reg; + int sign; + float_class_t aClass; + + FP_ENTER(); + cpu_state.pc++; + reg = FPU_read_regi(0); + sign = floatx80_sign(reg); + /* + * Examine the contents of the ST(0) register and sets the condition + * code flags C0, C2 and C3 in the FPU status word to indicate the + * class of value or number in the register. + */ + if (IS_TAG_EMPTY(0)) { + setcc(C3 | C1 | C0); + } else { + aClass = floatx80_class(reg); + switch (aClass) { + case float_zero: + setcc(C3 | C1); + break; + case float_SNaN: + case float_QNaN: + // unsupported handled as NaNs + if (floatx80_is_unsupported(reg)) { + setcc(C1); + } else { + setcc(C1 | C0); + } + break; + case float_negative_inf: + case float_positive_inf: + setcc(C2 | C1 | C0); + break; + case float_denormal: + setcc(C3 | C2 | C1); + break; + case float_normalized: + setcc(C2 | C1); + break; + } + } + /* + * The C1 flag is set to the sign of the value in ST(0), regardless + * of whether the register is empty or full. + */ + if (!sign) + clear_C1(); + + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxam) : (x87_timings.fxam * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxam) : (x87_concurrency.fxam * cpu_multi)); + return 0; +} diff --git a/src/cpu/softfloat/x87_ops_const.h b/src/cpu/softfloat/x87_ops_const.h new file mode 100644 index 000000000..5e722ede4 --- /dev/null +++ b/src/cpu/softfloat/x87_ops_const.h @@ -0,0 +1,131 @@ +/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP + (and not one of RC_RND or RC_UP). + */ +#define DOWN_OR_CHOP() (fpu_state.cwd & FPU_CW_RC & FPU_RC_DOWN) + +static __inline floatx80 +FPU_round_const(const floatx80 a, int adj) +{ + floatx80 result = a; + result.fraction += adj; + return result; +} + +static int +sf_FLDL2T(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) + FPU_stack_overflow(fetchdat); + else { + FPU_push(); + FPU_save_regi(FPU_round_const(Const_L2T, (fpu_state.cwd & FPU_CW_RC) == X87_ROUNDING_UP), 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi)); + return 0; +} + +static int +sf_FLDL2E(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) + FPU_stack_overflow(fetchdat); + else { + FPU_push(); + FPU_save_regi(FPU_round_const(Const_L2E, DOWN_OR_CHOP() ? -1 : 0), 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? 
(x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi)); + return 0; +} + +static int +sf_FLDPI(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) + FPU_stack_overflow(fetchdat); + else { + FPU_push(); + FPU_save_regi(FPU_round_const(Const_PI, DOWN_OR_CHOP() ? -1 : 0), 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi)); + return 0; +} + +static int +sf_FLDEG2(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) + FPU_stack_overflow(fetchdat); + else { + FPU_push(); + FPU_save_regi(FPU_round_const(Const_LG2, DOWN_OR_CHOP() ? -1 : 0), 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi)); + return 0; +} + +static int +sf_FLDLN2(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) + FPU_stack_overflow(fetchdat); + else { + FPU_push(); + FPU_save_regi(FPU_round_const(Const_LN2, DOWN_OR_CHOP() ? -1 : 0), 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_const) : (x87_timings.fld_const * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_const) : (x87_concurrency.fld_const * cpu_multi)); + return 0; +} + +static int +sf_FLD1(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) + FPU_stack_overflow(fetchdat); + else { + FPU_push(); + FPU_save_regi(Const_1, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_z1) : (x87_timings.fld_z1 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_z1) : (x87_concurrency.fld_z1 * cpu_multi)); + return 0; +} + +static int +sf_FLDZ(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) + FPU_stack_overflow(fetchdat); + else { + FPU_push(); + FPU_save_regi(Const_Z, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_z1) : (x87_timings.fld_z1 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_z1) : (x87_concurrency.fld_z1 * cpu_multi)); + return 0; +} diff --git a/src/cpu/softfloat/x87_ops_load_store.h b/src/cpu/softfloat/x87_ops_load_store.h new file mode 100644 index 000000000..c6dab1b44 --- /dev/null +++ b/src/cpu/softfloat/x87_ops_load_store.h @@ -0,0 +1,1314 @@ +/* + * 86Box A hypervisor and IBM PC system emulator that specializes in + * running old operating systems and software designed for IBM + * PC systems and compatibles from 1981 through fairly recent + * system designs based on the PCI bus. + * + * This file is part of the 86Box distribution. + * + * x87 FPU instructions core. + * + * + * + * Authors: Sarah Walker, + * Miran Grca, + * + * Copyright 2008-2019 Sarah Walker. + * Copyright 2016-2019 Miran Grca. 
+ */ + +#define swap_values16u(a, b) { uint16_t tmp = a; a = b; b = tmp; } + +static int +sf_FILDiw_a16(uint32_t fetchdat) +{ + floatx80 result; + int16_t temp; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + temp = geteaw(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + } else { + result = int32_to_floatx80(temp); + FPU_push(); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_16) : (x87_timings.fild_16 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_16) : (x87_concurrency.fild_16 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FILDiw_a32(uint32_t fetchdat) +{ + floatx80 result; + int16_t temp; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + temp = geteaw(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + } else { + result = int32_to_floatx80(temp); + FPU_push(); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_16) : (x87_timings.fild_16 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_16) : (x87_concurrency.fild_16 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FILDil_a16(uint32_t fetchdat) +{ + floatx80 result; + int32_t templ; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + templ = geteal(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + } else { + result = int32_to_floatx80(templ); + FPU_push(); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_32) : (x87_timings.fild_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_32) : (x87_concurrency.fild_32 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FILDil_a32(uint32_t fetchdat) +{ + floatx80 result; + int32_t templ; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + templ = geteal(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + } else { + result = int32_to_floatx80(templ); + FPU_push(); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_32) : (x87_timings.fild_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_32) : (x87_concurrency.fild_32 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FILDiq_a16(uint32_t fetchdat) +{ + floatx80 result; + int64_t temp64; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + temp64 = geteaq(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + } else { + result = int64_to_floatx80(temp64); + FPU_push(); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_64) : (x87_timings.fild_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fild_64) : (x87_concurrency.fild_64 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FILDiq_a32(uint32_t fetchdat) +{ + floatx80 result; + int64_t temp64; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + temp64 = geteaq(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + } else { + result = int64_to_floatx80(temp64); + FPU_push(); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fild_64) : (x87_timings.fild_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fild_64) : (x87_concurrency.fild_64 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FLDs_a16(uint32_t fetchdat) +{ + struct float_status_t status; + floatx80 result; + float32 load_reg; + unsigned unmasked; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + load_reg = geteal(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = float32_to_floatx80(load_reg, &status); + unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0); + if (!(unmasked & FPU_CW_Invalid)) { + FPU_push(); + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_32) : (x87_timings.fst_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_32) : (x87_concurrency.fst_32 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FLDs_a32(uint32_t fetchdat) +{ + struct float_status_t status; + floatx80 result; + float32 load_reg; + unsigned unmasked; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + load_reg = geteal(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = float32_to_floatx80(load_reg, &status); + unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0); + if (!(unmasked & FPU_CW_Invalid)) { + FPU_push(); + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_32) : (x87_timings.fst_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_32) : (x87_concurrency.fst_32 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FLDd_a16(uint32_t fetchdat) +{ + struct float_status_t status; + floatx80 result; + float64 load_reg; + unsigned unmasked; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + load_reg = geteaq(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = float64_to_floatx80(load_reg, &status); + unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0); + if (!(unmasked & FPU_CW_Invalid)) { + FPU_push(); + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_64) : (x87_timings.fld_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fld_64) : (x87_concurrency.fld_64 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FLDd_a32(uint32_t fetchdat) +{ + struct float_status_t status; + floatx80 result; + float64 load_reg; + unsigned unmasked; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + load_reg = geteaq(); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = float64_to_floatx80(load_reg, &status); + unmasked = FPU_exception(fetchdat, status.float_exception_flags, 0); + if (!(unmasked & FPU_CW_Invalid)) { + FPU_push(); + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_64) : (x87_timings.fld_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_64) : (x87_concurrency.fld_64 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FLDe_a16(uint32_t fetchdat) +{ + floatx80 result; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + result.fraction = readmemq(easeg, cpu_state.eaaddr); + result.exp = readmemw(easeg, cpu_state.eaaddr + 8); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + } else { + FPU_push(); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_80) : (x87_timings.fld_80 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_80) : (x87_concurrency.fld_80 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FLDe_a32(uint32_t fetchdat) +{ + floatx80 result; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + result.fraction = readmemq(easeg, cpu_state.eaaddr); + result.exp = readmemw(easeg, cpu_state.eaaddr + 8); + if (cpu_state.abrt) + return 1; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + } else { + FPU_push(); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld_80) : (x87_timings.fld_80 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fld_80) : (x87_concurrency.fld_80 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FLD_sti(uint32_t fetchdat) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + floatx80 sti_reg; + + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + if (!IS_TAG_EMPTY(-1)) { + FPU_stack_overflow(fetchdat); + goto next_ins; + } + sti_reg = floatx80_default_nan; + if (IS_TAG_EMPTY(fetchdat & 7)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + sti_reg = FPU_read_regi(fetchdat & 7); + } + + FPU_push(); + FPU_save_regi(sti_reg, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fld) : (x87_timings.fld * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fld) : (x87_concurrency.fld * cpu_multi)); + return 0; +} + +static int +sf_FISTiw_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int16_t save_reg = int16_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) { + goto next_ins; + } + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int16(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaw(save_reg); + fpu_state.swd = sw; + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_16) : (x87_timings.fist_16 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_16) : (x87_concurrency.fist_16 * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FISTiw_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int16_t save_reg = int16_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int16(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) + goto next_ins; + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaw(save_reg); + fpu_state.swd = sw; + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_16) : (x87_timings.fist_16 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_16) : (x87_concurrency.fist_16 * cpu_multi)); + return cpu_state.abrt; +} +#endif + +static int +sf_FISTPiw_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int16_t save_reg = int16_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int16(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaw(save_reg); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_16) : (x87_timings.fist_16 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fist_16) : (x87_concurrency.fist_16 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FISTPiw_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int16_t save_reg = int16_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int16(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) + goto next_ins; + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaw(save_reg); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_16) : (x87_timings.fist_16 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_16) : (x87_concurrency.fist_16 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FISTil_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int32_t save_reg = int32_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int32(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteal(save_reg); + fpu_state.swd = sw; + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_32) : (x87_timings.fist_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_32) : (x87_concurrency.fist_32 * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FISTil_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int32_t save_reg = int32_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int32(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) + goto next_ins; + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteal(save_reg); + fpu_state.swd = sw; + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_32) : (x87_timings.fist_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fist_32) : (x87_concurrency.fist_32 * cpu_multi)); + return cpu_state.abrt; +} +#endif + +static int +sf_FISTPil_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int32_t save_reg = int32_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int32(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteal(save_reg); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_32) : (x87_timings.fist_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_32) : (x87_concurrency.fist_32 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FISTPil_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int32_t save_reg = int32_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int32(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) + goto next_ins; + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteal(save_reg); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_32) : (x87_timings.fist_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_32) : (x87_concurrency.fist_32 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FISTPiq_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int64_t save_reg = int64_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int64(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaq(save_reg); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_64) : (x87_timings.fist_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fist_64) : (x87_concurrency.fist_64 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FISTPiq_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + int64_t save_reg = int64_indefinite; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_int64(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) + goto next_ins; + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaq(save_reg); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fist_64) : (x87_timings.fist_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fist_64) : (x87_concurrency.fist_64 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FBSTP_PACKED_BCD_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + uint16_t save_reg_hi = 0xffff; + uint64_t save_reg_lo = BX_CONST64(0xC000000000000000); + floatx80 reg; + int64_t save_val; + int sign; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + reg = FPU_read_regi(0); + save_val = floatx80_to_int64(reg, &status); + sign = (reg.exp & 0x8000) != 0; + if (sign) + save_val = -save_val; + + if (save_val > BX_CONST64(999999999999999999)) + status.float_exception_flags = float_flag_invalid; // throw away other flags + + if (!(status.float_exception_flags & float_flag_invalid)) { + save_reg_hi = sign ? 0x8000 : 0; + save_reg_lo = 0; + for (int i = 0; i < 16; i++) { + save_reg_lo += ((uint64_t)(save_val % 10)) << (4 * i); + save_val /= 10; + } + save_reg_hi += (uint16_t)(save_val % 10); + save_val /= 10; + save_reg_hi += (uint16_t)(save_val % 10) << 4; + } + /* check for fpu arithmetic exceptions */ + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + + // write packed bcd to memory + writememq(easeg, cpu_state.eaaddr, save_reg_lo); + writememw(easeg, cpu_state.eaaddr + 8, save_reg_hi); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fbstp) : (x87_timings.fbstp * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fbstp) : (x87_concurrency.fbstp * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FBSTP_PACKED_BCD_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + uint16_t save_reg_hi = 0xffff; + uint64_t save_reg_lo = BX_CONST64(0xC000000000000000); + floatx80 reg; + int64_t save_val; + int sign; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + reg = FPU_read_regi(0); + save_val = floatx80_to_int64(reg, &status); + sign = (reg.exp & 0x8000) != 0; + if (sign) + save_val = -save_val; + + if (save_val > BX_CONST64(999999999999999999)) + status.float_exception_flags = float_flag_invalid; // throw away other flags + + if (!(status.float_exception_flags & float_flag_invalid)) { + save_reg_hi = sign ? 0x8000 : 0; + save_reg_lo = 0; + for (int i = 0; i < 16; i++) { + save_reg_lo += ((uint64_t)(save_val % 10)) << (4 * i); + save_val /= 10; + } + save_reg_hi += (uint16_t)(save_val % 10); + save_val /= 10; + save_reg_hi += (uint16_t)(save_val % 10) << 4; + } + /* check for fpu arithmetic exceptions */ + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + + // write packed bcd to memory + writememq(easeg, cpu_state.eaaddr, save_reg_lo); + writememw(easeg, cpu_state.eaaddr + 8, save_reg_hi); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fbstp) : (x87_timings.fbstp * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fbstp) : (x87_concurrency.fbstp * cpu_multi)); + return 0; +} +#endif + +static int +sf_FSTs_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + float32 save_reg = float32_default_nan; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_float32(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteal(save_reg); + fpu_state.swd = sw; + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_32) : (x87_timings.fst_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fst_32) : (x87_concurrency.fst_32 * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FSTs_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + float32 save_reg = float32_default_nan; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_float32(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) + goto next_ins; + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteal(save_reg); + fpu_state.swd = sw; + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_32) : (x87_timings.fst_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_32) : (x87_concurrency.fst_32 * cpu_multi)); + return cpu_state.abrt; +} +#endif + +static int +sf_FSTPs_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + float32 save_reg = float32_default_nan; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_float32(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteal(save_reg); + if (cpu_state.abrt) { + return 1; + } + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_32) : (x87_timings.fst_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_32) : (x87_concurrency.fst_32 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FSTPs_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + float32 save_reg = float32_default_nan; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_float32(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) + goto next_ins; + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteal(save_reg); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_32) : (x87_timings.fst_32 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fst_32) : (x87_concurrency.fst_32 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FSTd_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + float64 save_reg = float64_default_nan; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_float64(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaq(save_reg); + fpu_state.swd = sw; + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_64) : (x87_timings.fst_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_64) : (x87_concurrency.fst_64 * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FSTd_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + float64 save_reg = float64_default_nan; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_float64(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) + goto next_ins; + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaq(save_reg); + fpu_state.swd = sw; + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_64) : (x87_timings.fst_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_64) : (x87_concurrency.fst_64 * cpu_multi)); + return cpu_state.abrt; +} +#endif + +static int +sf_FSTPd_a16(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + float64 save_reg = float64_default_nan; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) { + goto next_ins; + } + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_float64(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) { + goto next_ins; + } + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaq(save_reg); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_64) : (x87_timings.fst_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fst_64) : (x87_concurrency.fst_64 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FSTPd_a32(uint32_t fetchdat) +{ + struct float_status_t status; + uint16_t sw = fpu_state.swd; + float64 save_reg = float64_default_nan; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + save_reg = floatx80_to_float64(FPU_read_regi(0), &status); + if (FPU_exception(fetchdat, status.float_exception_flags, 1)) + goto next_ins; + } + // store to the memory might generate an exception, in this case original FPU_SW must be kept + swap_values16u(sw, fpu_state.swd); + seteaq(save_reg); + if (cpu_state.abrt) + return 1; + fpu_state.swd = sw; + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_64) : (x87_timings.fst_64 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_64) : (x87_concurrency.fst_64 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FSTPe_a16(uint32_t fetchdat) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + floatx80 save_reg; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + if (cpu_state.abrt) + return 1; + save_reg = floatx80_default_nan; + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) { + goto next_ins; + } + } else { + save_reg = FPU_read_regi(0); + } + writememq(easeg, cpu_state.eaaddr, save_reg.fraction); + writememw(easeg, cpu_state.eaaddr + 8, save_reg.exp); + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_80) : (x87_timings.fst_80 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_80) : (x87_concurrency.fst_80 * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FSTPe_a32(uint32_t fetchdat) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + floatx80 save_reg; + + FP_ENTER(); + FPU_check_pending_exceptions(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + if (cpu_state.abrt) + return 1; + save_reg = floatx80_default_nan; + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (!is_IA_masked()) + goto next_ins; + } else { + save_reg = FPU_read_regi(0); + } + writememq(easeg, cpu_state.eaaddr, save_reg.fraction); + writememw(easeg, cpu_state.eaaddr + 8, save_reg.exp); + FPU_pop(); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst_80) : (x87_timings.fst_80 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst_80) : (x87_concurrency.fst_80 * cpu_multi)); + return 0; +} +#endif + +static int +sf_FST_sti(uint32_t fetchdat) +{ + floatx80 st0_reg; + + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 0); + } else { + st0_reg = FPU_read_regi(0); + FPU_save_regi(st0_reg, fetchdat & 7); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst) : (x87_timings.fst * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fst) : (x87_concurrency.fst * cpu_multi)); + return 0; +} + +static int +sf_FSTP_sti(uint32_t fetchdat) +{ + floatx80 st0_reg; + + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_stack_underflow(fetchdat, fetchdat & 7, 1); + } else { + st0_reg = FPU_read_regi(0); + FPU_save_regi(st0_reg, fetchdat & 7); + FPU_pop(); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fst) : (x87_timings.fst * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fst) : (x87_concurrency.fst * cpu_multi)); + return 0; +} + +#ifndef FPU_8087 +# define sf_FCMOV(condition) \ + static int sf_FCMOV##condition(uint32_t fetchdat) \ + { \ + FP_ENTER(); \ + FPU_check_pending_exceptions(); \ + cpu_state.pc++; \ + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(fetchdat & 7)) \ + FPU_stack_underflow(fetchdat, 0, 0); \ + else { \ + if (cond_##condition) { \ + FPU_save_regi(FPU_read_regi(fetchdat & 7), 0); \ + } \ + } \ + CLOCK_CYCLES_FPU(4); \ + return 0; \ + } + +# define cond_U (PF_SET()) +# define cond_NU (!PF_SET()) + +// clang-format off +sf_FCMOV(B) +sf_FCMOV(E) +sf_FCMOV(BE) +sf_FCMOV(U) +sf_FCMOV(NB) +sf_FCMOV(NE) +sf_FCMOV(NBE) +sf_FCMOV(NU) +// clang-format on +#endif diff --git a/src/cpu/softfloat/x87_ops_misc.h b/src/cpu/softfloat/x87_ops_misc.h new file mode 100644 index 000000000..d8a3d7368 --- /dev/null +++ b/src/cpu/softfloat/x87_ops_misc.h @@ -0,0 +1,134 @@ +static int +sf_FXCH_sti(uint32_t fetchdat) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + floatx80 st0_reg, sti_reg; + int st0_tag, sti_tag; + + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + st0_tag = FPU_gettagi(0); + sti_tag = FPU_gettagi(fetchdat & 7); + st0_reg = FPU_read_regi(0); + sti_reg = FPU_read_regi(fetchdat & 7); + + clear_C1(); + if ((st0_tag == X87_TAG_EMPTY) || (sti_tag == X87_TAG_EMPTY)) { + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + if (is_IA_masked()) { + /* Masked response */ + if (st0_tag == X87_TAG_EMPTY) + st0_reg = floatx80_default_nan; + if (sti_tag == X87_TAG_EMPTY) + sti_reg = floatx80_default_nan; + } else + goto next_ins; + } + FPU_save_regi(st0_reg, fetchdat & 7); + FPU_save_regi(sti_reg, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxch) : (x87_timings.fxch * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxch) : (x87_concurrency.fxch * cpu_multi)); + return 0; +} + +static int +sf_FCHS(uint32_t fetchdat) +{ + floatx80 st0_reg; + floatx80 result; + + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + if (IS_TAG_EMPTY(0)) + FPU_stack_underflow(fetchdat, 0, 0); + else { + clear_C1(); + st0_reg = FPU_read_regi(0); + result = floatx80_chs(st0_reg); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fchs) : (x87_timings.fchs * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fchs) : (x87_concurrency.fchs * cpu_multi)); + return 0; +} + +static int +sf_FABS(uint32_t fetchdat) +{ + floatx80 st0_reg; + floatx80 result; + + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + if (IS_TAG_EMPTY(0)) + FPU_stack_underflow(fetchdat, 0, 0); + else { + clear_C1(); + st0_reg = FPU_read_regi(0); + result = floatx80_abs(st0_reg); + FPU_save_regi(result, 0); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? 
(x87_timings.fabs) : (x87_timings.fabs * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fabs) : (x87_concurrency.fabs * cpu_multi)); + return 0; +} + +static int +sf_FDECSTP(uint32_t fetchdat) +{ + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + fpu_state.tos = (fpu_state.tos - 1) & 7; + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fincdecstp) : (x87_timings.fincdecstp * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fincdecstp) : (x87_concurrency.fincdecstp * cpu_multi)); + return 0; +} + +static int +sf_FINCSTP(uint32_t fetchdat) +{ + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + fpu_state.tos = (fpu_state.tos + 1) & 7; + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fincdecstp) : (x87_timings.fincdecstp * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fincdecstp) : (x87_concurrency.fincdecstp * cpu_multi)); + return 0; +} + +static int +sf_FFREE_sti(uint32_t fetchdat) +{ + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + FPU_settagi(X87_TAG_EMPTY, fetchdat & 7); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.ffree) : (x87_timings.ffree * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.ffree) : (x87_concurrency.ffree * cpu_multi)); + return 0; +} + +static int +sf_FFREEP_sti(uint32_t fetchdat) +{ + FP_ENTER(); + FPU_check_pending_exceptions(); + cpu_state.pc++; + clear_C1(); + FPU_settagi(X87_TAG_EMPTY, fetchdat & 7); + if (cpu_state.abrt) + return 1; + FPU_pop(); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.ffree) : (x87_timings.ffree * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.ffree) : (x87_concurrency.ffree * cpu_multi)); + return 0; +} diff --git a/src/cpu/softfloat/x87_ops_other.h b/src/cpu/softfloat/x87_ops_other.h new file mode 100644 index 000000000..4de3b1c90 --- /dev/null +++ b/src/cpu/softfloat/x87_ops_other.h @@ -0,0 +1,593 @@ +static uint32_t +fpu_save_environment(void) +{ + int tag; + unsigned offset; + + /* read all registers in stack order and update x87 tag word */ + for (int n = 0; n < 8; n++) { + // update tag only if it is not empty + if (!IS_TAG_EMPTY(n)) { + tag = FPU_tagof(FPU_read_regi(n)); + FPU_settagi(tag, n); + } + } + + fpu_state.swd = (fpu_state.swd & ~(7 << 11)) | ((fpu_state.tos & 7) << 11); + + switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { + case 0x000: { /*16-bit real mode*/ + uint16_t tmp; + uint32_t fp_ip, fp_dp; + + fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip; + fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp; + + tmp = i387_get_control_word(); + writememw(easeg, cpu_state.eaaddr + 0x00, tmp); + tmp = i387_get_status_word(); + writememw(easeg, cpu_state.eaaddr + 0x02, tmp); + tmp = fpu_state.tag; + writememw(easeg, cpu_state.eaaddr + 0x04, tmp); + tmp = fp_ip & 0xffff; + writememw(easeg, cpu_state.eaaddr + 0x06, tmp); + tmp = (uint16_t)((fp_ip & 0xf0000) >> 4) | fpu_state.foo; + writememw(easeg, cpu_state.eaaddr + 0x08, tmp); + tmp = fp_dp & 0xffff; + writememw(easeg, cpu_state.eaaddr + 0x0a, tmp); + tmp = (uint16_t)((fp_dp & 0xf0000) >> 4); + writememw(easeg, cpu_state.eaaddr + 0x0c, tmp); + offset = 0x0e; + } + break; + case 0x001: {/*16-bit protected mode*/ + uint16_t tmp; + tmp = i387_get_control_word(); + writememw(easeg, cpu_state.eaaddr + 0x00, tmp); + tmp = i387_get_status_word(); + writememw(easeg, cpu_state.eaaddr + 0x02, tmp); + tmp = fpu_state.tag; + writememw(easeg, cpu_state.eaaddr + 0x04, tmp); + tmp = (uint16_t)(fpu_state.fip) & 0xffff; + writememw(easeg, cpu_state.eaaddr + 0x06, tmp); + tmp = fpu_state.fcs; + writememw(easeg, cpu_state.eaaddr + 0x08, tmp); + tmp = (uint16_t)(fpu_state.fdp) & 0xffff; + writememw(easeg, cpu_state.eaaddr + 0x0a, tmp); + tmp = fpu_state.fds; + writememw(easeg, cpu_state.eaaddr + 0x0c, tmp); + offset = 0x0e; + } + break; + case 0x100: { /*32-bit real mode*/ + uint32_t tmp, fp_ip, fp_dp; + + fp_ip = ((uint32_t)(fpu_state.fcs << 4)) | fpu_state.fip; + fp_dp = ((uint32_t)(fpu_state.fds << 4)) | fpu_state.fdp; + + tmp = 0xffff0000 | i387_get_control_word(); + writememl(easeg, cpu_state.eaaddr + 0x00, tmp); + tmp = 0xffff0000 | i387_get_status_word(); + writememl(easeg, cpu_state.eaaddr + 0x04, tmp); + tmp = 0xffff0000 | fpu_state.tag; + writememl(easeg, cpu_state.eaaddr + 0x08, tmp); + tmp = 0xffff0000 | (fp_ip & 0xffff); + writememl(easeg, cpu_state.eaaddr + 0x0c, tmp); + tmp = ((fp_ip & 0xffff0000) >> 4) | fpu_state.foo; + writememl(easeg, cpu_state.eaaddr + 0x10, tmp); + tmp = 0xffff0000 | (fp_dp & 0xffff); + writememl(easeg, cpu_state.eaaddr + 0x14, tmp); + tmp = (fp_dp & 0xffff0000) >> 4; + writememl(easeg, cpu_state.eaaddr + 0x18, tmp); + offset = 0x1c; + } + break; + case 0x101: { /*32-bit protected mode*/ + uint32_t tmp; + tmp = 0xffff0000 | i387_get_control_word(); + writememl(easeg, cpu_state.eaaddr + 0x00, tmp); + tmp = 0xffff0000 | i387_get_status_word(); + writememl(easeg, cpu_state.eaaddr + 0x04, tmp); + tmp = 0xffff0000 | fpu_state.tag; + writememl(easeg, cpu_state.eaaddr + 0x08, tmp); + tmp = (uint32_t)(fpu_state.fip); + writememl(easeg, cpu_state.eaaddr + 0x0c, tmp); + tmp = fpu_state.fcs | (((uint32_t)(fpu_state.foo)) << 16); + 
writememl(easeg, cpu_state.eaaddr + 0x10, tmp); + tmp = (uint32_t)(fpu_state.fdp); + writememl(easeg, cpu_state.eaaddr + 0x14, tmp); + tmp = 0xffff0000 | fpu_state.fds; + writememl(easeg, cpu_state.eaaddr + 0x18, tmp); + offset = 0x1c; + } + break; + } + + return (cpu_state.eaaddr + offset); +} + +static uint32_t +fpu_load_environment(void) +{ + unsigned offset; + + switch ((cr0 & 1) | (cpu_state.op32 & 0x100)) { + case 0x000: { /*16-bit real mode*/ + uint16_t tmp; + uint32_t fp_ip, fp_dp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c); + fp_dp = (tmp & 0xf000) << 4; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a); + fpu_state.fdp = fp_dp | tmp; + fpu_state.fds = 0; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x08); + fp_ip = (tmp & 0xf000) << 4; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x06); + fpu_state.fip = fp_ip | tmp; + fpu_state.fcs = 0; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x04); + fpu_state.tag = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x02); + fpu_state.swd = tmp; + fpu_state.tos = (tmp >> 11) & 7; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x00); + fpu_state.cwd = tmp; + offset = 0x0e; + } + break; + case 0x001: {/*16-bit protected mode*/ + uint16_t tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0c); + fpu_state.fds = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x0a); + fpu_state.fdp = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x08); + fpu_state.fcs = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x06); + fpu_state.fip = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x04); + fpu_state.tag = tmp; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x02); + fpu_state.swd = tmp; + fpu_state.tos = (tmp >> 11) & 7; + tmp = readmemw(easeg, cpu_state.eaaddr + 0x00); + fpu_state.cwd = tmp; + offset = 0x0e; + } + break; + case 0x100: { /*32-bit real mode*/ + uint32_t tmp, fp_ip, fp_dp; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x18); + fp_dp = (tmp & 0x0ffff000) << 4; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x14); + fp_dp |= (tmp & 0xffff); + fpu_state.fdp = fp_dp; + fpu_state.fds = 0; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x10); + fpu_state.foo = tmp & 0x07ff; + fp_ip = (tmp & 0x0ffff000) << 4; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c); + fp_ip |= (tmp & 0xffff); + fpu_state.fip = fp_ip; + fpu_state.fcs = 0; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x08); + fpu_state.tag = tmp & 0xffff; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x04); + fpu_state.swd = tmp & 0xffff; + fpu_state.tos = (tmp >> 11) & 7; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x00); + fpu_state.cwd = tmp & 0xffff; + offset = 0x1c; + } + break; + case 0x101: { /*32-bit protected mode*/ + uint32_t tmp; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x18); + fpu_state.fds = tmp & 0xffff; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x14); + fpu_state.fdp = tmp; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x10); + fpu_state.fcs = tmp & 0xffff; + fpu_state.foo = (tmp >> 16) & 0x07ff; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x0c); + fpu_state.fip = tmp; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x08); + fpu_state.tag = tmp & 0xffff; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x04); + fpu_state.swd = tmp & 0xffff; + fpu_state.tos = (tmp >> 11) & 7; + tmp = readmeml(easeg, cpu_state.eaaddr + 0x00); + fpu_state.cwd = tmp & 0xffff; + offset = 0x1c; + } + break; + } + + /* always set bit 6 as '1 */ + fpu_state.cwd = (fpu_state.cwd & ~FPU_CW_Reserved_Bits) | 0x0040; + + /* check for unmasked exceptions */ + if (fpu_state.swd & ~fpu_state.cwd & FPU_CW_Exceptions_Mask) { + /* set the B and ES 
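(busy and error-summary) 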
bits in the status-word */ + fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward); + } else { + /* clear the B and ES bits in the status-word */ + fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward); + } + + return (cpu_state.eaaddr + offset); +} + +static int +sf_FLDCW_a16(uint32_t fetchdat) +{ + uint16_t tempw; + + FP_ENTER(); + fetch_ea_16(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + tempw = geteaw(); + if (cpu_state.abrt) + return 1; + fpu_state.cwd = (tempw & ~FPU_CW_Reserved_Bits) | 0x0040; // bit 6 is reserved as '1 + /* check for unmasked exceptions */ + if (fpu_state.swd & (~fpu_state.cwd & FPU_CW_Exceptions_Mask)) { + /* set the B and ES bits in the status-word */ + fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward); + } else { + /* clear the B and ES bits in the status-word */ + fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldcw) : (x87_timings.fldcw * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldcw) : (x87_concurrency.fldcw * cpu_multi)); + return 0; +} +#ifndef FPU_8087 +static int +sf_FLDCW_a32(uint32_t fetchdat) +{ + uint16_t tempw; + + FP_ENTER(); + fetch_ea_32(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + tempw = geteaw(); + if (cpu_state.abrt) + return 1; + fpu_state.cwd = (tempw & ~FPU_CW_Reserved_Bits) | 0x0040; // bit 6 is reserved as '1 + /* check for unmasked exceptions */ + if (fpu_state.swd & (~fpu_state.cwd & FPU_CW_Exceptions_Mask)) { + /* set the B and ES bits in the status-word */ + fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward); + } else { + /* clear the B and ES bits in the status-word */ + fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldcw) : (x87_timings.fldcw * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldcw) : (x87_concurrency.fldcw * cpu_multi)); + return 0; +} +#endif + +static int +sf_FNSTCW_a16(uint32_t fetchdat) +{ + uint16_t cwd = i387_get_control_word(); + + FP_ENTER(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + seteaw(cwd); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstenv) : (x87_concurrency.fstenv * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FNSTCW_a32(uint32_t fetchdat) +{ + uint16_t cwd = i387_get_control_word(); + + FP_ENTER(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + seteaw(cwd); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi)); + return cpu_state.abrt; +} +#endif + +static int +sf_FNSTSW_a16(uint32_t fetchdat) +{ + uint16_t swd = i387_get_status_word(); + + FP_ENTER(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + seteaw(swd); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FNSTSW_a32(uint32_t fetchdat) +{ + uint16_t swd = i387_get_status_word(); + + FP_ENTER(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + seteaw(swd); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? 
(x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi)); + return cpu_state.abrt; +} +#endif + +#ifdef FPU_8087 +static int +sf_FI(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + fpu_state.cwd &= ~FPU_SW_Summary; + if (rmdat == 0xe1) + fpu_state.cwd |= FPU_SW_Summary; + wait(3, 0); + return 0; +} +#else +static int +sf_FNSTSW_AX(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + AX = i387_get_status_word(); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstcw_sw) : (x87_timings.fstcw_sw * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstcw_sw) : (x87_concurrency.fstcw_sw * cpu_multi)); + return 0; +} +#endif + +static int +sf_FRSTOR_a16(uint32_t fetchdat) +{ + floatx80 tmp; + int offset; + + FP_ENTER(); + fetch_ea_16(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + offset = fpu_load_environment(); + for (int n = 0; n < 8; n++) { + tmp.fraction = readmemq(easeg, offset + (n * 10)); + tmp.exp = readmemw(easeg, offset + (n * 10) + 8); + FPU_save_regi_tag(tmp, IS_TAG_EMPTY(n) ? X87_TAG_EMPTY : FPU_tagof(tmp), n); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frstor) : (x87_timings.frstor * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.frstor) : (x87_concurrency.frstor * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FRSTOR_a32(uint32_t fetchdat) +{ + floatx80 tmp; + int offset; + + FP_ENTER(); + fetch_ea_32(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + offset = fpu_load_environment(); + for (int n = 0; n < 8; n++) { + tmp.fraction = readmemq(easeg, offset + (n * 10)); + tmp.exp = readmemw(easeg, offset + (n * 10) + 8); + FPU_save_regi_tag(tmp, IS_TAG_EMPTY(n) ? X87_TAG_EMPTY : FPU_tagof(tmp), n); + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.frstor) : (x87_timings.frstor * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.frstor) : (x87_concurrency.frstor * cpu_multi)); + return cpu_state.abrt; +} +#endif + +static int +sf_FNSAVE_a16(uint32_t fetchdat) +{ + floatx80 stn; + int offset; + + FP_ENTER(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + offset = fpu_save_environment(); + /* save all registers in stack order. */ + for (int m = 0; m < 8; m++) { + stn = FPU_read_regi(m); + writememq(easeg, offset + (m * 10), stn.fraction); + writememw(easeg, offset + (m * 10) + 8, stn.exp); + } + +#ifdef FPU_8087 + fpu_state.swd = 0x3FF; +#else + fpu_state.cwd = 0x37F; +#endif + fpu_state.swd = 0; + fpu_state.tos = 0; + fpu_state.tag = 0xffff; + cpu_state.ismmx = 0; + fpu_state.foo = 0; + fpu_state.fds = 0; + fpu_state.fdp = 0; + fpu_state.fcs = 0; + fpu_state.fip = 0; + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsave) : (x87_timings.fsave * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsave) : (x87_concurrency.fsave * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FNSAVE_a32(uint32_t fetchdat) +{ + floatx80 stn; + int offset; + + FP_ENTER(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + offset = fpu_save_environment(); + /* save all registers in stack order. 
*/ + for (int m = 0; m < 8; m++) { + stn = FPU_read_regi(m); + writememq(easeg, offset + (m * 10), stn.fraction); + writememw(easeg, offset + (m * 10) + 8, stn.exp); + } + +#ifdef FPU_8087 + fpu_state.swd = 0x3FF; +#else + fpu_state.cwd = 0x37F; +#endif + fpu_state.swd = 0; + fpu_state.tos = 0; + fpu_state.tag = 0xffff; + cpu_state.ismmx = 0; + fpu_state.foo = 0; + fpu_state.fds = 0; + fpu_state.fdp = 0; + fpu_state.fcs = 0; + fpu_state.fip = 0; + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsave) : (x87_timings.fsave * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsave) : (x87_concurrency.fsave * cpu_multi)); + return cpu_state.abrt; +} +#endif + +static int +sf_FNCLEX(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + fpu_state.swd &= ~(FPU_SW_Backward | FPU_SW_Summary | FPU_SW_Stack_Fault | FPU_SW_Precision | + FPU_SW_Underflow | FPU_SW_Overflow | FPU_SW_Zero_Div | FPU_SW_Denormal_Op | + FPU_SW_Invalid); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fnop) : (x87_timings.fnop * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fnop) : (x87_concurrency.fnop * cpu_multi)); + return 0; +} + +static int +sf_FNINIT(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; +#ifdef FPU_8087 + fpu_state.cwd = 0x3FF; +#else + fpu_state.cwd = 0x37F; +#endif + fpu_state.swd = 0; + fpu_state.tos = 0; + fpu_state.tag = 0xffff; + fpu_state.foo = 0; + fpu_state.fds = 0; + fpu_state.fdp = 0; + fpu_state.fcs = 0; + fpu_state.fip = 0; + cpu_state.ismmx = 0; + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.finit) : (x87_timings.finit * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.finit) : (x87_concurrency.finit * cpu_multi)); + CPU_BLOCK_END(); + return 0; +} + +static int +sf_FLDENV_a16(uint32_t fetchdat) +{ + int tag; + + FP_ENTER(); + fetch_ea_16(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + fpu_load_environment(); + /* read all registers in stack order and update x87 tag word */ + for (int n = 0; n < 8; n++) { + // update tag only if it is not empty + if (!IS_TAG_EMPTY(n)) { + tag = FPU_tagof(FPU_read_regi(n)); + FPU_settagi(tag, n); + } + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldenv) : (x87_timings.fldenv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldenv) : (x87_concurrency.fldenv * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FLDENV_a32(uint32_t fetchdat) +{ + int tag; + + FP_ENTER(); + fetch_ea_32(fetchdat); + SEG_CHECK_READ(cpu_state.ea_seg); + fpu_load_environment(); + /* read all registers in stack order and update x87 tag word */ + for (int n = 0; n < 8; n++) { + // update tag only if it is not empty + if (!IS_TAG_EMPTY(n)) { + tag = FPU_tagof(FPU_read_regi(n)); + FPU_settagi(tag, n); + } + } + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fldenv) : (x87_timings.fldenv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fldenv) : (x87_concurrency.fldenv * cpu_multi)); + return cpu_state.abrt; +} +#endif + +static int +sf_FNSTENV_a16(uint32_t fetchdat) +{ + FP_ENTER(); + fetch_ea_16(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + fpu_save_environment(); + /* mask all floating point exceptions */ + fpu_state.cwd |= FPU_CW_Exceptions_Mask; + /* clear the B and ES bits in the status word */ + fpu_state.swd &= ~(FPU_SW_Backward|FPU_SW_Summary); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? 
(x87_timings.fstenv) : (x87_timings.fstenv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstenv) : (x87_concurrency.fstenv * cpu_multi)); + return cpu_state.abrt; +} +#ifndef FPU_8087 +static int +sf_FNSTENV_a32(uint32_t fetchdat) +{ + FP_ENTER(); + fetch_ea_32(fetchdat); + SEG_CHECK_WRITE(cpu_state.ea_seg); + fpu_save_environment(); + /* mask all floating point exceptions */ + fpu_state.cwd |= FPU_CW_Exceptions_Mask; + /* clear the B and ES bits in the status word */ + fpu_state.swd &= ~(FPU_SW_Backward|FPU_SW_Summary); + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fstenv) : (x87_timings.fstenv * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fstenv) : (x87_concurrency.fstenv * cpu_multi)); + return cpu_state.abrt; +} +#endif + +static int +sf_FNOP(uint32_t fetchdat) +{ + FP_ENTER(); + cpu_state.pc++; + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fnop) : (x87_timings.fnop * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fnop) : (x87_concurrency.fnop * cpu_multi)); + return 0; +} diff --git a/src/cpu/softfloat/x87_ops_trans.h b/src/cpu/softfloat/x87_ops_trans.h new file mode 100644 index 000000000..7ecbbe4ec --- /dev/null +++ b/src/cpu/softfloat/x87_ops_trans.h @@ -0,0 +1,418 @@ +static int +sf_F2XM1(uint32_t fetchdat) +{ + floatx80 result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); + result = f2xm1(FPU_read_regi(0), &status); + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(result, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.f2xm1) : (x87_timings.f2xm1 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.f2xm1) : (x87_concurrency.f2xm1 * cpu_multi)); + return 0; +} + +static int +sf_FYL2X(uint32_t fetchdat) +{ + floatx80 result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) { + FPU_stack_underflow(fetchdat, 1, 1); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); + result = fyl2x(FPU_read_regi(0), FPU_read_regi(1), &status); + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_pop(); + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fyl2x) : (x87_timings.fyl2x * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fyl2x) : (x87_concurrency.fyl2x * cpu_multi)); + return 0; +} + +static int +sf_FPTAN(uint32_t fetchdat) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + floatx80 y; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + clear_C2(); + if (IS_TAG_EMPTY(0) || !IS_TAG_EMPTY(-1)) { + if (IS_TAG_EMPTY(0)) + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + else + FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0); + + /* The masked response */ + if (is_IA_masked()) { + FPU_save_regi(floatx80_default_nan, 0); + FPU_push(); + FPU_save_regi(floatx80_default_nan, 0); + } + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); + y = FPU_read_regi(0); + if (ftan(&y, &status) == -1) { + fpu_state.swd |= C2; + goto next_ins; + } + + if (floatx80_is_nan(y)) { + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(y, 0); + FPU_push(); + FPU_save_regi(y, 0); + } + goto next_ins; + } + + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(y, 0); + FPU_push(); + FPU_save_regi(Const_1, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fptan) : (x87_timings.fptan * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fptan) : (x87_concurrency.fptan * cpu_multi)); + return 0; +} + +static int +sf_FPATAN(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) { + FPU_stack_underflow(fetchdat, 1, 1); + goto next_ins; + } + a = FPU_read_regi(0); + b = FPU_read_regi(1); + status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); + result = fpatan(a, b, &status); + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_pop(); + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fpatan) : (x87_timings.fpatan * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fpatan) : (x87_concurrency.fpatan * cpu_multi)); + return 0; +} + +static int +sf_FXTRACT(uint32_t fetchdat) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + struct float_status_t status; + floatx80 a, b; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + +#if 0 //TODO + if ((IS_TAG_EMPTY(0) || IS_TAG_EMPTY(-1))) { + if (IS_TAG_EMPTY(0)) + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + else + FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0); + + /* The masked response */ + if (is_IA_masked()) { + FPU_save_regi(floatx80_default_nan, 0); + FPU_push(); + FPU_save_regi(floatx80_default_nan, 0); + } + goto next_ins; + } +#endif + + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = floatx80_extract(&a, &status); + if (!FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(b, 0); // exponent + FPU_push(); + FPU_save_regi(a, 0); // fraction + } + +#if 0 //TODO. +next_ins: +#endif + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxtract) : (x87_timings.fxtract * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fxtract) : (x87_concurrency.fxtract * cpu_multi)); + return 0; +} + +static int +sf_FPREM1(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + uint64_t quotient = 0; + int flags, cc; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + clear_C2(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(1); + flags = floatx80_ieee754_remainder(a, b, &result, "ient, &status); + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + if (flags >= 0) { + cc = 0; + if (flags) + cc = C2; + else { + if (quotient & 1) + cc |= C1; + if (quotient & 2) + cc |= C3; + if (quotient & 4) + cc |= C0; + } + setcc(cc); + } + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fprem1) : (x87_timings.fprem1 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fprem1) : (x87_concurrency.fprem1 * cpu_multi)); + return 0; +} + +static int +sf_FPREM(uint32_t fetchdat) +{ + floatx80 a, b, result; + struct float_status_t status; + uint64_t quotient = 0; + int flags, cc; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + clear_C2(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + a = FPU_read_regi(0); + b = FPU_read_regi(1); + // handle unsupported extended double-precision floating encodings + flags = floatx80_remainder(a, b, &result, "ient, &status); + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + if (flags >= 0) { + cc = 0; + if (flags) + cc = C2; + else { + if (quotient & 1) + cc |= C1; + if (quotient & 2) + cc |= C3; + if (quotient & 4) + cc |= C0; + } + setcc(cc); + } + FPU_save_regi(result, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fprem) : (x87_timings.fprem * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fprem) : (x87_concurrency.fprem * cpu_multi)); + return 0; +} + +static int +sf_FYL2XP1(uint32_t fetchdat) +{ + floatx80 result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) { + FPU_stack_underflow(fetchdat, 1, 1); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); + result = fyl2xp1(FPU_read_regi(0), FPU_read_regi(1), &status); + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(result, 1); + FPU_pop(); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fyl2xp1) : (x87_timings.fyl2xp1 * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fyl2xp1) : (x87_concurrency.fyl2xp1 * cpu_multi)); + return 0; +} + +#ifndef FPU_8087 +static int +sf_FSINCOS(uint32_t fetchdat) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + struct float_status_t status; + floatx80 y, sin_y, cos_y; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + clear_C2(); + if (IS_TAG_EMPTY(0) || !IS_TAG_EMPTY(-1)) { + if (IS_TAG_EMPTY(0)) + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); + else + FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0); + + /* The masked response */ + if (is_IA_masked()) { + FPU_save_regi(floatx80_default_nan, 0); + FPU_push(); + FPU_save_regi(floatx80_default_nan, 0); + } + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); + y = FPU_read_regi(0); + if (fsincos(y, &sin_y, &cos_y, &status) == -1) { + fpu_state.swd |= C2; + goto next_ins; + } + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) { + FPU_save_regi(sin_y, 0); + FPU_push(); + FPU_save_regi(cos_y, 0); + } + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsincos) : (x87_timings.fsincos * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsincos) : (x87_concurrency.fsincos * cpu_multi)); + return 0; +} +#endif + +static int +sf_FSCALE(uint32_t fetchdat) +{ + floatx80 result; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word()); + result = floatx80_scale(FPU_read_regi(0), FPU_read_regi(1), &status); + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(result, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fscale) : (x87_timings.fscale * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fscale) : (x87_concurrency.fscale * cpu_multi)); + return 0; +} + +#ifndef FPU_8087 +static int +sf_FSIN(uint32_t fetchdat) +{ + floatx80 y; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + clear_C2(); + if (IS_TAG_EMPTY(0)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); + y = FPU_read_regi(0); + if (fsin(&y, &status) == -1) { + fpu_state.swd |= C2; + goto next_ins; + } + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(y, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsin_cos) : (x87_timings.fsin_cos * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fsin_cos) : (x87_concurrency.fsin_cos * cpu_multi)); + return 0; +} + +static int +sf_FCOS(uint32_t fetchdat) +{ + floatx80 y; + struct float_status_t status; + + FP_ENTER(); + cpu_state.pc++; + clear_C1(); + clear_C2(); + if (IS_TAG_EMPTY(0)) { + FPU_stack_underflow(fetchdat, 0, 0); + goto next_ins; + } + status = i387cw_to_softfloat_status_word(i387_get_control_word() | FPU_PR_80_BITS); + y = FPU_read_regi(0); + if (fcos(&y, &status) == -1) { + fpu_state.swd |= C2; + goto next_ins; + } + if (! FPU_exception(fetchdat, status.float_exception_flags, 0)) + FPU_save_regi(y, 0); + +next_ins: + CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fsin_cos) : (x87_timings.fsin_cos * cpu_multi)); + CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? 
(x87_concurrency.fsin_cos) : (x87_concurrency.fsin_cos * cpu_multi)); + return 0; +} +#endif diff --git a/src/cpu/x86_ops.h b/src/cpu/x86_ops.h index ca4d83d69..9d34c71ec 100644 --- a/src/cpu/x86_ops.h +++ b/src/cpu/x86_ops.h @@ -151,6 +151,7 @@ extern const OpFn dynarec_ops_3DNOWE[256]; void x86_setopcodes(const OpFn *opcodes, const OpFn *opcodes_0f); #endif + extern const OpFn *x86_opcodes; extern const OpFn *x86_opcodes_0f; extern const OpFn *x86_opcodes_d8_a16; @@ -205,6 +206,38 @@ extern const OpFn ops_pentiumpro_0f[1024]; extern const OpFn ops_pentium2_0f[1024]; extern const OpFn ops_pentium2d_0f[1024]; +extern const OpFn ops_sf_fpu_287_d9_a16[256]; +extern const OpFn ops_sf_fpu_287_d9_a32[256]; +extern const OpFn ops_sf_fpu_287_da_a16[256]; +extern const OpFn ops_sf_fpu_287_da_a32[256]; +extern const OpFn ops_sf_fpu_287_db_a16[256]; +extern const OpFn ops_sf_fpu_287_db_a32[256]; +extern const OpFn ops_sf_fpu_287_dc_a16[32]; +extern const OpFn ops_sf_fpu_287_dc_a32[32]; +extern const OpFn ops_sf_fpu_287_dd_a16[256]; +extern const OpFn ops_sf_fpu_287_dd_a32[256]; +extern const OpFn ops_sf_fpu_287_de_a16[256]; +extern const OpFn ops_sf_fpu_287_de_a32[256]; +extern const OpFn ops_sf_fpu_287_df_a16[256]; +extern const OpFn ops_sf_fpu_287_df_a32[256]; + +extern const OpFn ops_sf_fpu_d8_a16[32]; +extern const OpFn ops_sf_fpu_d8_a32[32]; +extern const OpFn ops_sf_fpu_d9_a16[256]; +extern const OpFn ops_sf_fpu_d9_a32[256]; +extern const OpFn ops_sf_fpu_da_a16[256]; +extern const OpFn ops_sf_fpu_da_a32[256]; +extern const OpFn ops_sf_fpu_db_a16[256]; +extern const OpFn ops_sf_fpu_db_a32[256]; +extern const OpFn ops_sf_fpu_dc_a16[32]; +extern const OpFn ops_sf_fpu_dc_a32[32]; +extern const OpFn ops_sf_fpu_dd_a16[256]; +extern const OpFn ops_sf_fpu_dd_a32[256]; +extern const OpFn ops_sf_fpu_de_a16[256]; +extern const OpFn ops_sf_fpu_de_a32[256]; +extern const OpFn ops_sf_fpu_df_a16[256]; +extern const OpFn ops_sf_fpu_df_a32[256]; + extern const OpFn ops_fpu_287_d9_a16[256]; extern const OpFn ops_fpu_287_d9_a32[256]; extern const OpFn ops_fpu_287_da_a16[256]; @@ -239,6 +272,13 @@ extern const OpFn ops_fpu_df_a32[256]; extern const OpFn ops_nofpu_a16[256]; extern const OpFn ops_nofpu_a32[256]; +extern const OpFn ops_sf_fpu_686_da_a16[256]; +extern const OpFn ops_sf_fpu_686_da_a32[256]; +extern const OpFn ops_sf_fpu_686_db_a16[256]; +extern const OpFn ops_sf_fpu_686_db_a32[256]; +extern const OpFn ops_sf_fpu_686_df_a16[256]; +extern const OpFn ops_sf_fpu_686_df_a32[256]; + extern const OpFn ops_fpu_686_da_a16[256]; extern const OpFn ops_fpu_686_da_a32[256]; extern const OpFn ops_fpu_686_db_a16[256]; diff --git a/src/cpu/x86_ops_fpu.h b/src/cpu/x86_ops_fpu.h index 314ec321b..502218be5 100644 --- a/src/cpu/x86_ops_fpu.h +++ b/src/cpu/x86_ops_fpu.h @@ -96,6 +96,15 @@ opWAIT(uint32_t fetchdat) x86_int(7); return 1; } + + if (!cpu_use_dynarec && fpu_softfloat) { + if (fpu_state.swd & FPU_SW_Summary) { + if (cr0 & 0x20) { + x86_int(16); + return 1; + } + } + } CLOCK_CYCLES(4); return 0; } diff --git a/src/cpu/x87.c b/src/cpu/x87.c index 0b93af9da..98ceb105b 100644 --- a/src/cpu/x87.c +++ b/src/cpu/x87.c @@ -15,6 +15,7 @@ #include "x86_ops.h" #include "x87.h" #include "386_common.h" +#include "softfloat/softfloat-specialize.h" uint32_t x87_pc_off, x87_op_off; uint16_t x87_pc_seg, x87_op_seg; @@ -37,11 +38,6 @@ fpu_log(const char *fmt, ...) # define fpu_log(fmt, ...) 
#endif -#define X87_TAG_VALID 0 -#define X87_TAG_ZERO 1 -#define X87_TAG_INVALID 2 -#define X87_TAG_EMPTY 3 - #ifdef USE_NEW_DYNAREC uint16_t x87_gettag(void) @@ -110,6 +106,340 @@ x87_settag(uint16_t new_tag) } #endif + +static floatx80 +FPU_handle_NaN32_Func(floatx80 a, int aIsNaN, float32 b32, int bIsNaN, struct float_status_t *status) +{ + int aIsSignalingNaN = floatx80_is_signaling_nan(a); + int bIsSignalingNaN = float32_is_signaling_nan(b32); + + if (aIsSignalingNaN | bIsSignalingNaN) + float_raise(status, float_flag_invalid); + + // propagate QNaN to SNaN + a = propagateFloatx80NaNOne(a, status); + + if (aIsNaN & !bIsNaN) return a; + + // float32 is NaN so conversion will propagate SNaN to QNaN and raise + // appropriate exception flags + floatx80 b = float32_to_floatx80(b32, status); + + if (aIsSignalingNaN) { + if (bIsSignalingNaN) goto returnLargerSignificand; + return bIsNaN ? b : a; + } + else if (aIsNaN) { + if (bIsSignalingNaN) return a; + returnLargerSignificand: + if (a.fraction < b.fraction) return b; + if (b.fraction < a.fraction) return a; + return (a.exp < b.exp) ? a : b; + } + else { + return b; + } +} + +int +FPU_handle_NaN32(floatx80 a, float32 b, floatx80 *r, struct float_status_t *status) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + if (floatx80_is_unsupported(a)) { + float_raise(status, float_flag_invalid); + *r = floatx80_default_nan; + return 1; + } + + int aIsNaN = floatx80_is_nan(a), bIsNaN = float32_is_nan(b); + if (aIsNaN | bIsNaN) { + *r = FPU_handle_NaN32_Func(a, aIsNaN, b, bIsNaN, status); + return 1; + } + return 0; +} + +static floatx80 +FPU_handle_NaN64_Func(floatx80 a, int aIsNaN, float64 b64, int bIsNaN, struct float_status_t *status) +{ + int aIsSignalingNaN = floatx80_is_signaling_nan(a); + int bIsSignalingNaN = float64_is_signaling_nan(b64); + + if (aIsSignalingNaN | bIsSignalingNaN) + float_raise(status, float_flag_invalid); + + // propagate QNaN to SNaN + a = propagateFloatx80NaNOne(a, status); + + if (aIsNaN & !bIsNaN) return a; + + // float64 is NaN so conversion will propagate SNaN to QNaN and raise + // appropriate exception flags + floatx80 b = float64_to_floatx80(b64, status); + + if (aIsSignalingNaN) { + if (bIsSignalingNaN) goto returnLargerSignificand; + return bIsNaN ? b : a; + } + else if (aIsNaN) { + if (bIsSignalingNaN) return a; + returnLargerSignificand: + if (a.fraction < b.fraction) return b; + if (b.fraction < a.fraction) return a; + return (a.exp < b.exp) ? 
a : b; + } + else { + return b; + } +} + +int +FPU_handle_NaN64(floatx80 a, float64 b, floatx80 *r, struct float_status_t *status) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + if (floatx80_is_unsupported(a)) { + float_raise(status, float_flag_invalid); + *r = floatx80_default_nan; + return 1; + } + + int aIsNaN = floatx80_is_nan(a), bIsNaN = float64_is_nan(b); + if (aIsNaN | bIsNaN) { + *r = FPU_handle_NaN64_Func(a, aIsNaN, b, bIsNaN, status); + return 1; + } + return 0; +} + +struct float_status_t +i387cw_to_softfloat_status_word(uint16_t control_word) +{ + struct float_status_t status; + int precision = control_word & FPU_CW_PC; + + switch (precision) { + case FPU_PR_32_BITS: + status.float_rounding_precision = 32; + break; + case FPU_PR_64_BITS: + status.float_rounding_precision = 64; + break; + case FPU_PR_80_BITS: + status.float_rounding_precision = 80; + break; + default: + /* With the precision control bits set to 01 "(reserved)", a + real CPU behaves as if the precision control bits were + set to 11 "80 bits" */ + status.float_rounding_precision = 80; + break; + } + + status.float_exception_flags = 0; // clear exceptions before execution + status.float_nan_handling_mode = float_first_operand_nan; + status.float_rounding_mode = (control_word & FPU_CW_RC) >> 10; + status.flush_underflow_to_zero = 0; + status.float_suppress_exception = 0; + status.float_exception_masks = control_word & FPU_CW_Exceptions_Mask; + status.denormals_are_zeros = 0; + return status; +} + + +int +FPU_status_word_flags_fpu_compare(int float_relation) +{ + switch (float_relation) { + case float_relation_unordered: + return (C0 | C2 | C3); + + case float_relation_greater: + return (0); + + case float_relation_less: + return (C0); + + case float_relation_equal: + return (C3); + } + + return (-1); // should never get here +} + +void +FPU_write_eflags_fpu_compare(int float_relation) +{ + switch (float_relation) { + case float_relation_unordered: + cpu_state.flags |= (Z_FLAG | P_FLAG | C_FLAG); + break; + + case float_relation_greater: + break; + + case float_relation_less: + cpu_state.flags |= (C_FLAG); + break; + + case float_relation_equal: + cpu_state.flags |= (Z_FLAG); + break; + + default: + break; + } +} + +uint16_t +FPU_exception(uint32_t fetchdat, uint16_t exceptions, int store) +{ + uint16_t status; + uint16_t unmasked; + + /* Extract only the bits which we use to set the status word */ + exceptions &= FPU_SW_Exceptions_Mask; + status = fpu_state.swd; + + unmasked = (exceptions & ~fpu_state.cwd) & FPU_CW_Exceptions_Mask; + + // if IE or DZ exception happen nothing else will be reported + if (exceptions & (FPU_EX_Invalid | FPU_EX_Zero_Div)) { + unmasked &= (FPU_EX_Invalid | FPU_EX_Zero_Div); + } + + /* Set summary bits if exception isn't masked */ + if (unmasked) { + fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward); + } + + if (exceptions & FPU_EX_Invalid) { + // FPU_EX_Invalid cannot come with any other exception but x87 stack fault + fpu_state.swd |= exceptions; + if (exceptions & FPU_SW_Stack_Fault) { + if (!(exceptions & C1)) { + /* This bit distinguishes over- from underflow for a stack fault, + and roundup from round-down for precision loss. 
*/ + fpu_state.swd &= ~C1; + } + } + return unmasked; + } + + if (exceptions & FPU_EX_Zero_Div) { + fpu_state.swd |= FPU_EX_Zero_Div; + if (!(fpu_state.cwd & FPU_EX_Zero_Div)) { +#ifdef FPU_8087 + if (!(fpu_state.cwd & FPU_SW_Summary)) { + fpu_state.cwd |= FPU_SW_Summary; + nmi = 1; + } +#else + picint(1 << 13); +#endif // FPU_8087 + } + return unmasked; + } + + if (exceptions & FPU_EX_Denormal) { + fpu_state.swd |= FPU_EX_Denormal; + if (unmasked & FPU_EX_Denormal) { + return (unmasked & FPU_EX_Denormal); + } + } + + /* Set the corresponding exception bits */ + fpu_state.swd |= exceptions; + + if (exceptions & FPU_EX_Precision) { + if (!(exceptions & C1)) { + /* This bit distinguishes over- from underflow for a stack fault, + and roundup from round-down for precision loss. */ + fpu_state.swd &= ~C1; + } + } + + // If #P unmasked exception occurred the result still has to be + // written to the destination. + unmasked &= ~FPU_EX_Precision; + + if (unmasked & (FPU_EX_Underflow | FPU_EX_Overflow)) { + // If unmasked over- or underflow occurs and dest is a memory location: + // - the TOS and destination operands remain unchanged + // - the inexact-result condition is not reported and C1 flag is cleared + // - no result is stored in the memory + // If the destination is in the register stack, adjusted resulting value + // is stored in the destination operand. + if (!store) + unmasked &= ~(FPU_EX_Underflow | FPU_EX_Overflow); + else { + fpu_state.swd &= ~C1; + if (!(status & FPU_EX_Precision)) + fpu_state.swd &= ~FPU_EX_Precision; + } + } + return unmasked; +} + +void +FPU_stack_overflow(uint32_t fetchdat) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + /* The masked response */ + if (is_IA_masked()) { + FPU_push(); + FPU_save_regi(floatx80_default_nan, 0); + } + FPU_exception(fetchdat, FPU_EX_Stack_Overflow, 0); +} + +void +FPU_stack_underflow(uint32_t fetchdat, int stnr, int pop_stack) +{ + const floatx80 floatx80_default_nan = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); + + /* The masked response */ + if (is_IA_masked()) { + FPU_save_regi(floatx80_default_nan, stnr); + if (pop_stack) + FPU_pop(); + } + FPU_exception(fetchdat, FPU_EX_Stack_Underflow, 0); +} + +/* ----------------------------------------------------------- + * Slimmed down version used to compile against a CPU simulator + * rather than a kernel (ported by Kevin Lawton) + * ------------------------------------------------------------ */ +int +FPU_tagof(const floatx80 reg) +{ + int32_t exp = floatx80_exp(reg); + if (exp == 0) { + if (!floatx80_fraction(reg)) + return X87_TAG_ZERO; + + /* The number is a de-normal or pseudodenormal. */ + return X87_TAG_INVALID; + } + + if (exp == 0x7fff) { + /* Is an Infinity, a NaN, or an unsupported data type. */ + return X87_TAG_INVALID; + } + + if (!(reg.fraction & BX_CONST64(0x8000000000000000))) { + /* Unsupported data type. */ + /* Valid numbers have the ms bit set to 1. 
*/
+        return X87_TAG_INVALID;
+    }
+
+    return X87_TAG_VALID;
+}
+
+
 #ifdef ENABLE_808X_LOG
 void
 x87_dumpregs(void)
diff --git a/src/cpu/x87.h b/src/cpu/x87.h
index 96ad835c8..5d460bc4b 100644
--- a/src/cpu/x87.h
+++ b/src/cpu/x87.h
@@ -1,7 +1,7 @@
-#define C0 (1 << 8)
-#define C1 (1 << 9)
-#define C2 (1 << 10)
-#define C3 (1 << 14)
+#define X87_TAG_VALID   0
+#define X87_TAG_ZERO    1
+#define X87_TAG_INVALID 2
+#define X87_TAG_EMPTY   3
 
 extern uint32_t x87_pc_off, x87_op_off;
 extern uint16_t x87_pc_seg, x87_op_seg;
@@ -46,3 +46,184 @@ void x87_settag(uint16_t new_tag);
 #define X87_ROUNDING_CHOP 3
 
 void codegen_set_rounding_mode(int mode);
+
+/* Status Word */
+#define FPU_SW_Backward    (0x8000) /* backward compatibility */
+#define FPU_SW_C3          (0x4000) /* condition bit 3 */
+#define FPU_SW_Top         (0x3800) /* top of stack */
+#define FPU_SW_C2          (0x0400) /* condition bit 2 */
+#define FPU_SW_C1          (0x0200) /* condition bit 1 */
+#define FPU_SW_C0          (0x0100) /* condition bit 0 */
+#define FPU_SW_Summary     (0x0080) /* exception summary */
+#define FPU_SW_Stack_Fault (0x0040) /* stack fault */
+#define FPU_SW_Precision   (0x0020) /* loss of precision */
+#define FPU_SW_Underflow   (0x0010) /* underflow */
+#define FPU_SW_Overflow    (0x0008) /* overflow */
+#define FPU_SW_Zero_Div    (0x0004) /* divide by zero */
+#define FPU_SW_Denormal_Op (0x0002) /* denormalized operand */
+#define FPU_SW_Invalid     (0x0001) /* invalid operation */
+
+#define C0 (1 << 8)
+#define C1 (1 << 9)
+#define C2 (1 << 10)
+#define C3 (1 << 14)
+
+#define FPU_SW_CC (C0 | C1 | C2 | C3)
+
+#define FPU_SW_Exceptions_Mask (0x027f) /* status word exceptions bit mask */
+
+/* Exception flags: */
+#define FPU_EX_Precision (0x0020) /* loss of precision */
+#define FPU_EX_Underflow (0x0010) /* underflow */
+#define FPU_EX_Overflow  (0x0008) /* overflow */
+#define FPU_EX_Zero_Div  (0x0004) /* divide by zero */
+#define FPU_EX_Denormal  (0x0002) /* denormalized operand */
+#define FPU_EX_Invalid   (0x0001) /* invalid operation */
+
+/* Special exceptions: */
+#define FPU_EX_Stack_Overflow  (0x0041 | C1) /* stack overflow */
+#define FPU_EX_Stack_Underflow (0x0041)      /* stack underflow */
+
+/* precision control */
+#define FPU_EX_Precision_Lost_Up (FPU_EX_Precision | C1)
+#define FPU_EX_Precision_Lost_Dn (FPU_EX_Precision)
+
+#define setcc(cc) \
+    fpu_state.swd = (fpu_state.swd & ~(FPU_SW_CC)) | ((cc) & FPU_SW_CC)
+
+#define clear_C1() { fpu_state.swd &= ~C1; }
+#define clear_C2() { fpu_state.swd &= ~C2; }
+
+/* ************ */
+/* Control Word */
+/* ************ */
+
+#define FPU_CW_Inf (0x1000) /* infinity control, legacy */
+
+#define FPU_CW_RC (0x0C00) /* rounding control */
+#define FPU_CW_PC (0x0300) /* precision control */
+
+#define FPU_RC_RND  (0x0000) /* rounding control */
+#define FPU_RC_DOWN (0x0400)
+#define FPU_RC_UP   (0x0800)
+#define FPU_RC_CHOP (0x0C00)
+
+#define FPU_CW_Precision (0x0020) /* loss of precision mask */
+#define FPU_CW_Underflow (0x0010) /* underflow mask */
+#define FPU_CW_Overflow  (0x0008) /* overflow mask */
+#define FPU_CW_Zero_Div  (0x0004) /* divide by zero mask */
+#define FPU_CW_Denormal  (0x0002) /* denormalized operand mask */
+#define FPU_CW_Invalid   (0x0001) /* invalid operation mask */
+
+#define FPU_CW_Exceptions_Mask (0x003f) /* all masks */
+
+/* Precision control bits affect only the following:
+   ADD, SUB(R), MUL, DIV(R), and SQRT */
+#define FPU_PR_32_BITS       (0x000)
+#define FPU_PR_RESERVED_BITS (0x100)
+#define FPU_PR_64_BITS       (0x200)
+#define FPU_PR_80_BITS       (0x300)
+
+#include "softfloat/softfloatx80.h"
+
+static __inline int
+is_IA_masked(void) +{ + return (fpu_state.cwd & FPU_CW_Invalid); +} + +struct float_status_t i387cw_to_softfloat_status_word(uint16_t control_word); +uint16_t FPU_exception(uint32_t fetchdat, uint16_t exceptions, int store); +int FPU_status_word_flags_fpu_compare(int float_relation); +void FPU_write_eflags_fpu_compare(int float_relation); +void FPU_stack_overflow(uint32_t fetchdat); +void FPU_stack_underflow(uint32_t fetchdat, int stnr, int pop_stack); +int FPU_handle_NaN32(floatx80 a, float32 b, floatx80 *r, struct float_status_t *status); +int FPU_handle_NaN64(floatx80 a, float64 b, floatx80 *r, struct float_status_t *status); +int FPU_tagof(const floatx80 reg); + +static __inline uint16_t +i387_get_control_word(void) +{ + return (fpu_state.cwd); +} + +static __inline uint16_t +i387_get_status_word(void) +{ + return (fpu_state.swd & ~FPU_SW_Top & 0xFFFF) | ((fpu_state.tos << 11) & FPU_SW_Top); +} + +#define IS_TAG_EMPTY(i) \ + (FPU_gettagi(i) == X87_TAG_EMPTY) + +static __inline int +FPU_gettagi(int stnr) +{ + return (fpu_state.tag >> (((stnr + fpu_state.tos) & 7) * 2)) & 3; +} + +static __inline void +FPU_settagi_valid(int stnr) +{ + int regnr = (stnr + fpu_state.tos) & 7; + fpu_state.tag &= ~(3 << (regnr * 2)); // FPU_Tag_Valid == '00 +} + +static __inline void +FPU_settagi(int tag, int stnr) +{ + int regnr = (stnr + fpu_state.tos) & 7; + fpu_state.tag &= ~(3 << (regnr * 2)); + fpu_state.tag |= (tag & 3) << (regnr * 2); +} + +static __inline void +FPU_push(void) +{ + fpu_state.tos = (fpu_state.tos - 1) & 7; +} + +static __inline void +FPU_pop(void) +{ + fpu_state.tag |= 3 << (fpu_state.tos * 2); + fpu_state.tos = (fpu_state.tos + 1) & 7; +} + +static __inline floatx80 +FPU_read_regi(int stnr) +{ + return fpu_state.st_space[(stnr + fpu_state.tos) & 7]; +} + +// it is only possible to read FPU tag word through certain +// instructions like FNSAVE, and they update tag word to its +// real value anyway +static __inline void +FPU_save_regi(floatx80 reg, int stnr) +{ + fpu_state.st_space[(stnr + fpu_state.tos) & 7] = reg; + FPU_settagi_valid(stnr); +} + +static __inline void +FPU_save_regi_tag(floatx80 reg, int tag, int stnr) +{ + fpu_state.st_space[(stnr + fpu_state.tos) & 7] = reg; + FPU_settagi(tag, stnr); +} + + +#define FPU_check_pending_exceptions() \ +do { \ + if (fpu_state.swd & FPU_SW_Summary) { \ + if (cr0 & 0x20) { \ + x86_int(16); \ + return 1; \ + } else { \ + picint(1 << 13); \ + return 1; \ + } \ + } \ +} while (0) diff --git a/src/cpu/x87_ops.h b/src/cpu/x87_ops.h index b2b082107..0ab6b2e68 100644 --- a/src/cpu/x87_ops.h +++ b/src/cpu/x87_ops.h @@ -26,6 +26,7 @@ #ifdef _MSC_VER # include #endif +#include "x87_ops_conv.h" #ifdef ENABLE_FPU_LOG extern void fpu_log(const char *fmt, ...); @@ -44,6 +45,11 @@ static int rounding_modes[4] = { FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARD #define C2 (1 << 10) #define C3 (1 << 14) +#define X87_TAG_VALID 0 +#define X87_TAG_ZERO 1 +#define X87_TAG_INVALID 2 +#define X87_TAG_EMPTY 3 + #define STATUS_ZERODIVIDE 4 typedef union @@ -250,8 +256,6 @@ x87_fround(double b) return 0LL; } -#include "x87_ops_conv.h" - static __inline double x87_ld80(void) { @@ -477,6 +481,13 @@ typedef union { # define FP_TAG_VALID_N cpu_state.tag[(cpu_state.TOP + 1) & 7] &= ~TAG_UINT64 #endif +#include "softfloat/x87_ops_arith.h" +#include "softfloat/x87_ops_compare.h" +#include "softfloat/x87_ops_const.h" +#include "softfloat/x87_ops_load_store.h" +#include "softfloat/x87_ops_misc.h" +#include "softfloat/x87_ops_trans.h" +#include "softfloat/x87_ops_other.h" 
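+/* The softfloat/x87_ops_*.h headers included above supply the sf_*
+   instruction handlers referenced by the OP_TABLE(sf_fpu_*) dispatch
+   tables defined below; the existing op* handlers and OP_TABLE(fpu_*)
+   tables are kept alongside them as the alternative implementation. */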
#include "x87_ops_arith.h" #include "x87_ops_misc.h" #include "x87_ops_loadstore.h" @@ -537,6 +548,264 @@ FPU_ILLEGAL_a32(uint32_t fetchdat) #define ILLEGAL_a16 FPU_ILLEGAL_a16 #ifdef FPU_8087 +const OpFn OP_TABLE(sf_fpu_8087_d8)[32] = { + // clang-format off + sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16, + sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16, + sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16, + sf_FADD_st0_stj, sf_FMUL_st0_stj, sf_FCOM_sti, sf_FCOMP_sti, sf_FSUB_st0_stj, sf_FSUBR_st0_stj, sf_FDIV_st0_stj, sf_FDIVR_st0_stj, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_8087_d9)[256] = { + // clang-format off + sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, + sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, + sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, + sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, + sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, + sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, + + sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, + sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, + sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, + sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, + sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, + sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, + + sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, + sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, + sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, + sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, + sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, + sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, 
sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, + + sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FNOP, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, /*Invalid*/ + sf_FCHS, sf_FABS, ILLEGAL_a16, ILLEGAL_a16, sf_FTST, sf_FXAM, ILLEGAL_a16, ILLEGAL_a16, + sf_FLD1, sf_FLDL2T, sf_FLDL2E, sf_FLDPI, sf_FLDEG2, sf_FLDLN2, sf_FLDZ, ILLEGAL_a16, + sf_F2XM1, sf_FYL2X, sf_FPTAN, sf_FPATAN, sf_FXTRACT, sf_FPREM1, sf_FDECSTP, sf_FINCSTP, + sf_FPREM, sf_FYL2XP1, sf_FSQRT, ILLEGAL_a16, sf_FRNDINT, sf_FSCALE, ILLEGAL_a16, ILLEGAL_a16 + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_8087_da)[256] = { + // clang-format off + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, 
sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_8087_db)[256] = { + // clang-format off + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, 
sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FI, sf_FI, sf_FNCLEX, sf_FNINIT, ILLEGAL_a16, sf_FNOP, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_8087_dc)[32] = { + // clang-format off + sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, + sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, + sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, + sf_FADD_sti_st0, sf_FMUL_sti_st0, ILLEGAL_a16, ILLEGAL_a16, sf_FSUBR_sti_st0, sf_FSUB_sti_st0, sf_FDIVR_sti_st0, sf_FDIV_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_8087_dd)[256] = { + // clang-format off + sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, + sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, + sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, + sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, + + sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, + sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, + sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, 
ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, + sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, + + sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, + sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, + sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, + sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, + + sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_8087_de)[256] = { + // clang-format off + sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, + sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, + sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, + sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, + sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, + sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, + sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, + sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, + + sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, + sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, + sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, + sf_FCOMPiw_a16, sf_FCOMPiw_a16, 
sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, + sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, + sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, + sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, + sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, + + sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, + sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, + sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, + sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, + sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, + sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, + sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, + sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, + + sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, + sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, sf_FCOMPP, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, + sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, + sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, + sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_8087_df)[256] = { + // clang-format off + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, 
sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16,
+    sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16,
+
+    sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16,
+    sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16,
+    sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16,
+    sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16,
+
+    sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16,
+    sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16,
+    sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16,
+    sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16,
+
+    sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    // clang-format on
+};
+
 const OpFn OP_TABLE(fpu_8087_d8)[32] = {
     // clang-format off
     opFADDs_a16, opFMULs_a16, opFCOMs_a16, opFCOMPs_a16, opFSUBs_a16, opFSUBRs_a16, opFDIVs_a16, opFDIVRs_a16,
@@ -581,7 +850,7 @@ const OpFn OP_TABLE(fpu_8087_d9)[256] = {
     ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, /*Invalid*/
     opFCHS, opFABS, ILLEGAL_a16, ILLEGAL_a16, opFTST, opFXAM, ILLEGAL_a16, ILLEGAL_a16,
     opFLD1, opFLDL2T, opFLDL2E, opFLDPI, opFLDEG2, opFLDLN2, opFLDZ, ILLEGAL_a16,
-    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
+    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
     opFPREM, opFYL2XP1, opFSQRT, ILLEGAL_a16, opFRNDINT, opFSCALE, ILLEGAL_a16, ILLEGAL_a16
     // clang-format on
 };
@@ -797,6 +1066,1260 @@ const OpFn OP_TABLE(fpu_8087_df)[256] = {
 #else
 # define ILLEGAL_a32 FPU_ILLEGAL_a32
+
+const OpFn OP_TABLE(sf_fpu_d8_a16)[32] = {
+    // clang-format off
+    sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16,
+    sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16,
+    sf_FADDs_a16, sf_FMULs_a16, sf_FCOMs_a16, sf_FCOMPs_a16, sf_FSUBs_a16, sf_FSUBRs_a16, sf_FDIVs_a16, sf_FDIVRs_a16,
+    sf_FADD_st0_stj, sf_FMUL_st0_stj, sf_FCOM_sti, sf_FCOMP_sti, sf_FSUB_st0_stj, sf_FSUBR_st0_stj, sf_FDIV_st0_stj, sf_FDIVR_st0_stj,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_d8_a32)[32] = {
+    // clang-format off
+    sf_FADDs_a32, sf_FMULs_a32, sf_FCOMs_a32, sf_FCOMPs_a32, sf_FSUBs_a32, sf_FSUBRs_a32, sf_FDIVs_a32, sf_FDIVRs_a32,
+    sf_FADDs_a32, sf_FMULs_a32, sf_FCOMs_a32, sf_FCOMPs_a32, sf_FSUBs_a32, sf_FSUBRs_a32, sf_FDIVs_a32, sf_FDIVRs_a32,
+    sf_FADDs_a32, sf_FMULs_a32, sf_FCOMs_a32, sf_FCOMPs_a32, sf_FSUBs_a32, sf_FSUBRs_a32, sf_FDIVs_a32, sf_FDIVRs_a32,
+    sf_FADD_st0_stj, sf_FMUL_st0_stj, sf_FCOM_sti, sf_FCOMP_sti, sf_FSUB_st0_stj, sf_FSUBR_st0_stj, sf_FDIV_st0_stj, sf_FDIVR_st0_stj,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_287_d9_a16)[256] = {
+    // clang-format off
+    sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16,
+    sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16,
+    sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+    sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16,
+    sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
+    sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16,
+
+    sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16,
+    sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16,
+    sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16,
+    sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16,
+    sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16,
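The 32-entry D8 tables just above work because every D8 encoding with the same mod and reg bits dispatches identically, so five ModRM bits suffice as an index; a sketch of that indexing, assuming the usual (modrm >> 3) & 0x1f scheme:

#include <stdint.h>

/* Rows 0-23 are the three memory forms (mod 00/01/10 x reg),
   rows 24-31 the ST(i) register forms (mod 11). */
static inline unsigned d8_table_index(uint8_t modrm)
{
    return (modrm >> 3) & 0x1f;
}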
sf_FNSTENV_a16, + sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, + + sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, + sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, + sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, + sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, + sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, + sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, + + sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FNOP, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, /*Invalid*/ + sf_FCHS, sf_FABS, ILLEGAL_a16, ILLEGAL_a16, sf_FTST, sf_FXAM, ILLEGAL_a16, ILLEGAL_a16, + sf_FLD1, sf_FLDL2T, sf_FLDL2E, sf_FLDPI, sf_FLDEG2, sf_FLDLN2, sf_FLDZ, ILLEGAL_a16, + sf_F2XM1, sf_FYL2X, sf_FPTAN, sf_FPATAN, sf_FXTRACT, sf_FPREM1, sf_FDECSTP, sf_FINCSTP, + sf_FPREM, sf_FYL2XP1, sf_FSQRT, sf_FSINCOS, sf_FRNDINT, sf_FSCALE, sf_FSIN, sf_FCOS, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_d9_a32)[256] = { + // clang-format off + sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, + sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, + sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, + sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, + sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, + sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, + + sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, + sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, + sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, + sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, + sf_FNSTENV_a32, 
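The constant row sf_FLD1 ... sf_FLDZ in the 287 D9 table above mirrors the FPU's built-in constants; sf_FLDEG2 keeps this codebase's long-standing name for FLDLG2, the log10(2) load. Approximate values for reference (the handlers themselves use exact 80-bit encodings):

static const double d9_constants[7] = {
    1.0,                 /* FLD1               */
    3.321928094887362,   /* FLDL2T: log2(10)   */
    1.442695040888963,   /* FLDL2E: log2(e)    */
    3.141592653589793,   /* FLDPI              */
    0.301029995663981,   /* FLDLG2 (sf_FLDEG2) */
    0.693147180559945,   /* FLDLN2: ln(2)      */
    0.0,                 /* FLDZ               */
};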
sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, + sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, + + sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, + sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, + sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, + sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, + sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, + sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, + + sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FNOP, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, /*Invalid*/ + sf_FCHS, sf_FABS, ILLEGAL_a32, ILLEGAL_a32, sf_FTST, sf_FXAM, ILLEGAL_a32, ILLEGAL_a32, + sf_FLD1, sf_FLDL2T, sf_FLDL2E, sf_FLDPI, sf_FLDEG2, sf_FLDLN2, sf_FLDZ, ILLEGAL_a32, + sf_F2XM1, sf_FYL2X, sf_FPTAN, sf_FPATAN, sf_FXTRACT, sf_FPREM1, sf_FDECSTP, sf_FINCSTP, + sf_FPREM, sf_FYL2XP1, sf_FSQRT, sf_FSINCOS, sf_FRNDINT, sf_FSCALE, sf_FSIN, sf_FCOS, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_d9_a16)[256] = { + // clang-format off + sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, + sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, + sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, + sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, + sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, + sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, + + sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, + sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, + sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, + sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, 
sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, + sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, + sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, + + sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, sf_FLDs_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, sf_FSTs_a16, + sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, sf_FSTPs_a16, + sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, sf_FLDENV_a16, + sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, sf_FLDCW_a16, + sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, sf_FNSTENV_a16, + sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, sf_FNSTCW_a16, + + sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FNOP, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, /*Invalid*/ + sf_FCHS, sf_FABS, ILLEGAL_a16, ILLEGAL_a16, sf_FTST, sf_FXAM, ILLEGAL_a16, ILLEGAL_a16, + sf_FLD1, sf_FLDL2T, sf_FLDL2E, sf_FLDPI, sf_FLDEG2, sf_FLDLN2, sf_FLDZ, ILLEGAL_a16, + sf_F2XM1, sf_FYL2X, sf_FPTAN, sf_FPATAN, sf_FXTRACT, sf_FPREM1, sf_FDECSTP, sf_FINCSTP, + sf_FPREM, sf_FYL2XP1, sf_FSQRT, sf_FSINCOS, sf_FRNDINT, sf_FSCALE, sf_FSIN, sf_FCOS, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_d9_a32)[256] = { + // clang-format off + sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, + sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, + sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, + sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, + sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, + sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, + + sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, + sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, + sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, 
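One visible difference from the 287 variant: in sf_fpu_d9_a16 above, the row at D9 D8-DF dispatches to sf_FSTP_sti rather than ILLEGAL_a16, matching the undocumented alias by which D9 D8+i behaves as FSTP ST(i) on real parts (the inherited /*Invalid*/ comment flags the encoding as undocumented, not the entry as dead). A decode sketch of the alias (helper name hypothetical):

#include <stdint.h>

/* Documented FSTP ST(i) is DD D8+i; D9 D8+i is its undocumented twin. */
static int is_fstp_sti(uint8_t escape_op, uint8_t modrm)
{
    return ((modrm & 0xf8) == 0xd8) && (escape_op == 0xdd || escape_op == 0xd9);
}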
sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, + sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, + sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, + sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, + + sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, sf_FLDs_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, sf_FSTs_a32, + sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, sf_FSTPs_a32, + sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, sf_FLDENV_a32, + sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, sf_FLDCW_a32, + sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, sf_FNSTENV_a32, + sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, sf_FNSTCW_a32, + + sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, sf_FLD_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FNOP, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, /*Invalid*/ + sf_FCHS, sf_FABS, ILLEGAL_a32, ILLEGAL_a32, sf_FTST, sf_FXAM, ILLEGAL_a32, ILLEGAL_a32, + sf_FLD1, sf_FLDL2T, sf_FLDL2E, sf_FLDPI, sf_FLDEG2, sf_FLDLN2, sf_FLDZ, ILLEGAL_a32, + sf_F2XM1, sf_FYL2X, sf_FPTAN, sf_FPATAN, sf_FXTRACT, sf_FPREM1, sf_FDECSTP, sf_FINCSTP, + sf_FPREM, sf_FYL2XP1, sf_FSQRT, sf_FSINCOS, sf_FRNDINT, sf_FSCALE, sf_FSIN, sf_FCOS, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_da_a16)[256] = { + // clang-format off + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, 
sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_da_a32)[256] = { + // clang-format off + sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, + sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, + sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, + sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, + sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, + sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, + sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, + sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, 
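In the DA tables, the memory rows are the 32-bit integer forms, FIADD through FIDIVR on an m32int operand (the il suffix reading as "integer, long"); the 287 variant above leaves the whole register block illegal because its only occupant, FUCOMPP at DA E9, arrived with the 387. A decode sketch for the memory side (helper is illustrative):

#include <stdint.h>

/* DA with mod != 3: the reg field picks the m32int operation,
   in the same order as the table rows above. */
static const char *da_mem_mnemonic(uint8_t modrm)
{
    static const char *ops[8] = {
        "FIADD", "FIMUL", "FICOM", "FICOMP",
        "FISUB", "FISUBR", "FIDIV", "FIDIVR",
    };
    return ops[(modrm >> 3) & 7];
}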
sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, + + sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, + sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, + sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, + sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, + sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, + sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, + sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, + sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, + + sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, + sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, + sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, + sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, + sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, + sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, + sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, + sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, + + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_da_a16)[256] = { + // clang-format off + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, 
sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, sf_FUCOMPP, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_da_a32)[256] = { + // clang-format off + sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, + sf_FMULil_a32, 
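sf_FUCOMPP, which the sf_fpu_da_a16 table above does populate at DA E9, is the unordered compare-and-pop-twice: like FCOMPP it compares ST(0) with ST(1) and pops both, but a quiet NaN yields "unordered" condition codes instead of an invalid-operation fault. The condition-code outcome, using the standard status-word bit positions:

#include <stdint.h>

#define SW_C0 (1 << 8)
#define SW_C2 (1 << 10)
#define SW_C3 (1 << 14)

/* relation: -1 below, 0 equal, +1 above, 2 unordered */
static uint16_t fucom_cc(int relation)
{
    if (relation == 2)
        return SW_C3 | SW_C2 | SW_C0; /* unordered:     111 */
    if (relation < 0)
        return SW_C0;                 /* ST(0) <  src:  001 */
    if (relation == 0)
        return SW_C3;                 /* ST(0) == src:  100 */
    return 0;                         /* ST(0) >  src:  000 */
}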
sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, + sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, + sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, + sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, + sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, + sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, + sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, + + sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, + sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, + sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, + sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, + sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, + sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, + sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, + sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, + + sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, + sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, + sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, + sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, + sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, + sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, + sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, + sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, + + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, sf_FUCOMPP, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, 
ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_686_da_a16)[256] = { + // clang-format off + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, sf_FADDil_a16, + sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, sf_FMULil_a16, + sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, sf_FCOMil_a16, + sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, sf_FCOMPil_a16, + sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, sf_FSUBil_a16, + sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, sf_FSUBRil_a16, + sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, sf_FDIVil_a16, + sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, sf_FDIVRil_a16, + + sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, + sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, + sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, + 
sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, sf_FUCOMPP, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_686_da_a32)[256] = { + // clang-format off + sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, + sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, + sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, + sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, + sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, + sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, + sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, + sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, + + sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, + sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, + sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, + sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, + sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, + sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, + sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, + sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, + + sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, sf_FADDil_a32, + sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, sf_FMULil_a32, + sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, sf_FCOMil_a32, + sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, sf_FCOMPil_a32, + sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, sf_FSUBil_a32, + sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, sf_FSUBRil_a32, + sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, sf_FDIVil_a32, + sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, 
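The FCMOV rows that distinguish the 686 tables (sf_FCMOVB/E/BE/U above; the negated forms live in the 686 DB tables below) test the integer EFLAGS rather than FPU condition codes, which is what lets them pair with FCOMI. The P6-defined predicates (helper is illustrative):

#include <stdbool.h>
#include <stdint.h>

#define FLAG_CF 0x0001
#define FLAG_PF 0x0004
#define FLAG_ZF 0x0040

/* row: 0 = FCMOVB, 1 = FCMOVE, 2 = FCMOVBE, 3 = FCMOVU */
static bool fcmov_taken(unsigned row, uint32_t eflags)
{
    switch (row) {
        case 0:  return (eflags & FLAG_CF) != 0;
        case 1:  return (eflags & FLAG_ZF) != 0;
        case 2:  return (eflags & (FLAG_CF | FLAG_ZF)) != 0;
        default: return (eflags & FLAG_PF) != 0;
    }
}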
sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, sf_FDIVRil_a32, + + sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, sf_FCMOVB, + sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, sf_FCMOVE, + sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, sf_FCMOVBE, + sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, sf_FCMOVU, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, sf_FUCOMPP, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_db_a16)[256] = { + // clang-format off + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, 
ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNOP, sf_FNOP, sf_FNCLEX, sf_FNINIT, sf_FNOP, sf_FNOP, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_db_a32)[256] = { + // clang-format off + sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, + sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, + + sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, + sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, + + sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, + sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, 
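The run sf_FNOP, sf_FNOP, sf_FNCLEX, sf_FNINIT, sf_FNOP, sf_FNOP in the DB register block above covers DB E0-E5: FNENI and FNDISI (8087 interrupt masking) and FNSETPM (287 protected-mode switch) are accepted as no-ops on later parts, while FNCLEX clears pending-exception state and FNINIT reinitializes the unit. A sketch of FNCLEX's effect on the status word, assuming the conventional bit assignment:

#include <stdint.h>

/* FNCLEX clears B (bit 15), ES (bit 7) and the exception flags
   (bits 0-6), leaving TOP and C0-C3 untouched. */
static inline uint16_t fnclex_swd(uint16_t swd)
{
    return swd & 0x7f00;
}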
sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, + + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FNOP, sf_FNOP, sf_FNCLEX, sf_FNINIT, sf_FNOP, sf_FNOP, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_db_a16)[256] = { + // clang-format off + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, 
ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNOP, sf_FNOP, sf_FNCLEX, sf_FNINIT, sf_FNOP, sf_FNOP, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_db_a32)[256] = { + // clang-format off + sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, + sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, + + sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, + sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, + + sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, + sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, 
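The sf_FLDe/sf_FSTPe rows are DB /5 and /7, the m80real load and store-pop. They are the one operand size the softfloat core can move without rounding, since the 80-bit memory format coincides with the internal extended type. A sketch of the raw layout (the struct shape follows common SoftFloat practice and is an assumption, not lifted from this patch):

#include <stdint.h>

/* 80-bit extended real as FSTPe writes it to memory. */
typedef struct {
    uint64_t fraction; /* significand; bit 63 is the explicit integer bit */
    uint16_t exp;      /* bit 15 = sign, bits 0-14 = biased exponent      */
} x80_image;           /* illustrative stand-in for floatx80              */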
sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, + + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FNOP, sf_FNOP, sf_FNCLEX, sf_FNINIT, sf_FNOP, sf_FNOP, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_686_db_a16)[256] = { + // clang-format off + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, + sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, + + sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, sf_FILDil_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, 
sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16, sf_FISTil_a16,
+    sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16, sf_FISTPil_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16, sf_FLDe_a16,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16, sf_FSTPe_a16,
+
+    sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB,
+    sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE,
+    sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE,
+    sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU,
+    sf_FNOP, sf_FNOP, sf_FNCLEX, sf_FNINIT, sf_FNOP, sf_FNOP, ILLEGAL_a16, ILLEGAL_a16,
+    sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj,
+    sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj,
+    ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16,
+    // clang-format on
+};
+
+const OpFn OP_TABLE(sf_fpu_686_db_a32)[256] = {
+    // clang-format off
+    sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32,
+    sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32,
+
+    sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32,
+    sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32,
+
+    sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32, sf_FILDil_a32,
sf_FILDil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, sf_FISTil_a32, + sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, sf_FISTPil_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, sf_FLDe_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, sf_FSTPe_a32, + + sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, sf_FCMOVNB, + sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, sf_FCMOVNE, + sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, sf_FCMOVNBE, + sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, sf_FCMOVNU, + sf_FNOP, sf_FNOP, sf_FNCLEX, sf_FNINIT, sf_FNOP, sf_FNOP, ILLEGAL_a32, ILLEGAL_a32, + sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, sf_FUCOMI_st0_stj, + sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, sf_FCOMI_st0_stj, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_dc_a16)[32] = { + // clang-format off + sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, + sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, + sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, + sf_FADD_sti_st0, sf_FMUL_sti_st0, ILLEGAL_a16, ILLEGAL_a16, sf_FSUBR_sti_st0, sf_FSUB_sti_st0, sf_FDIVR_sti_st0, sf_FDIV_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_dc_a32)[32] = { + // clang-format off + sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, + sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, + sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, + sf_FADD_sti_st0, sf_FMUL_sti_st0, ILLEGAL_a32, ILLEGAL_a32, sf_FSUBR_sti_st0, sf_FSUB_sti_st0, sf_FDIVR_sti_st0, sf_FDIV_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_dc_a16)[32] = { + // clang-format off + sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, + sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, + sf_FADDd_a16, sf_FMULd_a16, sf_FCOMd_a16, sf_FCOMPd_a16, sf_FSUBd_a16, sf_FSUBRd_a16, sf_FDIVd_a16, sf_FDIVRd_a16, + sf_FADD_sti_st0, sf_FMUL_sti_st0, sf_FCOM_sti, sf_FCOMP_sti, sf_FSUBR_sti_st0, sf_FSUB_sti_st0, sf_FDIVR_sti_st0, sf_FDIV_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_dc_a32)[32] = { + // clang-format off + sf_FADDd_a32, 
sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, + sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, + sf_FADDd_a32, sf_FMULd_a32, sf_FCOMd_a32, sf_FCOMPd_a32, sf_FSUBd_a32, sf_FSUBRd_a32, sf_FDIVd_a32, sf_FDIVRd_a32, + sf_FADD_sti_st0, sf_FMUL_sti_st0, sf_FCOM_sti, sf_FCOMP_sti, sf_FSUBR_sti_st0, sf_FSUB_sti_st0, sf_FDIVR_sti_st0, sf_FDIV_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_dd_a16)[256] = { + // clang-format off + sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, + sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, + sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, + sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, + + sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, + sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, + sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, + sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, + + sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, + sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, + sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, + sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, + + sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, 
sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_dd_a32)[256] = { + // clang-format off + sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, + sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, + sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, + sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, + + sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, + sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, + sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, + sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, + + sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, + sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, + sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, + sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, + + sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, 
ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_dd_a16)[256] = { + // clang-format off + sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, + sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, + sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, + sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, + + sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, + sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, + sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, + sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, + + sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, sf_FLDd_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, sf_FSTd_a16, + sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, sf_FSTPd_a16, + sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, sf_FRSTOR_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, sf_FNSAVE_a16, + sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, sf_FNSTSW_a16, + + sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, 
sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, + sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_dd_a32)[256] = { + // clang-format off + sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, + sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, + sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, + sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, + + sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, + sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, + sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, + sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, + + sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, sf_FLDd_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, sf_FSTd_a32, + sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, sf_FSTPd_a32, + sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, sf_FRSTOR_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, sf_FNSAVE_a32, + sf_FNSTSW_a32, sf_FNSTSW_a32, 
sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, sf_FNSTSW_a32, + + sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, sf_FFREE_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, sf_FST_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, sf_FUCOM_sti, + sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, sf_FUCOMP_sti, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_de_a16)[256] = { + // clang-format off + sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, + sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, + sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, + sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, + sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, + sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, + sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, + sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, + + sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, + sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, + sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, + sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, + sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, + sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, + sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, + sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, + + sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, + sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, + sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, + sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, 
sf_FCOMPiw_a16, + sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, + sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, + sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, + sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, + + sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, + sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, sf_FCOMPP, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, + sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, + sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, + sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_de_a32)[256] = { + // clang-format off + sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, + sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, + sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, + sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, + sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, + sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, + sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, + sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, + + sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, + sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, + sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, + sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, + sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, + sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, + sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, 
sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, + sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, + + sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, + sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, + sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, + sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, + sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, + sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, + sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, + sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, + + sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, + sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, sf_FCOMPP, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, + sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, + sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, + sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_de_a16)[256] = { + // clang-format off + sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, + sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, + sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, + sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, + sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, + sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, + sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, + sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, + + sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, + sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, 
sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, + sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, + sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, + sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, + sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, + sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, + sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, + + sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, sf_FADDiw_a16, + sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, sf_FMULiw_a16, + sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, sf_FCOMiw_a16, + sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, sf_FCOMPiw_a16, + sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, sf_FSUBiw_a16, + sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, sf_FSUBRiw_a16, + sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, sf_FDIViw_a16, + sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, sf_FDIVRiw_a16, + + sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, + sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, + sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, + ILLEGAL_a16, sf_FCOMPP, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, + sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, + sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, + sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_de_a32)[256] = { + // clang-format off + sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, + sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, + sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, + sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, + sf_FSUBiw_a32, 
sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, + sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, + sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, + sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, + + sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, + sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, + sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, + sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, + sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, + sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, + sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, + sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, + + sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, sf_FADDiw_a32, + sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, sf_FMULiw_a32, + sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, sf_FCOMiw_a32, + sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, sf_FCOMPiw_a32, + sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, sf_FSUBiw_a32, + sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, sf_FSUBRiw_a32, + sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, sf_FDIViw_a32, + sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, sf_FDIVRiw_a32, + + sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, sf_FADDP_sti_st0, + sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, sf_FMULP_sti_st0, + sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, sf_FCOMP_sti, + ILLEGAL_a32, sf_FCOMPP, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, sf_FSUBRP_sti_st0, + sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, sf_FSUBP_sti_st0, + sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, sf_FDIVRP_sti_st0, + sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, 
sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, sf_FDIVP_sti_st0, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_df_a16)[256] = { + // clang-format off + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, + sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, + + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, + sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, + + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, + sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, + + sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, 
ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FNSTSW_AX, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_287_df_a32)[256] = { + // clang-format off + sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, + sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, + sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, + sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, + + sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, + sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, + sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, + sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, + + sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, + sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, + sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, 
sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, + sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, + + sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FNSTSW_AX, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_df_a16)[256] = { + // clang-format off + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, + sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, + + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, + sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, + + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + 
sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, + sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, + + sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + sf_FNSTSW_AX, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_df_a32)[256] = { + // clang-format off + sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, + sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, + sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, + sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, + + sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, + sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, + sf_FBSTP_PACKED_BCD_a32, 
sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, + sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, + + sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, + sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, + sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, + sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, + + sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + sf_FNSTSW_AX, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_686_df_a16)[256] = { + // clang-format off + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, + sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, + + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, 
ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, + sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, + + sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, sf_FILDiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, sf_FISTiw_a16, + sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, sf_FISTPiw_a16, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, sf_FILDiq_a16, + sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, sf_FBSTP_PACKED_BCD_a16, + sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, sf_FISTPiq_a16, + + sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, + sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, + sf_FNSTSW_AX, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, + sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, + ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, + // clang-format on +}; + +const OpFn OP_TABLE(sf_fpu_686_df_a32)[256] = { + // clang-format off + sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, + sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, + ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, + 
+    sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32,
+    sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32,
+    sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32,
+
+    sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32,
+    sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32,
+    sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32,
+    sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32,
+
+    sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32, sf_FILDiw_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32, sf_FISTiw_a32,
+    sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32, sf_FISTPiw_a32,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32, sf_FILDiq_a32,
+    sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32, sf_FBSTP_PACKED_BCD_a32,
+    sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32, sf_FISTPiq_a32,
+
+    sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti, sf_FFREEP_sti,
+    sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti, sf_FXCH_sti,
+    sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti,
+    sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti, sf_FSTP_sti,
+    sf_FNSTSW_AX, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj, sf_FUCOMIP_st0_stj,
+    sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj, sf_FCOMIP_st0_stj,
+    ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32,
+    // clang-format on
+};
+
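For orientation: each of these escape tables has 256 entries and is indexed with the full ModRM byte. For the memory forms (mod != 3, indices 0x00-0xBF) only the reg field selects the operation, which is why every handler appears eight times in a row; indices 0xC0-0xFF map the register forms byte by byte. A minimal sketch of the dispatch, assuming OP_TABLE(name) expands to an ops_-prefixed identifier (consistent with the ops_sf_fpu_* names assigned in cpu.c) and that the ModRM byte sits in the low bits of fetchdat:

/* Hypothetical illustration only; not part of the patch. */
static int
dispatch_df_escape(uint32_t fetchdat)
{
    uint8_t modrm = fetchdat & 0xff;

    /* Rows of eight identical entries collapse the rm field for the
       memory forms; 0xC0-0xFF select the register-operand handlers. */
    return ops_sf_fpu_686_df_a32[modrm](fetchdat);
}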
 const OpFn OP_TABLE(fpu_d8_a16)[32] = {
     // clang-format off
     opFADDs_a16, opFMULs_a16, opFCOMs_a16, opFCOMPs_a16, opFSUBs_a16, opFSUBRs_a16, opFDIVs_a16, opFDIVRs_a16,
@@ -850,7 +2373,7 @@ const OpFn OP_TABLE(fpu_287_d9_a16)[256] = {
     ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, ILLEGAL_a16, /*Invalid*/
     opFCHS, opFABS, ILLEGAL_a16, ILLEGAL_a16, opFTST, opFXAM, ILLEGAL_a16, ILLEGAL_a16,
     opFLD1, opFLDL2T, opFLDL2E, opFLDPI, opFLDEG2, opFLDLN2, opFLDZ, ILLEGAL_a16,
-    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
+    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
     opFPREM, opFYL2XP1, opFSQRT, opFSINCOS, opFRNDINT, opFSCALE, opFSIN, opFCOS
     // clang-format on
 };
@@ -890,7 +2413,7 @@ const OpFn OP_TABLE(fpu_287_d9_a32)[256] = {
     ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, ILLEGAL_a32, /*Invalid*/
     opFCHS, opFABS, ILLEGAL_a32, ILLEGAL_a32, opFTST, opFXAM, ILLEGAL_a32, ILLEGAL_a32,
     opFLD1, opFLDL2T, opFLDL2E, opFLDPI, opFLDEG2, opFLDLN2, opFLDZ, ILLEGAL_a32,
-    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
+    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
     opFPREM, opFYL2XP1, opFSQRT, opFSINCOS, opFRNDINT, opFSCALE, opFSIN, opFCOS
     // clang-format on
 };
@@ -930,7 +2453,7 @@ const OpFn OP_TABLE(fpu_d9_a16)[256] = {
     opFSTP, opFSTP, opFSTP, opFSTP, opFSTP, opFSTP, opFSTP, opFSTP, /*Invalid*/
     opFCHS, opFABS, ILLEGAL_a16, ILLEGAL_a16, opFTST, opFXAM, ILLEGAL_a16, ILLEGAL_a16,
     opFLD1, opFLDL2T, opFLDL2E, opFLDPI, opFLDEG2, opFLDLN2, opFLDZ, ILLEGAL_a16,
-    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
+    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
     opFPREM, opFYL2XP1, opFSQRT, opFSINCOS, opFRNDINT, opFSCALE, opFSIN, opFCOS
     // clang-format on
 };
@@ -970,7 +2493,7 @@ const OpFn OP_TABLE(fpu_d9_a32)[256] = {
     opFSTP, opFSTP, opFSTP, opFSTP, opFSTP, opFSTP, opFSTP, opFSTP, /*Invalid*/
     opFCHS, opFABS, ILLEGAL_a32, ILLEGAL_a32, opFTST, opFXAM, ILLEGAL_a32, ILLEGAL_a32,
     opFLD1, opFLDL2T, opFLDL2E, opFLDPI, opFLDEG2, opFLDLN2, opFLDZ, ILLEGAL_a32,
-    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
+    opF2XM1, opFYL2X, opFPTAN, opFPATAN, opFXTRACT, opFPREM1, opFDECSTP, opFINCSTP,
     opFPREM, opFYL2XP1, opFSQRT, opFSINCOS, opFRNDINT, opFSCALE, opFSIN, opFCOS
     // clang-format on
 };
diff --git a/src/cpu/x87_ops_misc.h b/src/cpu/x87_ops_misc.h
index 3a528f847..e21819f94 100644
--- a/src/cpu/x87_ops_misc.h
+++ b/src/cpu/x87_ops_misc.h
@@ -33,20 +33,6 @@ opFNOP(uint32_t fetchdat)
     return 0;
 }
 
-static int
-opFXTRACT(uint32_t fetchdat)
-{
-    double_decompose_t temp = (double_decompose_t) ST(0);
-
-    FP_ENTER();
-    cpu_state.pc++;
-    ST(0) = (double) temp.exponent;
-    x87_push((double) temp.mantissa);
-    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxtract) : (x87_timings.fxtract * cpu_multi));
-    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxtract) : (x87_concurrency.fxtract * cpu_multi));
-    return 0;
-}
-
 static int
 opFCLEX(uint32_t fetchdat)
 {
@@ -511,6 +497,20 @@ opFCHS(uint32_t fetchdat)
     return 0;
 }
 
+static int
+opFXTRACT(uint32_t fetchdat)
+{
+    double_decompose_t temp = (double_decompose_t) ST(0);
+
+    FP_ENTER();
+    cpu_state.pc++;
+    ST(0) = (double) temp.exponent;
+    x87_push((double) temp.mantissa);
+    CLOCK_CYCLES_FPU((fpu_type >= FPU_487SX) ? (x87_timings.fxtract) : (x87_timings.fxtract * cpu_multi));
+    CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fxtract) : (x87_concurrency.fxtract * cpu_multi));
+    return 0;
+}
+
 static int
 opFABS(uint32_t fetchdat)
 {
@@ -755,6 +755,7 @@ opFPREM(uint32_t fetchdat)
     CONCURRENCY_CYCLES((fpu_type >= FPU_487SX) ? (x87_concurrency.fprem) : (x87_concurrency.fprem * cpu_multi));
     return 0;
 }
+
 static int
 opFPREM1(uint32_t fetchdat)
 {
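The relocated opFXTRACT above implements the x87 FXTRACT instruction for the existing host-double FPU core: ST(0) is replaced by the exponent and the significand is pushed on top of the stack. As a rough standalone model of that split, using frexp() instead of the double_decompose_t bit-field view and ignoring zero, infinity and NaN handling (names here are illustrative only):

#include <math.h>

/* FXTRACT model: x == significand * 2^exponent, significand in [1.0, 2.0). */
static void
fxtract_model(double x, double *exponent, double *significand)
{
    int    e;
    double m = frexp(x, &e); /* m in [0.5, 1.0), x == m * 2^e */

    *significand = m * 2.0;
    *exponent    = (double) (e - 1);
}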
diff --git a/src/include/86box/86box.h b/src/include/86box/86box.h
index a4c0a5761..d472a3e03 100644
--- a/src/include/86box/86box.h
+++ b/src/include/86box/86box.h
@@ -129,7 +129,8 @@ extern uint32_t mem_size; /* (C) memory size (Installed on system board)
 extern uint32_t isa_mem_size;  /* (C) memory size (ISA Memory Cards) */
 extern int      cpu,           /* (C) cpu type */
     cpu_use_dynarec,           /* (C) cpu uses/needs Dyna */
-    fpu_type;                  /* (C) fpu type */
+    fpu_type,                  /* (C) fpu type */
+    fpu_softfloat;             /* (C) fpu uses softfloat */
 extern int time_sync;          /* (C) enable time sync */
 extern int hdd_format_type;    /* (C) hard disk file format */
 extern int confirm_reset,      /* (C) enable reset confirmation */
diff --git a/src/qt/qt_settingsmachine.cpp b/src/qt/qt_settingsmachine.cpp
index e08b07997..2794af71b 100644
--- a/src/qt/qt_settingsmachine.cpp
+++ b/src/qt/qt_settingsmachine.cpp
@@ -105,6 +105,8 @@ SettingsMachine::save()
     cpu             = ui->comboBoxSpeed->currentData().toInt();
     fpu_type        = ui->comboBoxFPU->currentData().toInt();
     cpu_use_dynarec = ui->checkBoxDynamicRecompiler->isChecked() ? 1 : 0;
+    fpu_softfloat   = (ui->checkBoxFPUSoftfloat->isChecked() && !cpu_use_dynarec) ? 1 : 0;
+
     int64_t temp_mem_size;
     if (machine_get_ram_granularity(machine) < 1024) {
         temp_mem_size = ui->spinBoxRAM->value();
@@ -270,13 +272,22 @@ SettingsMachine::on_comboBoxSpeed_currentIndexChanged(int index)
     if (!(flags & CPU_SUPPORTS_DYNAREC)) {
         ui->checkBoxDynamicRecompiler->setChecked(false);
         ui->checkBoxDynamicRecompiler->setEnabled(false);
+        ui->checkBoxFPUSoftfloat->setChecked(fpu_softfloat);
+        ui->checkBoxFPUSoftfloat->setEnabled(!cpu_use_dynarec);
     } else if (flags & CPU_REQUIRES_DYNAREC) {
         ui->checkBoxDynamicRecompiler->setChecked(true);
         ui->checkBoxDynamicRecompiler->setEnabled(false);
+        ui->checkBoxFPUSoftfloat->setChecked(false);
+        ui->checkBoxFPUSoftfloat->setEnabled(false);
     } else {
         ui->checkBoxDynamicRecompiler->setChecked(cpu_use_dynarec);
         ui->checkBoxDynamicRecompiler->setEnabled(true);
+        ui->checkBoxFPUSoftfloat->setChecked(fpu_softfloat);
+        ui->checkBoxFPUSoftfloat->setEnabled(!cpu_use_dynarec);
     }
+#else
+    ui->checkBoxFPUSoftfloat->setChecked(fpu_softfloat);
+    ui->checkBoxFPUSoftfloat->setEnabled(true);
 #endif
 
     // win_settings_machine_recalc_fpu
diff --git a/src/qt/qt_settingsmachine.ui b/src/qt/qt_settingsmachine.ui
index d5ff1ca59..ee8a048f4 100644
--- a/src/qt/qt_settingsmachine.ui
+++ b/src/qt/qt_settingsmachine.ui
@@ -200,6 +200,19 @@
+     <item>
+      <widget class="QCheckBox" name="checkBoxFPUSoftfloat">
+       <property name="sizePolicy">
+        <sizepolicy>
+         <horstretch>3</horstretch>
+         <verstretch>3</verstretch>
+        </sizepolicy>
+       </property>
+       <property name="text">
+        <string>Softfloat FPU</string>
+       </property>
+      </widget>
+     </item>
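The settings code above encodes one rule in several places: the softfloat FPU is only selectable while the dynamic recompiler is off, and a CPU that requires the dynarec can never use it; save() re-applies the same constraint when writing fpu_softfloat. Condensed into a single predicate (a sketch, not code from the patch):

/* 1 if the "Softfloat FPU" checkbox should be selectable. */
static int
softfloat_selectable(int use_dynarec, int requires_dynarec)
{
    return !use_dynarec && !requires_dynarec;
}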
diff --git a/src/win/Makefile.mingw b/src/win/Makefile.mingw
index 3725e6e09..b2e2ddf0b 100644
--- a/src/win/Makefile.mingw
+++ b/src/win/Makefile.mingw
@@ -241,7 +241,7 @@ PROG := 86Box
 #########################################################################
 #                 Nothing should need changing from here on..           #
 #########################################################################
-VPATH := $(EXPATH) . $(CODEGEN) minitrace cpu \
+VPATH := $(EXPATH) . $(CODEGEN) minitrace cpu cpu/softfloat \
     cdrom chipset device disk disk/minivhd floppy \
     game machine mem printer \
     sio sound \
@@ -551,7 +551,9 @@ CPUOBJ := $(DYNARECOBJ) \
     $(CGTOBJ) \
     cpu.o cpu_table.o fpu.o x86.o \
     8080.o 808x.o 386.o 386_common.o 386_dynarec.o 386_dynarec_ops.o \
-    x86seg.o x87.o x87_timings.o
+    x86seg.o x87.o x87_timings.o \
+    f2xm1.o fpatan.o fprem.o fsincos.o fyl2x.o poly.o softfloat.o softfloat16.o \
+    softfloat-muladd.o softfloat-round-pack.o softfloat-specialize.o softfloatx80.o
 
 CHIPSETOBJ := 82c100.o acc2168.o \
     contaq_82c59x.o \